dragon-ml-toolbox 6.1.2__py3-none-any.whl → 6.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 6.1.2
3
+ Version: 6.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,18 +1,18 @@
1
- dragon_ml_toolbox-6.1.2.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
- dragon_ml_toolbox-6.1.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
1
+ dragon_ml_toolbox-6.2.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
2
+ dragon_ml_toolbox-6.2.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
3
3
  ml_tools/ETL_engineering.py,sha256=4wwZXi9_U7xfCY70jGBaKniOeZ0m75ppxWpQBd_DmLc,39369
4
4
  ml_tools/GUI_tools.py,sha256=n4ZZ5kEjwK5rkOCFJE41HeLFfjhpJVLUSzk9Kd9Kr_0,45410
5
5
  ml_tools/MICE_imputation.py,sha256=oFHg-OytOzPYTzBR_wIRHhP71cMn3aupDeT59ABsXlQ,11576
6
- ml_tools/ML_callbacks.py,sha256=FEJ80TSEtY0-hdnOsAWeVApQt1mdzTdOntqtoWmMAzE,13310
6
+ ml_tools/ML_callbacks.py,sha256=Mlj0tbaQ7A4zI054k9_HdIh_tebtZyGGgu_zb8kGuS0,13726
7
7
  ml_tools/ML_datasetmaster.py,sha256=bbKCNA_b_uDIfxP9YIYKZm-VSfUSD15LvegFxpE9DIQ,34315
8
8
  ml_tools/ML_evaluation.py,sha256=-Z5fXQi2ou6l5Oyir06bO90SZIZVrjQfgoVAqKgSjks,13800
9
9
  ml_tools/ML_inference.py,sha256=blEDgzvDqatxbfloBKsyNPacRwoq9g6WTpIKQ3zoTak,5758
10
10
  ml_tools/ML_models.py,sha256=SJhKHGAN2VTBqzcHUOpFWuVZ2Y7U1M4P_axG_LNYWcI,6460
11
- ml_tools/ML_optimization.py,sha256=BWwaco2IiYgvQbD-WoTMKtlHGh94zHBpeGHKtN51pFs,13475
11
+ ml_tools/ML_optimization.py,sha256=GX-qZ2mCI3gWRCTP5w7lXrZpfGle3J_mE0O68seIoio,13475
12
12
  ml_tools/ML_trainer.py,sha256=1q_CDXuMfndRsPuNofUn2mg2TlhG6MYuGqjWxTDgN9c,15112
13
13
  ml_tools/PSO_optimization.py,sha256=9Y074d-B5h4Wvp9YPiy6KAeXM-Yv6Il3gWalKvOLVgo,22705
14
14
  ml_tools/RNN_forecast.py,sha256=2CyjBLSYYc3xLHxwLXUmP5Qv8AmV1OB_EndETNX1IBk,1956
15
- ml_tools/SQL.py,sha256=9zzS6AFEJM9aj6nE31hDe8S9TqLonk-J1amwZoiHNbk,10468
15
+ ml_tools/SQL.py,sha256=bkSTmMV4CtEqa67hApYWaRxTqwAlKIc5_b28P1bnDwg,10475
16
16
  ml_tools/VIF_factor.py,sha256=2nUMupfUoogf8o6ghoFZk_OwWhFXU0R3C9Gj0HOlI14,10415
17
17
  ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
18
18
  ml_tools/_logger.py,sha256=TpgYguxO-CWYqqgLW0tqFjtwZ58PE_W2OCfWNGZr0n0,1175
@@ -24,10 +24,10 @@ ml_tools/ensemble_inference.py,sha256=rtU7eUaQne615n2g7IHZCJI-OvrBCcjxbTkEIvtCGF
24
24
  ml_tools/ensemble_learning.py,sha256=dAyFgSTyvxJWjc_enJ_8EUoWwiekBeoNyJNxVY-kcUU,21868
25
25
  ml_tools/handle_excel.py,sha256=J9iwIqMZemoxK49J5osSwp9Ge0h9YTKyYGbOm53hcno,13007
26
26
  ml_tools/keys.py,sha256=HtPG8-MWh89C32A7eIlfuuA-DLwkxGkoDfwR2TGN9CQ,1074
27
- ml_tools/optimization_tools.py,sha256=MuT4OG7_r1QqLUti-yYix7QeCpglezD0oe9BDCq0QXk,5086
27
+ ml_tools/optimization_tools.py,sha256=EL5tgNFwRo-82pbRE1CFVy9noNhULD7wprWuKadPheg,5090
28
28
  ml_tools/path_manager.py,sha256=Z8e7w3MPqQaN8xmTnKuXZS6CIW59BFwwqGhGc00sdp4,13692
29
29
  ml_tools/utilities.py,sha256=LqXXTovaHbA5AOKRk6Ru6DgAPAM0wPfYU70kUjYBryo,19231
30
- dragon_ml_toolbox-6.1.2.dist-info/METADATA,sha256=97hB43rYNc9a-iCnyxzfRvXfo6jdpgDmnBEvthYqv1M,6698
31
- dragon_ml_toolbox-6.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
- dragon_ml_toolbox-6.1.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
33
- dragon_ml_toolbox-6.1.2.dist-info/RECORD,,
30
+ dragon_ml_toolbox-6.2.0.dist-info/METADATA,sha256=oqCuLoevs6BjBZ7IuwLqa0o4AvZZpW2l9-eA0upbFfA,6698
31
+ dragon_ml_toolbox-6.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
32
+ dragon_ml_toolbox-6.2.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
33
+ dragon_ml_toolbox-6.2.0.dist-info/RECORD,,
ml_tools/ML_callbacks.py CHANGED
@@ -201,7 +201,7 @@ class ModelCheckpoint(Callback):
201
201
  mode (str): One of {'auto', 'min', 'max'}.
202
202
  verbose (int): Verbosity mode.
203
203
  """
204
- def __init__(self, save_dir: Union[str,Path], monitor: str = PyTorchLogKeys.VAL_LOSS,
204
+ def __init__(self, save_dir: Union[str,Path], checkpoint_name: Optional[str]=None, monitor: str = PyTorchLogKeys.VAL_LOSS,
205
205
  save_best_only: bool = True, mode: Literal['auto', 'min', 'max']= 'auto', verbose: int = 0):
206
206
  super().__init__()
207
207
  self.save_dir = make_fullpath(save_dir, make=True, enforce="directory")
@@ -212,6 +212,7 @@ class ModelCheckpoint(Callback):
212
212
  self.monitor = monitor
213
213
  self.save_best_only = save_best_only
214
214
  self.verbose = verbose
215
+ self.checkpoint_name = checkpoint_name
215
216
 
216
217
  # State variables to be managed during training
217
218
  self.saved_checkpoints = []
@@ -254,7 +255,10 @@ class ModelCheckpoint(Callback):
254
255
  old_best_str = f"{self.best:.4f}" if self.best not in [np.inf, -np.inf] else "inf"
255
256
 
256
257
  # Create a descriptive filename
257
- filename = f"epoch_{epoch}-{self.monitor}_{current:.4f}.pth"
258
+ if self.checkpoint_name is None:
259
+ filename = f"epoch_{epoch}-{self.monitor}_{current:.4f}.pth"
260
+ else:
261
+ filename = f"epoch{epoch}_{self.checkpoint_name}.pth"
258
262
  new_filepath = self.save_dir / filename
259
263
 
260
264
  if self.verbose > 0:
@@ -273,7 +277,11 @@ class ModelCheckpoint(Callback):
273
277
 
274
278
  def _save_rolling_checkpoints(self, epoch, logs):
275
279
  """Saves the latest model and keeps only the most recent ones."""
276
- filename = f"epoch_{epoch}.pth"
280
+ current = logs.get(self.monitor)
281
+ if self.checkpoint_name is None:
282
+ filename = f"epoch_{epoch}-{self.monitor}_{current:.4f}.pth"
283
+ else:
284
+ filename = f"epoch{epoch}_{self.checkpoint_name}.pth"
277
285
  filepath = self.save_dir / filename
278
286
 
279
287
  if self.verbose > 0:
ml_tools/ML_optimization.py CHANGED
@@ -114,10 +114,10 @@ def create_pytorch_problem(
114
114
 
115
115
  operators = [
116
116
  SimulatedBinaryCrossOver(problem,
117
- tournament_size=4,
118
- eta=0.8),
117
+ tournament_size=3,
118
+ eta=0.6),
119
119
  GaussianMutation(problem,
120
- stdev=0.1)
120
+ stdev=0.4)
121
121
  ]
122
122
 
123
123
  searcher_kwargs["operators"] = operators
ml_tools/SQL.py CHANGED
@@ -59,7 +59,7 @@ class DatabaseManager:
59
59
  try:
60
60
  self.conn = sqlite3.connect(self.db_path)
61
61
  self.cursor = self.conn.cursor()
62
- _LOGGER.info(f" Successfully connected to database: {self.db_path}")
62
+ _LOGGER.info(f"❇️ Successfully connected to database: {self.db_path}")
63
63
  return self
64
64
  except sqlite3.Error as e:
65
65
  _LOGGER.error(f"❌ Database connection failed: {e}")
@@ -99,7 +99,7 @@ class DatabaseManager:
99
99
 
100
100
  query = f"CREATE TABLE {exists_clause} {table_name} ({columns_def})"
101
101
 
102
- _LOGGER.info(f"🗂️ Executing: {query}")
102
+ _LOGGER.info(f"➡️ Executing: {query}")
103
103
  self.cursor.execute(query)
104
104
 
105
105
  def insert_row(self, table_name: str, data: Dict[str, Any]):
@@ -192,7 +192,7 @@ class DatabaseManager:
192
192
  query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
193
193
 
194
194
  self.cursor.executemany(query, values_to_insert)
195
- _LOGGER.info(f" Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")
195
+ _LOGGER.info(f"➡️ Bulk inserted {len(values_to_insert)} rows into '{table_name}'.")
196
196
 
197
197
  def insert_from_dataframe(self, table_name: str, df: pd.DataFrame, if_exists: Literal['fail', 'replace', 'append'] = 'append'):
198
198
  """
@@ -219,7 +219,7 @@ class DatabaseManager:
219
219
  if_exists=if_exists,
220
220
  index=False # Typically, we don't want to save the DataFrame index
221
221
  )
222
- _LOGGER.info(f" Wrote {len(df)} rows from DataFrame to table '{table_name}' using mode '{if_exists}'.")
222
+ _LOGGER.info(f"➡️ Wrote {len(df)} rows from DataFrame to table '{table_name}' using mode '{if_exists}'.")
223
223
 
224
224
  def list_tables(self) -> List[str]:
225
225
  """Returns a list of all table names in the database."""
@@ -264,7 +264,7 @@ class DatabaseManager:
264
264
 
265
265
  query = f"CREATE {unique_clause} INDEX IF NOT EXISTS {index_name} ON {table_name} ({column_name})"
266
266
 
267
- _LOGGER.info(f"🗂️ Executing: {query}")
267
+ _LOGGER.info(f"➡️ Executing: {query}")
268
268
  self.cursor.execute(query)
269
269
 
270
270
 
ml_tools/optimization_tools.py CHANGED
@@ -29,24 +29,24 @@ def parse_lower_upper_bounds(source: dict[str,tuple[Any,Any]]):
29
29
  return lower, upper
30
30
 
31
31
 
32
- def plot_optimal_feature_distributions(results_dir: Union[str, Path], save_dir: Union[str, Path]):
32
+ def plot_optimal_feature_distributions(results_dir: Union[str, Path]):
33
33
  """
34
34
  Analyzes optimization results and plots the distribution of optimal values for each feature.
35
35
 
36
36
  For features with more than two unique values, this function generates a color-coded
37
37
  Kernel Density Estimate (KDE) plot. For binary or constant features, it generates a bar plot
38
38
  showing relative frequency.
39
+
40
+ Plots are saved in a subdirectory inside the source directory.
39
41
 
40
42
  Parameters
41
43
  ----------
42
44
  results_dir : str or Path
43
45
  The path to the directory containing the optimization result CSV files.
44
- save_dir : str or Path
45
- The directory where the output plots will be saved.
46
46
  """
47
47
  # Check results_dir and create output path
48
- results_path = make_fullpath(results_dir)
49
- output_path = make_fullpath(save_dir, make=True)
48
+ results_path = make_fullpath(results_dir, enforce="directory")
49
+ output_path = make_fullpath(results_path / "DistributionPlots", make=True)
50
50
 
51
51
  # Check that the directory contains csv files
52
52
  list_csv_paths(results_path, verbose=False)