dragon-ml-toolbox 20.3.0__py3-none-any.whl → 20.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dragon_ml_toolbox-20.5.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dragon-ml-toolbox
- Version: 20.3.0
+ Version: 20.5.0
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
  License-Expression: MIT

dragon_ml_toolbox-20.5.0.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
- dragon_ml_toolbox-20.3.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
- dragon_ml_toolbox-20.3.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+ dragon_ml_toolbox-20.5.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+ dragon_ml_toolbox-20.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
  ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
  ml_tools/ETL_cleaning/__init__.py,sha256=8dsHiguUkI6Ix1759IPdGU3IXcjMz4DyaSCkdYhxxg8,490
@@ -11,7 +11,7 @@ ml_tools/ETL_engineering/_dragon_engineering.py,sha256=D-D6tmhyQ3I9-cXgxLVVbQBRT
  ml_tools/ETL_engineering/_transforms.py,sha256=qOxa_vjh3gzS4IiGFqq_0Wnh0ilQO41jRiIp-6Ej4vw,47079
  ml_tools/GUI_tools/_GUI_tools.py,sha256=vjiBbiU3qCxB4rivBWHNBnq-NhpDZZERslkmi_61WxY,48987
  ml_tools/GUI_tools/__init__.py,sha256=zjUFxE3AnYPtd_Ptq7UCGQH5bXAM324t03rGQtcYLo4,372
- ml_tools/IO_tools/_IO_loggers.py,sha256=ytX7toElLb4jzqTk_Lcb3EzeHHWdD82mVXDheKfmML4,9001
+ ml_tools/IO_tools/_IO_loggers.py,sha256=wgIuCPl5WzQ0rzYrIovMsIufciXwRr17Dqxu0y9G2xY,9200
  ml_tools/IO_tools/_IO_save_load.py,sha256=xVeQzrd4r-L9ruFPvO8cV3bvzYHhJ0coOfZMrNWq5rs,4426
  ml_tools/IO_tools/_IO_utils.py,sha256=quOqBVSi_z0AI7qznCNAcLRB_f4kaI-abANngXUBcYA,4384
  ml_tools/IO_tools/__init__.py,sha256=Klu0Qf0qNfb7SHG33cs6qqecH5lEnYlXfe_xYdZ1Ry4,474
@@ -39,7 +39,7 @@ ml_tools/ML_datasetmaster/_datasetmaster.py,sha256=Oy2UE3YJpKTaFwQF5TkQLgLB54-BF
  ml_tools/ML_datasetmaster/_sequence_datasetmaster.py,sha256=cW3fuILZWs-7Yuo4T2fgGfTC4vwho3Gp4ohIKJYS7O0,18452
  ml_tools/ML_datasetmaster/_vision_datasetmaster.py,sha256=kvSqXYeNBN1JSRfSEEXYeIcsqy9HsJAl_EwFWClqlsw,67025
  ml_tools/ML_evaluation/__init__.py,sha256=e3c8JNP0tt4Kxc7QSQpGcOgrxf8JAucH4UkJvJxUL2E,1122
- ml_tools/ML_evaluation/_classification.py,sha256=SmhIxJy81iIFrys36LUoKrH5ey9IqzE-UxR2-tdgseI,28396
+ ml_tools/ML_evaluation/_classification.py,sha256=xXCh87RE9_VXYalc7l6CbakYfB0rijGrY76RZIrqLBk,28922
  ml_tools/ML_evaluation/_feature_importance.py,sha256=mTwi3LKom_axu6UFKunELj30APDdhG9GQC2w7I9mYhI,17137
  ml_tools/ML_evaluation/_loss.py,sha256=1a4O25i3Ya_3naNZNL7ELLUL46BY86g1scA7d7q2UFM,3625
  ml_tools/ML_evaluation/_regression.py,sha256=hnT2B2_6AnQ7aA7uk-X2lZL9G5JFGCduDXyZbr1gFCA,11037
@@ -76,7 +76,7 @@ ml_tools/ML_models_vision/_image_classification.py,sha256=miwMNoTXpmmZSiqeXvDKpx
  ml_tools/ML_models_vision/_image_segmentation.py,sha256=NRjn91bDD2OJWSJFrrNW9s41qgg5w7pw68Q61-kg-As,4157
  ml_tools/ML_models_vision/_object_detection.py,sha256=AOGER5bx0REc-FfBtspJmyLJxn3GdwDSPwFGveobR94,5608
  ml_tools/ML_optimization/__init__.py,sha256=No18Dsw6Q9zPt8B9fpG0bWomuXmwDC7DiokiaPuwmRI,485
- ml_tools/ML_optimization/_multi_dragon.py,sha256=oUTscfoySSypgX2_7wfkDzjJ60Y93utSW2OZvAGTGE0,37494
+ ml_tools/ML_optimization/_multi_dragon.py,sha256=zQhDxFY8FNxUlcbSnHMVArfojzYjgNa21jSE3pJmRW0,38956
  ml_tools/ML_optimization/_single_dragon.py,sha256=jh5-SK6NKAzbheQhquiYoROozk-RzUv1jiFkIzK_AFg,7288
  ml_tools/ML_optimization/_single_manual.py,sha256=h-_k9JmRqPkjTra1nu7AyYbSyWkYZ1R3utiNmW06WFs,21809
  ml_tools/ML_scaler/_ML_scaler.py,sha256=P75X0Sx8N-VxC2Qy8aG7mWaZlkTfjspiZDi1YiMQD1I,8872
@@ -103,12 +103,12 @@ ml_tools/_core/__init__.py,sha256=m-VP0RW0tOTm9N5NI3kFNcpM7WtVgs0RK9pK3ZJRZQQ,14
  ml_tools/_core/_logger.py,sha256=xzhn_FouMDRVNwXGBGlPC9Ruq6i5uCrmNaS5jesguMU,4972
  ml_tools/_core/_schema_load_ops.py,sha256=KLs9vBzANz5ESe2wlP-C41N4VlgGil-ywcfvWKSOGss,1551
  ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHzQ,234
- ml_tools/data_exploration/__init__.py,sha256=ahCjELrum2aIj_cLK-sdGbJjTvvolf3US_oaB97rOQg,1736
+ ml_tools/data_exploration/__init__.py,sha256=nYKg1bPBgXibC5nhmNKPw3VaKFeVtlNGL_YpHixW-Pg,1795
  ml_tools/data_exploration/_analysis.py,sha256=H6LryV56FFCHWjvQdkhZbtprZy6aP8EqU_hC2Cf9CLE,7832
  ml_tools/data_exploration/_cleaning.py,sha256=pAZOXgGK35j7O8q6cnyTwYK1GLNnD04A8p2fSyMB1mg,20906
  ml_tools/data_exploration/_features.py,sha256=wW-M8n2aLIy05DR2z4fI8wjpPjn3mOAnm9aSGYbMKwI,23363
  ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
- ml_tools/data_exploration/_schema_ops.py,sha256=PoFeHaS9dXI9gfL0SRD-8uSP4owqmbQFbtfA-HxkLnY,7108
+ ml_tools/data_exploration/_schema_ops.py,sha256=Fd6fBGGv4OpxmJ1HG9pith6QL90z0tzssCvzkQxlEEQ,11083
  ml_tools/ensemble_evaluation/__init__.py,sha256=t4Gr8EGEk8RLatyc92-S0BzbQvdvodzoF-qDAH2qjVg,546
  ml_tools/ensemble_evaluation/_ensemble_evaluation.py,sha256=-sX9cLMaa0FOQDikmVv2lsCYtQ56Kftd3tILnNej0Hg,28346
  ml_tools/ensemble_inference/__init__.py,sha256=VMX-Kata2V0UmiURIU2jx6mRuZmvTWf-QXzCpHmVGZA,255
@@ -118,7 +118,7 @@ ml_tools/ensemble_learning/_ensemble_learning.py,sha256=MHDZBR20_nStlSSeThFI3bSu
  ml_tools/excel_handler/__init__.py,sha256=AaWM3n_dqBhJLTs3OEA57ex5YykKXNOwVCyHlVsdnqI,530
  ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8WxDOfQ4sgxxLs,13965
  ml_tools/keys/__init__.py,sha256=-0c2pmrhyfROc-oQpEjJGLBMhSagA3CyFijQaaqZRqU,399
- ml_tools/keys/_keys.py,sha256=DLP0BYibRueM_8Dz9pSbWUpKypcimFL5kmXUl4wSwdU,9292
+ ml_tools/keys/_keys.py,sha256=kBcW3euNmD57_4aoRaAeqJP3FtU3iSuvgYv-BZqnEWw,9290
  ml_tools/math_utilities/__init__.py,sha256=K7Obkkc4rPKj4EbRZf1BsXHfiCg7FXYv_aN9Yc2Z_Vg,400
  ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
  ml_tools/optimization_tools/__init__.py,sha256=KD8JXpfGuPndO4AHnjJGu6uV1GRwhOfboD0KZV45kzw,658
@@ -130,14 +130,14 @@ ml_tools/path_manager/_path_tools.py,sha256=LcZE31QlkzZWUR8g1MW_N_mPY2DpKBJLA45V
  ml_tools/plot_fonts/__init__.py,sha256=KIxXRCjQ3SliEoLhEcqs7zDVZbVTn38bmSdL-yR1Q2w,187
  ml_tools/plot_fonts/_plot_fonts.py,sha256=mfjXNT9P59ymHoTI85Q8CcvfxfK5BIFBWtTZH-hNIC4,2209
  ml_tools/schema/__init__.py,sha256=K6uiZ9f0GCQ7etw1yl2-dQVLhU7RkL3KHesO3HNX6v4,334
- ml_tools/schema/_feature_schema.py,sha256=aVY3AJt1j4D2mtusVy2l6lDR2SYzPMyfvG1o9zOn0Kw,8585
+ ml_tools/schema/_feature_schema.py,sha256=MuPf6Nf7tDhUTGyX7tcFHZh-lLSNsJkLmlf9IxdF4O4,9660
  ml_tools/schema/_gui_schema.py,sha256=IVwN4THAdFrvh2TpV4SFd_zlzMX3eioF-w-qcSVTndE,7245
  ml_tools/serde/__init__.py,sha256=IDirr8i-qjUHB71hmHO6lGiODhUoOnUcXYrvb_XgrzE,292
  ml_tools/serde/_serde.py,sha256=8QnYK8ZG21zdNaC0v63iSz2bhgwOKRKAWxTVQvMV0A8,5525
  ml_tools/utilities/__init__.py,sha256=iQb-S5JesEjGGI8983Vkj-14LCtchFxdWRhaziyvnoY,808
  ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
  ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
- dragon_ml_toolbox-20.3.0.dist-info/METADATA,sha256=pPP6u48MGLf2mBIMTn6ddq7knczywo8sm9XO3iF9Zzg,7866
- dragon_ml_toolbox-20.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- dragon_ml_toolbox-20.3.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
- dragon_ml_toolbox-20.3.0.dist-info/RECORD,,
+ dragon_ml_toolbox-20.5.0.dist-info/METADATA,sha256=sf0thvyXG1fpiAdeFpjiTdsZBkdVEECxdTDz0oGFgv8,7866
+ dragon_ml_toolbox-20.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dragon_ml_toolbox-20.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+ dragon_ml_toolbox-20.5.0.dist-info/RECORD,,

ml_tools/IO_tools/_IO_loggers.py CHANGED
@@ -170,7 +170,7 @@ def _log_dict_to_json(data: dict[Any, Any], path: Path) -> None:
 
 
  def train_logger(train_config: Union[dict, Any],
-                  model_parameters: Union[dict, Any],
+                  model_parameters: Union[dict, Any, None],
                   train_history: Union[dict, None],
                   save_directory: Union[str, Path],
                   verbose: int = 3) -> None:
@@ -179,7 +179,7 @@ def train_logger(train_config: Union[dict, Any],
 
      Args:
          train_config (dict | Any): Training configuration parameters. If object, must have a `.to_log()` method returning a dict.
-         model_parameters (dict | Any): Model parameters. If object, must have a `.to_log()` method returning a dict.
+         model_parameters (dict | Any | None): Model parameters. If object, must have a `.to_log()` method returning a dict.
          train_history (dict | None): Training history log.
          save_directory (str | Path): Directory to save the log file.
      """
@@ -201,23 +201,27 @@ def train_logger(train_config: Union[dict, Any],
 
      train_config_dict = train_config
 
-     # model_parameters should be a dict or a custom object with the ".to_log()" method
-     if not isinstance(model_parameters, dict):
-         if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
-             model_parameters_dict: dict = model_parameters.to_log()
-             if not isinstance(model_parameters_dict, dict):
-                 _LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
+     # model_parameters should be a dict or a custom object with the ".to_log()" method or None
+     model_parameters_dict = {}
+
+     if model_parameters is not None:
+         if not isinstance(model_parameters, dict):
+             if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
+                 params_result: dict = model_parameters.to_log()
+                 if not isinstance(params_result, dict):
+                     _LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
+                     raise ValueError()
+                 model_parameters_dict = params_result
+             else:
+                 _LOGGER.error("'model_parameters' must be a dict, None, or an object with a 'to_log()' method.")
                  raise ValueError()
          else:
-             _LOGGER.error("'model_parameters' must be a dict or an object with a 'to_log()' method.")
-             raise ValueError()
-     else:
-         # check for empty dict
-         if not model_parameters:
-             _LOGGER.error("'model_parameters' dictionary is empty.")
-             raise ValueError()
-
-         model_parameters_dict = model_parameters
+             # check for empty dict
+             if not model_parameters:
+                 _LOGGER.error("'model_parameters' dictionary is empty.")
+                 raise ValueError()
+
+             model_parameters_dict = model_parameters
 
      # make base dictionary
      data: dict = train_config_dict | model_parameters_dict
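
The practical effect of this change is that parameter-free runs can still be logged. A minimal usage sketch, assuming `train_logger` is re-exported from `ml_tools.IO_tools` (the configuration values below are illustrative, not from the package):

```python
from ml_tools.IO_tools import train_logger  # assumed re-export path

# Hypothetical training configuration; only model_parameters=None
# exercises the new 20.5.0 behavior.
train_config = {"epochs": 50, "lr": 1e-3, "batch_size": 32}

# With model_parameters=None the saved log now contains just the training
# configuration (plus history); in 20.3.0, passing None raised ValueError.
train_logger(
    train_config=train_config,
    model_parameters=None,
    train_history={"train_loss": [0.9, 0.5, 0.3]},
    save_directory="logs/run_01",
)
```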

ml_tools/ML_evaluation/_classification.py CHANGED
@@ -329,7 +329,11 @@ def classification_metrics(save_dir: Union[str, Path],
      fig_roc, ax_roc = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
      ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=format_config.ROC_PR_line)
      ax_roc.plot([0, 1], [0, 1], 'k--')
-     ax_roc.set_title(f'Receiver Operating Characteristic{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+     # use "ROC" if extra title, else use "Receiver Operating Characteristic" title
+     if plot_title.strip():
+         ax_roc.set_title(f'ROC{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+     else:
+         ax_roc.set_title(f'Receiver Operating Characteristic', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
      ax_roc.set_xlabel('False Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
      ax_roc.set_ylabel('True Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
 
@@ -351,7 +355,11 @@ def classification_metrics(save_dir: Union[str, Path],
      ap_score = average_precision_score(y_true_binary, y_score)
      fig_pr, ax_pr = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
      ax_pr.plot(recall, precision, label=f'Avg Precision = {ap_score:.2f}', color=format_config.ROC_PR_line)
-     ax_pr.set_title(f'Precision-Recall Curve{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+     # Use "PR Curve" if extra title, else use "Precision-Recall Curve" title
+     if plot_title.strip():
+         ax_pr.set_title(f'PR Curve{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+     else:
+         ax_pr.set_title(f'Precision-Recall Curve', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
      ax_pr.set_xlabel('Recall', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
      ax_pr.set_ylabel('Precision', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
 
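Both title branches follow the same rule; read standalone (the helper name below is ours, not the library's), it is simply:

```python
def roc_title(plot_title: str) -> str:
    # A non-blank suffix gets the compact "ROC" prefix so the combined
    # title stays short; otherwise the full name is used on its own.
    if plot_title.strip():
        return f"ROC{plot_title}"
    return "Receiver Operating Characteristic"

assert roc_title("") == "Receiver Operating Characteristic"
assert roc_title(" - Class A") == "ROC - Class A"
```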

ml_tools/ML_optimization/_multi_dragon.py CHANGED
@@ -170,9 +170,13 @@ class DragonParetoOptimizer:
              re_evaluate=False # model is deterministic
          )
 
-     def run(self) -> pd.DataFrame:
+     def run(self,
+             plots_and_log: bool=True) -> pd.DataFrame:
          """
          Execute the optimization with progress tracking and periodic logging.
+
+         Args:
+             plots_and_log (bool): If True, generates plots and logs during optimization. Disable for multi-run scenarios.
 
          Returns:
              pd.DataFrame: A DataFrame containing the non-dominated solutions (Pareto Front).
@@ -189,9 +193,10 @@
          _LOGGER.info(f"🧬 Starting NSGA-II (GeneticAlgorithm) for {generations} generations...")
 
          # Initialize log file
-         with open(log_file, "w") as f:
-             f.write(f"Pareto Optimization Log - {generations} Generations\n")
-             f.write("=" * 60 + "\n")
+         if plots_and_log:
+             with open(log_file, "w") as f:
+                 f.write(f"Pareto Optimization Log - {generations} Generations\n")
+                 f.write("=" * 60 + "\n")
 
          # History tracking for visualization
          history_records = []
@@ -201,43 +206,44 @@
          for gen in range(1, generations + 1):
              self.algorithm.step()
 
-             # Capture stats for history (every generation for smooth plots)
-             current_evals = self.algorithm.population.evals.clone() # type: ignore
-
-             gen_stats = {}
-             for i, target_name in enumerate(self.ordered_target_names):
-                 vals = current_evals[:, i]
-                 v_mean = float(vals.mean())
-                 v_min = float(vals.min())
-                 v_max = float(vals.max())
+             if plots_and_log:
+                 # Capture stats for history (every generation for smooth plots)
+                 current_evals = self.algorithm.population.evals.clone() # type: ignore
 
-                 # Store for plotting
-                 history_records.append({
-                     "Generation": gen,
-                     "Target": target_name,
-                     "Mean": v_mean,
-                     "Min": v_min,
-                     "Max": v_max
-                 })
+                 gen_stats = {}
+                 for i, target_name in enumerate(self.ordered_target_names):
+                     vals = current_evals[:, i]
+                     v_mean = float(vals.mean())
+                     v_min = float(vals.min())
+                     v_max = float(vals.max())
+
+                     # Store for plotting
+                     history_records.append({
+                         "Generation": gen,
+                         "Target": target_name,
+                         "Mean": v_mean,
+                         "Min": v_min,
+                         "Max": v_max
+                     })
+
+                     gen_stats[target_name] = (v_mean, v_min, v_max)
 
-                 gen_stats[target_name] = (v_mean, v_min, v_max)
-
-             # Periodic Logging of Population Stats to FILE
-             if gen % log_interval == 0 or gen == generations:
-                 stats_msg = [f"Gen {gen}:"]
-                 for t_name, (v_mean, v_min, v_max) in gen_stats.items():
-                     stats_msg.append(f"{t_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
-
-                 log_line = " | ".join(stats_msg)
-
-                 # Write to file
-                 with open(log_file, "a") as f:
-                     f.write(log_line + "\n")
+                 # Periodic Logging of Population Stats to FILE
+                 if gen % log_interval == 0 or gen == generations:
+                     stats_msg = [f"Gen {gen}:"]
+                     for t_name, (v_mean, v_min, v_max) in gen_stats.items():
+                         stats_msg.append(f"{t_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
+
+                     log_line = " | ".join(stats_msg)
+
+                     # Write to file
+                     with open(log_file, "a") as f:
+                         f.write(log_line + "\n")
 
              pbar.update(1)
 
          # --- Post-Optimization Visualization ---
-         if history_records:
+         if plots_and_log and history_records:
              _LOGGER.debug("Generating optimization history plots...")
              history_df = pd.DataFrame(history_records)
              self._plot_optimization_history(history_df, save_path)
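
A hedged sketch of the multi-run scenario the new flag targets; the optimizer construction is elided because its constructor is not part of this diff, and `build_optimizer` is a hypothetical factory:

```python
import pandas as pd

fronts = []
for seed in range(5):
    optimizer = build_optimizer(seed)  # hypothetical factory, not a library API

    # plots_and_log=False skips the log file, the per-generation history
    # capture, and all plot generation for this run:
    fronts.append(optimizer.run(plots_and_log=False))

# Combine the Pareto fronts from all runs into one DataFrame.
combined = pd.concat(fronts, ignore_index=True)
```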
@@ -308,19 +314,21 @@
          _LOGGER.info(f"Optimization complete. Found {len(pareto_df)} non-dominated solutions.")
 
          # --- Plotting ---
-         self._generate_plots(pareto_df, save_path)
+         if plots_and_log:
+             self._generate_plots(pareto_df, save_path)
 
          return pareto_df
 
      def save_solutions(self,
+                        csv_if_exists: Literal['fail', 'replace', 'append'] = 'replace',
                         save_to_sql: bool = False,
                         sql_table_name: Optional[str] = None,
                         sql_if_exists: Literal['fail', 'replace', 'append'] = 'replace') -> None:
          """
-         Saves the current Pareto front to a CSV file, with optional integer rounding
-         for specific continuous columns. Optionally saves to a SQL database.
+         Saves the current Pareto front to a CSV file. Optionally saves to a SQL database.
 
          Args:
+             csv_if_exists (str): Behavior if CSV file exists ('fail', 'replace', 'append').
              save_to_sql (bool): If True, also writes the results to a SQLite database in the save_dir.
              sql_table_name (str, optional): Specific table name for SQL. If None, uses the solutions filename.
              sql_if_exists (str): Behavior if SQL table exists ('fail', 'replace', 'append').
@@ -377,9 +385,24 @@
          # sanitize filename and add extension if missing
          sanitized_filename = sanitize_filename(filename)
          csv_filename = sanitized_filename if sanitized_filename.lower().endswith(".csv") else f"{sanitized_filename}.csv"
+         full_csv_path = save_path / csv_filename
 
-         save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename, verbose=1)
-         _LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'. Shape: {df_to_save.shape}")
+         # Logic to handle Append/Fail/Replace for CSV
+         if csv_if_exists == 'append' and full_csv_path.exists():
+             try:
+                 # Append mode: write without header, index=False to match standard data exports
+                 df_to_save.to_csv(full_csv_path, mode='a', header=False, index=False)
+                 _LOGGER.info(f"💾 Pareto solutions APPENDED to CSV: '{save_path.name}/{csv_filename}'. Added {len(df_to_save)} rows.")
+             except Exception as e:
+                 _LOGGER.error(f"Failed to append CSV: {e}")
+                 raise e
+         elif csv_if_exists == 'fail' and full_csv_path.exists():
+             _LOGGER.error(f"File '{full_csv_path}' already exists and csv_if_exists='fail'.")
+             raise FileExistsError()
+         else:
+             # Default 'replace' or new file creation using the existing utility
+             save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename, verbose=1)
+             _LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'. Shape: {df_to_save.shape}")
 
          # Save optimization bounds as JSON for reference (debug mode)
          if self._debug:
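
Two illustrative calls (alternatives, not a sequential script) showing the new CSV policy; note that the append path writes without a header, so it assumes the existing file has the same column layout:

```python
# Accumulate successive runs' fronts in one CSV (the default is 'replace'):
optimizer.save_solutions(csv_if_exists='append')

# Or refuse to overwrite an existing CSV, while also exporting to SQLite
# under its own independent if-exists policy:
optimizer.save_solutions(csv_if_exists='fail',
                         save_to_sql=True,
                         sql_table_name="pareto_front",
                         sql_if_exists='append')
```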
@@ -404,13 +427,13 @@
                  save_json(
                      data=bounds_data,
                      directory=save_path,
-                     filename="all_optimization_bounds.json",
+                     filename="all_debug_optimization_bounds.json",
                      verbose=False
                  )
-                 _LOGGER.info(f"💾 Optimization bounds saved to: '{save_path.name}/all_optimization_bounds.json'")
+                 _LOGGER.info(f"💾 Optimization bounds saved to: '{save_path.name}/all_debug_optimization_bounds.json'")
 
              except Exception as e:
-                 _LOGGER.warning(f"Failed to save optimization bounds to JSON: {e}")
+                 _LOGGER.warning(f"Failed to save debug optimization bounds to JSON: {e}")
 
          # --- 2. Save SQL (Optional) ---
          if save_to_sql:
@@ -636,7 +659,7 @@
                   z_target: Union[int, str],
                   hue_target: Optional[Union[int, str]] = None):
          """
-         Public API to generate 3D visualizations for specific targets.
+         Generate 3D visualizations for specific targets.
 
          Args:
              x_target (int|str): Index or name of the target for the X axis.

ml_tools/data_exploration/__init__.py CHANGED
@@ -36,6 +36,7 @@ from ._features import (
  from ._schema_ops import (
      finalize_feature_schema,
      apply_feature_schema,
+     reconstruct_from_schema
  )
 
  from .._core import _imprimir_disponibles
@@ -62,6 +63,7 @@ __all__ = [
      "encode_categorical_features",
      "finalize_feature_schema",
      "apply_feature_schema",
+     "reconstruct_from_schema",
      "match_and_filter_columns_by_regex",
      "standardize_percentages",
      "reconstruct_one_hot",

ml_tools/data_exploration/_schema_ops.py CHANGED
@@ -9,6 +9,13 @@ from .._core import get_logger
  _LOGGER = get_logger("Data Exploration: Schema Ops")
 
 
+ __all__ = [
+     "finalize_feature_schema",
+     "apply_feature_schema",
+     "reconstruct_from_schema",
+ ]
+
+
  def finalize_feature_schema(
      df_features: pd.DataFrame,
      categorical_mappings: Optional[dict[str, dict[str, int]]]
@@ -86,7 +93,7 @@ def apply_feature_schema(
      schema: FeatureSchema,
      targets: Optional[list[str]] = None,
      unknown_value: int = 99999,
-     verbose: bool = True
+     verbose: int = 3
  ) -> pd.DataFrame:
      """
      Aligns the input DataFrame with the provided FeatureSchema.
@@ -100,7 +107,7 @@ def apply_feature_schema(
          targets (list[str] | None): Optional list of target column names.
          unknown_value (int): Integer value to assign to unknown categorical levels.
              Defaults to 99999 to avoid collision with existing categories.
-         verbose (bool): If True, logs info about dropped extra columns.
+         verbose (int): Verbosity level for logging. Higher values produce more detailed logs.
 
      Returns:
          pd.DataFrame: A new DataFrame with the exact column order and encoding defined by the schema.
@@ -147,7 +154,8 @@ def apply_feature_schema(
          # Handle Unknown Categories
          if df_processed[col_name].isnull().any():
              n_missing = df_processed[col_name].isnull().sum()
-             _LOGGER.warning(f"Feature '{col_name}': Found {n_missing} unknown categories. Mapping to {unknown_value}.")
+             if verbose >= 1:
+                 _LOGGER.warning(f"Feature '{col_name}': Found {n_missing} unknown categories. Mapping to {unknown_value}.")
 
              # Fill unknowns with the specified integer
              df_processed[col_name] = df_processed[col_name].fillna(unknown_value)
@@ -159,14 +167,13 @@ def apply_feature_schema(
 
      extra_cols = set(df_processed.columns) - set(final_column_order)
      if extra_cols:
-         _LOGGER.info(f"Dropping {len(extra_cols)} extra columns not present in schema.")
-         if verbose:
-             for extra_column in extra_cols:
-                 print(f"  - Dropping column: '{extra_column}'")
+         if verbose >= 1:
+             _LOGGER.warning(f"Dropping {len(extra_cols)} extra columns not present in schema: {extra_cols}")
 
      df_final = df_processed[final_column_order]
 
-     _LOGGER.info(f"Schema applied successfully. Final shape: {df_final.shape}")
+     if verbose >= 2:
+         _LOGGER.info(f"Schema applied successfully. Final shape: {df_final.shape}")
 
      # df_final should be a dataframe
      if isinstance(df_final, pd.Series):
@@ -174,3 +181,95 @@ def apply_feature_schema(
 
      return df_final
 
+
+ def reconstruct_from_schema(
+     df: pd.DataFrame,
+     schema: FeatureSchema,
+     targets: Optional[list[str]] = None,
+     verbose: int = 3
+ ) -> pd.DataFrame:
+     """
+     Reverses the schema application to make data human-readable.
+
+     This function decodes categorical features back to their string representations
+     using the schema's mappings. It strictly enforces the schema structure,
+     ignoring extra columns (unless they are specified as targets).
+
+     Args:
+         df (pd.DataFrame): The input DataFrame containing encoded features.
+         schema (FeatureSchema): The schema defining feature names and reverse mappings.
+         targets (list[str] | None): Optional list of target column names to preserve. These are not decoded and kept in the order specified here.
+         verbose (int): Verbosity level for logging info about the process.
+
+     Returns:
+         pd.DataFrame: A new DataFrame with the exact column order (features + targets),
+             with categorical features decoded to strings.
+
+     Raises:
+         ValueError: If any required feature or target column is missing.
+     """
+     # 1. Setup
+     df_decoded = df.copy()
+     targets = targets if targets is not None else []
+
+     # 2. Validation: Strict Column Presence
+     # Check Features
+     missing_features = [col for col in schema.feature_names if col not in df_decoded.columns]
+     if missing_features:
+         _LOGGER.error(f"Schema Reconstruction Mismatch: Missing required features: {missing_features}")
+         raise ValueError()
+
+     # Check Targets
+     if targets:
+         missing_targets = [col for col in targets if col not in df_decoded.columns]
+         if missing_targets:
+             _LOGGER.error(f"Schema Reconstruction Mismatch: Missing required targets: {missing_targets}")
+             raise ValueError()
+
+     # 3. Reorder and Filter (Drop extra columns early)
+     # The valid columns are Features + Targets
+     valid_columns = list(schema.feature_names) + targets
+
+     extra_cols = set(df_decoded.columns) - set(valid_columns)
+     if extra_cols:
+         if verbose >= 1:
+             _LOGGER.warning(f"Dropping extra columns not present in schema or targets: {extra_cols}")
+
+     # Enforce order: Features first, then Targets
+     df_decoded = df_decoded[valid_columns]
+
+     # 4. Reverse Categorical Encoding
+     if schema.categorical_feature_names and schema.categorical_mappings:
+         for col_name in schema.categorical_feature_names:
+             if col_name not in schema.categorical_mappings:
+                 continue
+
+             forward_mapping = schema.categorical_mappings[col_name]
+             # Create reverse map: {int: str}
+             reverse_mapping = {v: k for k, v in forward_mapping.items()}
+
+             # --- SAFE TYPE CASTING ---
+             # Ensure values are Integers before mapping (handle 5.0 vs 5).
+             try:
+                 if pd.api.types.is_numeric_dtype(df_decoded[col_name]):
+                     df_decoded[col_name] = df_decoded[col_name].astype("Int64")
+             except (TypeError, ValueError):
+                 # casted to NaN later during mapping
+                 pass
+             # -------------------------
+
+             # Check for unknown codes before mapping
+             if verbose >= 1:
+                 unique_codes = df_decoded[col_name].dropna().unique()
+                 unknown_codes = [code for code in unique_codes if code not in reverse_mapping]
+                 if unknown_codes:
+                     _LOGGER.warning(f"Feature '{col_name}': Found unknown encoded values {unknown_codes}. These will be mapped to NaN.")
+
+             # Apply reverse mapping
+             df_decoded[col_name] = df_decoded[col_name].map(reverse_mapping)
+
+     if verbose >= 2:
+         _LOGGER.info(f"Schema reconstruction successful. Final shape: {df_decoded.shape}")
+
+     return df_decoded
+
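
A round-trip sketch of the new decoder; the schema contents and column names below are toy assumptions, and it presumes `apply_feature_schema` encodes categories with the schema's forward mappings:

```python
import pandas as pd
from ml_tools.data_exploration import apply_feature_schema, reconstruct_from_schema

# Assume 'schema' is a FeatureSchema with feature_names == ["color", "size"]
# and categorical_mappings == {"color": {"red": 0, "blue": 1}} (toy values).
raw = pd.DataFrame({"color": ["red", "blue"], "size": [1.0, 2.0], "junk": [0, 0]})

encoded = apply_feature_schema(raw, schema, verbose=1)       # drops 'junk', encodes 'color'
decoded = reconstruct_from_schema(encoded, schema, verbose=1)

# Codes map back to strings; codes missing from the mapping become NaN.
print(decoded["color"].tolist())  # expected: ['red', 'blue']
```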
ml_tools/keys/_keys.py CHANGED
@@ -297,7 +297,7 @@ class _EvaluationConfig:
      # large sizes for SVG layout to accommodate large fonts
      REGRESSION_PLOT_SIZE = (12, 8)
      SEQUENCE_PLOT_SIZE = (12, 8)
-     CLASSIFICATION_PLOT_SIZE = (10, 10)
+     CLASSIFICATION_PLOT_SIZE = (9, 9)
      # Loss plot
      LOSS_PLOT_SIZE = (18, 9)
      LOSS_PLOT_LABEL_SIZE = 24

ml_tools/schema/_feature_schema.py CHANGED
@@ -202,13 +202,39 @@ class FeatureSchema(NamedTuple):
                                  filename=DatasetKeys.CATEGORICAL_NAMES,
                                  verbose=verbose)
 
-     def save_artifacts(self, directory: Union[str,Path]):
+     def save_description(self, directory: Union[str, Path], verbose: bool = False) -> None:
+         """
+         Saves the schema's description to a .txt file.
+
+         Args:
+             directory: The directory where the file will be saved.
+             verbose: If True, prints a confirmation message upon saving.
+         """
+         dir_path = make_fullpath(directory, make=True, enforce="directory")
+         filename = "FeatureSchema-description.txt"
+         file_path = dir_path / filename
+
+         try:
+             with open(file_path, "w", encoding="utf-8") as f:
+                 f.write(str(self))
+
+             if verbose:
+                 _LOGGER.info(f"Schema description saved to '{dir_path.name}/{filename}'")
+         except IOError as e:
+             _LOGGER.error(f"Failed to save schema description: {e}")
+             raise e
+
+     def save_artifacts(self, directory: Union[str,Path], verbose: bool=True):
          """
          Saves feature names, categorical feature names, continuous feature names to separate text files.
          """
-         self.save_all_features(directory=directory, verbose=True)
-         self.save_continuous_features(directory=directory, verbose=True)
-         self.save_categorical_features(directory=directory, verbose=True)
+         self.save_all_features(directory=directory, verbose=False)
+         self.save_continuous_features(directory=directory, verbose=False)
+         self.save_categorical_features(directory=directory, verbose=False)
+         self.save_description(directory=directory, verbose=False)
+
+         if verbose:
+             _LOGGER.info(f"All FeatureSchema artifacts saved to directory: '{directory}'")
 
      def __repr__(self) -> str:
          """Returns a concise representation of the schema's contents."""