dragon-ml-toolbox 20.3.0__py3-none-any.whl → 20.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/RECORD +13 -13
- ml_tools/IO_tools/_IO_loggers.py +21 -17
- ml_tools/ML_evaluation/_classification.py +10 -2
- ml_tools/ML_optimization/_multi_dragon.py +67 -44
- ml_tools/data_exploration/__init__.py +2 -0
- ml_tools/data_exploration/_schema_ops.py +107 -8
- ml_tools/keys/_keys.py +1 -1
- ml_tools/schema/_feature_schema.py +30 -4
- {dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dragon_ml_toolbox-20.
|
|
2
|
-
dragon_ml_toolbox-20.
|
|
1
|
+
dragon_ml_toolbox-20.5.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
|
|
2
|
+
dragon_ml_toolbox-20.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
|
|
3
3
|
ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
|
|
5
5
|
ml_tools/ETL_cleaning/__init__.py,sha256=8dsHiguUkI6Ix1759IPdGU3IXcjMz4DyaSCkdYhxxg8,490
|
|
@@ -11,7 +11,7 @@ ml_tools/ETL_engineering/_dragon_engineering.py,sha256=D-D6tmhyQ3I9-cXgxLVVbQBRT
|
|
|
11
11
|
ml_tools/ETL_engineering/_transforms.py,sha256=qOxa_vjh3gzS4IiGFqq_0Wnh0ilQO41jRiIp-6Ej4vw,47079
|
|
12
12
|
ml_tools/GUI_tools/_GUI_tools.py,sha256=vjiBbiU3qCxB4rivBWHNBnq-NhpDZZERslkmi_61WxY,48987
|
|
13
13
|
ml_tools/GUI_tools/__init__.py,sha256=zjUFxE3AnYPtd_Ptq7UCGQH5bXAM324t03rGQtcYLo4,372
|
|
14
|
-
ml_tools/IO_tools/_IO_loggers.py,sha256=
|
|
14
|
+
ml_tools/IO_tools/_IO_loggers.py,sha256=wgIuCPl5WzQ0rzYrIovMsIufciXwRr17Dqxu0y9G2xY,9200
|
|
15
15
|
ml_tools/IO_tools/_IO_save_load.py,sha256=xVeQzrd4r-L9ruFPvO8cV3bvzYHhJ0coOfZMrNWq5rs,4426
|
|
16
16
|
ml_tools/IO_tools/_IO_utils.py,sha256=quOqBVSi_z0AI7qznCNAcLRB_f4kaI-abANngXUBcYA,4384
|
|
17
17
|
ml_tools/IO_tools/__init__.py,sha256=Klu0Qf0qNfb7SHG33cs6qqecH5lEnYlXfe_xYdZ1Ry4,474
|
|
@@ -39,7 +39,7 @@ ml_tools/ML_datasetmaster/_datasetmaster.py,sha256=Oy2UE3YJpKTaFwQF5TkQLgLB54-BF
|
|
|
39
39
|
ml_tools/ML_datasetmaster/_sequence_datasetmaster.py,sha256=cW3fuILZWs-7Yuo4T2fgGfTC4vwho3Gp4ohIKJYS7O0,18452
|
|
40
40
|
ml_tools/ML_datasetmaster/_vision_datasetmaster.py,sha256=kvSqXYeNBN1JSRfSEEXYeIcsqy9HsJAl_EwFWClqlsw,67025
|
|
41
41
|
ml_tools/ML_evaluation/__init__.py,sha256=e3c8JNP0tt4Kxc7QSQpGcOgrxf8JAucH4UkJvJxUL2E,1122
|
|
42
|
-
ml_tools/ML_evaluation/_classification.py,sha256=
|
|
42
|
+
ml_tools/ML_evaluation/_classification.py,sha256=xXCh87RE9_VXYalc7l6CbakYfB0rijGrY76RZIrqLBk,28922
|
|
43
43
|
ml_tools/ML_evaluation/_feature_importance.py,sha256=mTwi3LKom_axu6UFKunELj30APDdhG9GQC2w7I9mYhI,17137
|
|
44
44
|
ml_tools/ML_evaluation/_loss.py,sha256=1a4O25i3Ya_3naNZNL7ELLUL46BY86g1scA7d7q2UFM,3625
|
|
45
45
|
ml_tools/ML_evaluation/_regression.py,sha256=hnT2B2_6AnQ7aA7uk-X2lZL9G5JFGCduDXyZbr1gFCA,11037
|
|
@@ -76,7 +76,7 @@ ml_tools/ML_models_vision/_image_classification.py,sha256=miwMNoTXpmmZSiqeXvDKpx
|
|
|
76
76
|
ml_tools/ML_models_vision/_image_segmentation.py,sha256=NRjn91bDD2OJWSJFrrNW9s41qgg5w7pw68Q61-kg-As,4157
|
|
77
77
|
ml_tools/ML_models_vision/_object_detection.py,sha256=AOGER5bx0REc-FfBtspJmyLJxn3GdwDSPwFGveobR94,5608
|
|
78
78
|
ml_tools/ML_optimization/__init__.py,sha256=No18Dsw6Q9zPt8B9fpG0bWomuXmwDC7DiokiaPuwmRI,485
|
|
79
|
-
ml_tools/ML_optimization/_multi_dragon.py,sha256=
|
|
79
|
+
ml_tools/ML_optimization/_multi_dragon.py,sha256=zQhDxFY8FNxUlcbSnHMVArfojzYjgNa21jSE3pJmRW0,38956
|
|
80
80
|
ml_tools/ML_optimization/_single_dragon.py,sha256=jh5-SK6NKAzbheQhquiYoROozk-RzUv1jiFkIzK_AFg,7288
|
|
81
81
|
ml_tools/ML_optimization/_single_manual.py,sha256=h-_k9JmRqPkjTra1nu7AyYbSyWkYZ1R3utiNmW06WFs,21809
|
|
82
82
|
ml_tools/ML_scaler/_ML_scaler.py,sha256=P75X0Sx8N-VxC2Qy8aG7mWaZlkTfjspiZDi1YiMQD1I,8872
|
|
@@ -103,12 +103,12 @@ ml_tools/_core/__init__.py,sha256=m-VP0RW0tOTm9N5NI3kFNcpM7WtVgs0RK9pK3ZJRZQQ,14
|
|
|
103
103
|
ml_tools/_core/_logger.py,sha256=xzhn_FouMDRVNwXGBGlPC9Ruq6i5uCrmNaS5jesguMU,4972
|
|
104
104
|
ml_tools/_core/_schema_load_ops.py,sha256=KLs9vBzANz5ESe2wlP-C41N4VlgGil-ywcfvWKSOGss,1551
|
|
105
105
|
ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHzQ,234
|
|
106
|
-
ml_tools/data_exploration/__init__.py,sha256=
|
|
106
|
+
ml_tools/data_exploration/__init__.py,sha256=nYKg1bPBgXibC5nhmNKPw3VaKFeVtlNGL_YpHixW-Pg,1795
|
|
107
107
|
ml_tools/data_exploration/_analysis.py,sha256=H6LryV56FFCHWjvQdkhZbtprZy6aP8EqU_hC2Cf9CLE,7832
|
|
108
108
|
ml_tools/data_exploration/_cleaning.py,sha256=pAZOXgGK35j7O8q6cnyTwYK1GLNnD04A8p2fSyMB1mg,20906
|
|
109
109
|
ml_tools/data_exploration/_features.py,sha256=wW-M8n2aLIy05DR2z4fI8wjpPjn3mOAnm9aSGYbMKwI,23363
|
|
110
110
|
ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
|
|
111
|
-
ml_tools/data_exploration/_schema_ops.py,sha256=
|
|
111
|
+
ml_tools/data_exploration/_schema_ops.py,sha256=Fd6fBGGv4OpxmJ1HG9pith6QL90z0tzssCvzkQxlEEQ,11083
|
|
112
112
|
ml_tools/ensemble_evaluation/__init__.py,sha256=t4Gr8EGEk8RLatyc92-S0BzbQvdvodzoF-qDAH2qjVg,546
|
|
113
113
|
ml_tools/ensemble_evaluation/_ensemble_evaluation.py,sha256=-sX9cLMaa0FOQDikmVv2lsCYtQ56Kftd3tILnNej0Hg,28346
|
|
114
114
|
ml_tools/ensemble_inference/__init__.py,sha256=VMX-Kata2V0UmiURIU2jx6mRuZmvTWf-QXzCpHmVGZA,255
|
|
@@ -118,7 +118,7 @@ ml_tools/ensemble_learning/_ensemble_learning.py,sha256=MHDZBR20_nStlSSeThFI3bSu
|
|
|
118
118
|
ml_tools/excel_handler/__init__.py,sha256=AaWM3n_dqBhJLTs3OEA57ex5YykKXNOwVCyHlVsdnqI,530
|
|
119
119
|
ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8WxDOfQ4sgxxLs,13965
|
|
120
120
|
ml_tools/keys/__init__.py,sha256=-0c2pmrhyfROc-oQpEjJGLBMhSagA3CyFijQaaqZRqU,399
|
|
121
|
-
ml_tools/keys/_keys.py,sha256=
|
|
121
|
+
ml_tools/keys/_keys.py,sha256=kBcW3euNmD57_4aoRaAeqJP3FtU3iSuvgYv-BZqnEWw,9290
|
|
122
122
|
ml_tools/math_utilities/__init__.py,sha256=K7Obkkc4rPKj4EbRZf1BsXHfiCg7FXYv_aN9Yc2Z_Vg,400
|
|
123
123
|
ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
|
|
124
124
|
ml_tools/optimization_tools/__init__.py,sha256=KD8JXpfGuPndO4AHnjJGu6uV1GRwhOfboD0KZV45kzw,658
|
|
@@ -130,14 +130,14 @@ ml_tools/path_manager/_path_tools.py,sha256=LcZE31QlkzZWUR8g1MW_N_mPY2DpKBJLA45V
|
|
|
130
130
|
ml_tools/plot_fonts/__init__.py,sha256=KIxXRCjQ3SliEoLhEcqs7zDVZbVTn38bmSdL-yR1Q2w,187
|
|
131
131
|
ml_tools/plot_fonts/_plot_fonts.py,sha256=mfjXNT9P59ymHoTI85Q8CcvfxfK5BIFBWtTZH-hNIC4,2209
|
|
132
132
|
ml_tools/schema/__init__.py,sha256=K6uiZ9f0GCQ7etw1yl2-dQVLhU7RkL3KHesO3HNX6v4,334
|
|
133
|
-
ml_tools/schema/_feature_schema.py,sha256=
|
|
133
|
+
ml_tools/schema/_feature_schema.py,sha256=MuPf6Nf7tDhUTGyX7tcFHZh-lLSNsJkLmlf9IxdF4O4,9660
|
|
134
134
|
ml_tools/schema/_gui_schema.py,sha256=IVwN4THAdFrvh2TpV4SFd_zlzMX3eioF-w-qcSVTndE,7245
|
|
135
135
|
ml_tools/serde/__init__.py,sha256=IDirr8i-qjUHB71hmHO6lGiODhUoOnUcXYrvb_XgrzE,292
|
|
136
136
|
ml_tools/serde/_serde.py,sha256=8QnYK8ZG21zdNaC0v63iSz2bhgwOKRKAWxTVQvMV0A8,5525
|
|
137
137
|
ml_tools/utilities/__init__.py,sha256=iQb-S5JesEjGGI8983Vkj-14LCtchFxdWRhaziyvnoY,808
|
|
138
138
|
ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
|
|
139
139
|
ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
|
|
140
|
-
dragon_ml_toolbox-20.
|
|
141
|
-
dragon_ml_toolbox-20.
|
|
142
|
-
dragon_ml_toolbox-20.
|
|
143
|
-
dragon_ml_toolbox-20.
|
|
140
|
+
dragon_ml_toolbox-20.5.0.dist-info/METADATA,sha256=sf0thvyXG1fpiAdeFpjiTdsZBkdVEECxdTDz0oGFgv8,7866
|
|
141
|
+
dragon_ml_toolbox-20.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
142
|
+
dragon_ml_toolbox-20.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
143
|
+
dragon_ml_toolbox-20.5.0.dist-info/RECORD,,
|
ml_tools/IO_tools/_IO_loggers.py
CHANGED
|
@@ -170,7 +170,7 @@ def _log_dict_to_json(data: dict[Any, Any], path: Path) -> None:
|
|
|
170
170
|
|
|
171
171
|
|
|
172
172
|
def train_logger(train_config: Union[dict, Any],
|
|
173
|
-
model_parameters: Union[dict, Any],
|
|
173
|
+
model_parameters: Union[dict, Any, None],
|
|
174
174
|
train_history: Union[dict, None],
|
|
175
175
|
save_directory: Union[str, Path],
|
|
176
176
|
verbose: int = 3) -> None:
|
|
@@ -179,7 +179,7 @@ def train_logger(train_config: Union[dict, Any],
|
|
|
179
179
|
|
|
180
180
|
Args:
|
|
181
181
|
train_config (dict | Any): Training configuration parameters. If object, must have a `.to_log()` method returning a dict.
|
|
182
|
-
model_parameters (dict | Any): Model parameters. If object, must have a `.to_log()` method returning a dict.
|
|
182
|
+
model_parameters (dict | Any | None): Model parameters. If object, must have a `.to_log()` method returning a dict.
|
|
183
183
|
train_history (dict | None): Training history log.
|
|
184
184
|
save_directory (str | Path): Directory to save the log file.
|
|
185
185
|
"""
|
|
@@ -201,23 +201,27 @@ def train_logger(train_config: Union[dict, Any],
|
|
|
201
201
|
|
|
202
202
|
train_config_dict = train_config
|
|
203
203
|
|
|
204
|
-
# model_parameters should be a dict or a custom object with the ".to_log()" method
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
204
|
+
# model_parameters should be a dict or a custom object with the ".to_log()" method or None
|
|
205
|
+
model_parameters_dict = {}
|
|
206
|
+
|
|
207
|
+
if model_parameters is not None:
|
|
208
|
+
if not isinstance(model_parameters, dict):
|
|
209
|
+
if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
|
|
210
|
+
params_result: dict = model_parameters.to_log()
|
|
211
|
+
if not isinstance(params_result, dict):
|
|
212
|
+
_LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
|
|
213
|
+
raise ValueError()
|
|
214
|
+
model_parameters_dict = params_result
|
|
215
|
+
else:
|
|
216
|
+
_LOGGER.error("'model_parameters' must be a dict, None, or an object with a 'to_log()' method.")
|
|
210
217
|
raise ValueError()
|
|
211
218
|
else:
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
raise ValueError()
|
|
219
|
-
|
|
220
|
-
model_parameters_dict = model_parameters
|
|
219
|
+
# check for empty dict
|
|
220
|
+
if not model_parameters:
|
|
221
|
+
_LOGGER.error("'model_parameters' dictionary is empty.")
|
|
222
|
+
raise ValueError()
|
|
223
|
+
|
|
224
|
+
model_parameters_dict = model_parameters
|
|
221
225
|
|
|
222
226
|
# make base dictionary
|
|
223
227
|
data: dict = train_config_dict | model_parameters_dict
|
|
@@ -329,7 +329,11 @@ def classification_metrics(save_dir: Union[str, Path],
|
|
|
329
329
|
fig_roc, ax_roc = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
|
|
330
330
|
ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=format_config.ROC_PR_line)
|
|
331
331
|
ax_roc.plot([0, 1], [0, 1], 'k--')
|
|
332
|
-
|
|
332
|
+
# use "ROC" if extra title, else use "Receiver Operating Characteristic" title
|
|
333
|
+
if plot_title.strip():
|
|
334
|
+
ax_roc.set_title(f'ROC{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
|
|
335
|
+
else:
|
|
336
|
+
ax_roc.set_title(f'Receiver Operating Characteristic', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
|
|
333
337
|
ax_roc.set_xlabel('False Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
|
|
334
338
|
ax_roc.set_ylabel('True Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
|
|
335
339
|
|
|
@@ -351,7 +355,11 @@ def classification_metrics(save_dir: Union[str, Path],
|
|
|
351
355
|
ap_score = average_precision_score(y_true_binary, y_score)
|
|
352
356
|
fig_pr, ax_pr = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
|
|
353
357
|
ax_pr.plot(recall, precision, label=f'Avg Precision = {ap_score:.2f}', color=format_config.ROC_PR_line)
|
|
354
|
-
|
|
358
|
+
# Use "PR Curve" if extra title, else use "Precision-Recall Curve" title
|
|
359
|
+
if plot_title.strip():
|
|
360
|
+
ax_pr.set_title(f'PR Curve{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
|
|
361
|
+
else:
|
|
362
|
+
ax_pr.set_title(f'Precision-Recall Curve', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
|
|
355
363
|
ax_pr.set_xlabel('Recall', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
|
|
356
364
|
ax_pr.set_ylabel('Precision', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
|
|
357
365
|
|
|
@@ -170,9 +170,13 @@ class DragonParetoOptimizer:
|
|
|
170
170
|
re_evaluate=False # model is deterministic
|
|
171
171
|
)
|
|
172
172
|
|
|
173
|
-
def run(self
|
|
173
|
+
def run(self,
|
|
174
|
+
plots_and_log: bool=True) -> pd.DataFrame:
|
|
174
175
|
"""
|
|
175
176
|
Execute the optimization with progress tracking and periodic logging.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
plots_and_log (bool): If True, generates plots and logs during optimization. Disable for multi-run scenarios.
|
|
176
180
|
|
|
177
181
|
Returns:
|
|
178
182
|
pd.DataFrame: A DataFrame containing the non-dominated solutions (Pareto Front).
|
|
@@ -189,9 +193,10 @@ class DragonParetoOptimizer:
|
|
|
189
193
|
_LOGGER.info(f"🧬 Starting NSGA-II (GeneticAlgorithm) for {generations} generations...")
|
|
190
194
|
|
|
191
195
|
# Initialize log file
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
196
|
+
if plots_and_log:
|
|
197
|
+
with open(log_file, "w") as f:
|
|
198
|
+
f.write(f"Pareto Optimization Log - {generations} Generations\n")
|
|
199
|
+
f.write("=" * 60 + "\n")
|
|
195
200
|
|
|
196
201
|
# History tracking for visualization
|
|
197
202
|
history_records = []
|
|
@@ -201,43 +206,44 @@ class DragonParetoOptimizer:
|
|
|
201
206
|
for gen in range(1, generations + 1):
|
|
202
207
|
self.algorithm.step()
|
|
203
208
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
gen_stats = {}
|
|
208
|
-
for i, target_name in enumerate(self.ordered_target_names):
|
|
209
|
-
vals = current_evals[:, i]
|
|
210
|
-
v_mean = float(vals.mean())
|
|
211
|
-
v_min = float(vals.min())
|
|
212
|
-
v_max = float(vals.max())
|
|
209
|
+
if plots_and_log:
|
|
210
|
+
# Capture stats for history (every generation for smooth plots)
|
|
211
|
+
current_evals = self.algorithm.population.evals.clone() # type: ignore
|
|
213
212
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
213
|
+
gen_stats = {}
|
|
214
|
+
for i, target_name in enumerate(self.ordered_target_names):
|
|
215
|
+
vals = current_evals[:, i]
|
|
216
|
+
v_mean = float(vals.mean())
|
|
217
|
+
v_min = float(vals.min())
|
|
218
|
+
v_max = float(vals.max())
|
|
219
|
+
|
|
220
|
+
# Store for plotting
|
|
221
|
+
history_records.append({
|
|
222
|
+
"Generation": gen,
|
|
223
|
+
"Target": target_name,
|
|
224
|
+
"Mean": v_mean,
|
|
225
|
+
"Min": v_min,
|
|
226
|
+
"Max": v_max
|
|
227
|
+
})
|
|
228
|
+
|
|
229
|
+
gen_stats[target_name] = (v_mean, v_min, v_max)
|
|
222
230
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
with open(log_file, "a") as f:
|
|
235
|
-
f.write(log_line + "\n")
|
|
231
|
+
# Periodic Logging of Population Stats to FILE
|
|
232
|
+
if gen % log_interval == 0 or gen == generations:
|
|
233
|
+
stats_msg = [f"Gen {gen}:"]
|
|
234
|
+
for t_name, (v_mean, v_min, v_max) in gen_stats.items():
|
|
235
|
+
stats_msg.append(f"{t_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
|
|
236
|
+
|
|
237
|
+
log_line = " | ".join(stats_msg)
|
|
238
|
+
|
|
239
|
+
# Write to file
|
|
240
|
+
with open(log_file, "a") as f:
|
|
241
|
+
f.write(log_line + "\n")
|
|
236
242
|
|
|
237
243
|
pbar.update(1)
|
|
238
244
|
|
|
239
245
|
# --- Post-Optimization Visualization ---
|
|
240
|
-
if history_records:
|
|
246
|
+
if plots_and_log and history_records:
|
|
241
247
|
_LOGGER.debug("Generating optimization history plots...")
|
|
242
248
|
history_df = pd.DataFrame(history_records)
|
|
243
249
|
self._plot_optimization_history(history_df, save_path)
|
|
@@ -308,19 +314,21 @@ class DragonParetoOptimizer:
|
|
|
308
314
|
_LOGGER.info(f"Optimization complete. Found {len(pareto_df)} non-dominated solutions.")
|
|
309
315
|
|
|
310
316
|
# --- Plotting ---
|
|
311
|
-
|
|
317
|
+
if plots_and_log:
|
|
318
|
+
self._generate_plots(pareto_df, save_path)
|
|
312
319
|
|
|
313
320
|
return pareto_df
|
|
314
321
|
|
|
315
322
|
def save_solutions(self,
|
|
323
|
+
csv_if_exists: Literal['fail', 'replace', 'append'] = 'replace',
|
|
316
324
|
save_to_sql: bool = False,
|
|
317
325
|
sql_table_name: Optional[str] = None,
|
|
318
326
|
sql_if_exists: Literal['fail', 'replace', 'append'] = 'replace') -> None:
|
|
319
327
|
"""
|
|
320
|
-
Saves the current Pareto front to a CSV file
|
|
321
|
-
for specific continuous columns. Optionally saves to a SQL database.
|
|
328
|
+
Saves the current Pareto front to a CSV file. Optionally saves to a SQL database.
|
|
322
329
|
|
|
323
330
|
Args:
|
|
331
|
+
csv_if_exists (str): Behavior if CSV file exists ('fail', 'replace', 'append').
|
|
324
332
|
save_to_sql (bool): If True, also writes the results to a SQLite database in the save_dir.
|
|
325
333
|
sql_table_name (str, optional): Specific table name for SQL. If None, uses the solutions filename.
|
|
326
334
|
sql_if_exists (str): Behavior if SQL table exists ('fail', 'replace', 'append').
|
|
@@ -377,9 +385,24 @@ class DragonParetoOptimizer:
|
|
|
377
385
|
# sanitize filename and add extension if missing
|
|
378
386
|
sanitized_filename = sanitize_filename(filename)
|
|
379
387
|
csv_filename = sanitized_filename if sanitized_filename.lower().endswith(".csv") else f"{sanitized_filename}.csv"
|
|
388
|
+
full_csv_path = save_path / csv_filename
|
|
380
389
|
|
|
381
|
-
|
|
382
|
-
|
|
390
|
+
# Logic to handle Append/Fail/Replace for CSV
|
|
391
|
+
if csv_if_exists == 'append' and full_csv_path.exists():
|
|
392
|
+
try:
|
|
393
|
+
# Append mode: write without header, index=False to match standard data exports
|
|
394
|
+
df_to_save.to_csv(full_csv_path, mode='a', header=False, index=False)
|
|
395
|
+
_LOGGER.info(f"💾 Pareto solutions APPENDED to CSV: '{save_path.name}/{csv_filename}'. Added {len(df_to_save)} rows.")
|
|
396
|
+
except Exception as e:
|
|
397
|
+
_LOGGER.error(f"Failed to append CSV: {e}")
|
|
398
|
+
raise e
|
|
399
|
+
elif csv_if_exists == 'fail' and full_csv_path.exists():
|
|
400
|
+
_LOGGER.error(f"File '{full_csv_path}' already exists and csv_if_exists='fail'.")
|
|
401
|
+
raise FileExistsError()
|
|
402
|
+
else:
|
|
403
|
+
# Default 'replace' or new file creation using the existing utility
|
|
404
|
+
save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename, verbose=1)
|
|
405
|
+
_LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'. Shape: {df_to_save.shape}")
|
|
383
406
|
|
|
384
407
|
# Save optimization bounds as JSON for reference (debug mode)
|
|
385
408
|
if self._debug:
|
|
@@ -404,13 +427,13 @@ class DragonParetoOptimizer:
|
|
|
404
427
|
save_json(
|
|
405
428
|
data=bounds_data,
|
|
406
429
|
directory=save_path,
|
|
407
|
-
filename="
|
|
430
|
+
filename="all_debug_optimization_bounds.json",
|
|
408
431
|
verbose=False
|
|
409
432
|
)
|
|
410
|
-
_LOGGER.info(f"💾 Optimization bounds saved to: '{save_path.name}/
|
|
433
|
+
_LOGGER.info(f"💾 Optimization bounds saved to: '{save_path.name}/all_debug_optimization_bounds.json'")
|
|
411
434
|
|
|
412
435
|
except Exception as e:
|
|
413
|
-
_LOGGER.warning(f"Failed to save optimization bounds to JSON: {e}")
|
|
436
|
+
_LOGGER.warning(f"Failed to save debug optimization bounds to JSON: {e}")
|
|
414
437
|
|
|
415
438
|
# --- 2. Save SQL (Optional) ---
|
|
416
439
|
if save_to_sql:
|
|
@@ -636,7 +659,7 @@ class DragonParetoOptimizer:
|
|
|
636
659
|
z_target: Union[int, str],
|
|
637
660
|
hue_target: Optional[Union[int, str]] = None):
|
|
638
661
|
"""
|
|
639
|
-
|
|
662
|
+
Generate 3D visualizations for specific targets.
|
|
640
663
|
|
|
641
664
|
Args:
|
|
642
665
|
x_target (int|str): Index or name of the target for the X axis.
|
|
@@ -36,6 +36,7 @@ from ._features import (
|
|
|
36
36
|
from ._schema_ops import (
|
|
37
37
|
finalize_feature_schema,
|
|
38
38
|
apply_feature_schema,
|
|
39
|
+
reconstruct_from_schema
|
|
39
40
|
)
|
|
40
41
|
|
|
41
42
|
from .._core import _imprimir_disponibles
|
|
@@ -62,6 +63,7 @@ __all__ = [
|
|
|
62
63
|
"encode_categorical_features",
|
|
63
64
|
"finalize_feature_schema",
|
|
64
65
|
"apply_feature_schema",
|
|
66
|
+
"reconstruct_from_schema",
|
|
65
67
|
"match_and_filter_columns_by_regex",
|
|
66
68
|
"standardize_percentages",
|
|
67
69
|
"reconstruct_one_hot",
|
|
@@ -9,6 +9,13 @@ from .._core import get_logger
|
|
|
9
9
|
_LOGGER = get_logger("Data Exploration: Schema Ops")
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
__all__ = [
|
|
13
|
+
"finalize_feature_schema",
|
|
14
|
+
"apply_feature_schema",
|
|
15
|
+
"reconstruct_from_schema",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
|
|
12
19
|
def finalize_feature_schema(
|
|
13
20
|
df_features: pd.DataFrame,
|
|
14
21
|
categorical_mappings: Optional[dict[str, dict[str, int]]]
|
|
@@ -86,7 +93,7 @@ def apply_feature_schema(
|
|
|
86
93
|
schema: FeatureSchema,
|
|
87
94
|
targets: Optional[list[str]] = None,
|
|
88
95
|
unknown_value: int = 99999,
|
|
89
|
-
verbose:
|
|
96
|
+
verbose: int = 3
|
|
90
97
|
) -> pd.DataFrame:
|
|
91
98
|
"""
|
|
92
99
|
Aligns the input DataFrame with the provided FeatureSchema.
|
|
@@ -100,7 +107,7 @@ def apply_feature_schema(
|
|
|
100
107
|
targets (list[str] | None): Optional list of target column names.
|
|
101
108
|
unknown_value (int): Integer value to assign to unknown categorical levels.
|
|
102
109
|
Defaults to 99999 to avoid collision with existing categories.
|
|
103
|
-
verbose (
|
|
110
|
+
verbose (int): Verbosity level for logging. Higher values produce more detailed logs.
|
|
104
111
|
|
|
105
112
|
Returns:
|
|
106
113
|
pd.DataFrame: A new DataFrame with the exact column order and encoding defined by the schema.
|
|
@@ -147,7 +154,8 @@ def apply_feature_schema(
|
|
|
147
154
|
# Handle Unknown Categories
|
|
148
155
|
if df_processed[col_name].isnull().any():
|
|
149
156
|
n_missing = df_processed[col_name].isnull().sum()
|
|
150
|
-
|
|
157
|
+
if verbose >= 1:
|
|
158
|
+
_LOGGER.warning(f"Feature '{col_name}': Found {n_missing} unknown categories. Mapping to {unknown_value}.")
|
|
151
159
|
|
|
152
160
|
# Fill unknowns with the specified integer
|
|
153
161
|
df_processed[col_name] = df_processed[col_name].fillna(unknown_value)
|
|
@@ -159,14 +167,13 @@ def apply_feature_schema(
|
|
|
159
167
|
|
|
160
168
|
extra_cols = set(df_processed.columns) - set(final_column_order)
|
|
161
169
|
if extra_cols:
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
for extra_column in extra_cols:
|
|
165
|
-
print(f" - Dropping column: '{extra_column}'")
|
|
170
|
+
if verbose >= 1:
|
|
171
|
+
_LOGGER.warning(f"Dropping {len(extra_cols)} extra columns not present in schema: {extra_cols}")
|
|
166
172
|
|
|
167
173
|
df_final = df_processed[final_column_order]
|
|
168
174
|
|
|
169
|
-
|
|
175
|
+
if verbose >= 2:
|
|
176
|
+
_LOGGER.info(f"Schema applied successfully. Final shape: {df_final.shape}")
|
|
170
177
|
|
|
171
178
|
# df_final should be a dataframe
|
|
172
179
|
if isinstance(df_final, pd.Series):
|
|
@@ -174,3 +181,95 @@ def apply_feature_schema(
|
|
|
174
181
|
|
|
175
182
|
return df_final
|
|
176
183
|
|
|
184
|
+
|
|
185
|
+
def reconstruct_from_schema(
|
|
186
|
+
df: pd.DataFrame,
|
|
187
|
+
schema: FeatureSchema,
|
|
188
|
+
targets: Optional[list[str]] = None,
|
|
189
|
+
verbose: int = 3
|
|
190
|
+
) -> pd.DataFrame:
|
|
191
|
+
"""
|
|
192
|
+
Reverses the schema application to make data human-readable.
|
|
193
|
+
|
|
194
|
+
This function decodes categorical features back to their string representations
|
|
195
|
+
using the schema's mappings. It strictly enforces the schema structure,
|
|
196
|
+
ignoring extra columns (unless they are specified as targets).
|
|
197
|
+
|
|
198
|
+
Args:
|
|
199
|
+
df (pd.DataFrame): The input DataFrame containing encoded features.
|
|
200
|
+
schema (FeatureSchema): The schema defining feature names and reverse mappings.
|
|
201
|
+
targets (list[str] | None): Optional list of target column names to preserve. These are not decoded and kept in the order specified here.
|
|
202
|
+
verbose (int): Verbosity level for logging info about the process.
|
|
203
|
+
|
|
204
|
+
Returns:
|
|
205
|
+
pd.DataFrame: A new DataFrame with the exact column order (features + targets),
|
|
206
|
+
with categorical features decoded to strings.
|
|
207
|
+
|
|
208
|
+
Raises:
|
|
209
|
+
ValueError: If any required feature or target column is missing.
|
|
210
|
+
"""
|
|
211
|
+
# 1. Setup
|
|
212
|
+
df_decoded = df.copy()
|
|
213
|
+
targets = targets if targets is not None else []
|
|
214
|
+
|
|
215
|
+
# 2. Validation: Strict Column Presence
|
|
216
|
+
# Check Features
|
|
217
|
+
missing_features = [col for col in schema.feature_names if col not in df_decoded.columns]
|
|
218
|
+
if missing_features:
|
|
219
|
+
_LOGGER.error(f"Schema Reconstruction Mismatch: Missing required features: {missing_features}")
|
|
220
|
+
raise ValueError()
|
|
221
|
+
|
|
222
|
+
# Check Targets
|
|
223
|
+
if targets:
|
|
224
|
+
missing_targets = [col for col in targets if col not in df_decoded.columns]
|
|
225
|
+
if missing_targets:
|
|
226
|
+
_LOGGER.error(f"Schema Reconstruction Mismatch: Missing required targets: {missing_targets}")
|
|
227
|
+
raise ValueError()
|
|
228
|
+
|
|
229
|
+
# 3. Reorder and Filter (Drop extra columns early)
|
|
230
|
+
# The valid columns are Features + Targets
|
|
231
|
+
valid_columns = list(schema.feature_names) + targets
|
|
232
|
+
|
|
233
|
+
extra_cols = set(df_decoded.columns) - set(valid_columns)
|
|
234
|
+
if extra_cols:
|
|
235
|
+
if verbose >= 1:
|
|
236
|
+
_LOGGER.warning(f"Dropping extra columns not present in schema or targets: {extra_cols}")
|
|
237
|
+
|
|
238
|
+
# Enforce order: Features first, then Targets
|
|
239
|
+
df_decoded = df_decoded[valid_columns]
|
|
240
|
+
|
|
241
|
+
# 4. Reverse Categorical Encoding
|
|
242
|
+
if schema.categorical_feature_names and schema.categorical_mappings:
|
|
243
|
+
for col_name in schema.categorical_feature_names:
|
|
244
|
+
if col_name not in schema.categorical_mappings:
|
|
245
|
+
continue
|
|
246
|
+
|
|
247
|
+
forward_mapping = schema.categorical_mappings[col_name]
|
|
248
|
+
# Create reverse map: {int: str}
|
|
249
|
+
reverse_mapping = {v: k for k, v in forward_mapping.items()}
|
|
250
|
+
|
|
251
|
+
# --- SAFE TYPE CASTING ---
|
|
252
|
+
# Ensure values are Integers before mapping (handle 5.0 vs 5).
|
|
253
|
+
try:
|
|
254
|
+
if pd.api.types.is_numeric_dtype(df_decoded[col_name]):
|
|
255
|
+
df_decoded[col_name] = df_decoded[col_name].astype("Int64")
|
|
256
|
+
except (TypeError, ValueError):
|
|
257
|
+
# casted to NaN later during mapping
|
|
258
|
+
pass
|
|
259
|
+
# -------------------------
|
|
260
|
+
|
|
261
|
+
# Check for unknown codes before mapping
|
|
262
|
+
if verbose >= 1:
|
|
263
|
+
unique_codes = df_decoded[col_name].dropna().unique()
|
|
264
|
+
unknown_codes = [code for code in unique_codes if code not in reverse_mapping]
|
|
265
|
+
if unknown_codes:
|
|
266
|
+
_LOGGER.warning(f"Feature '{col_name}': Found unknown encoded values {unknown_codes}. These will be mapped to NaN.")
|
|
267
|
+
|
|
268
|
+
# Apply reverse mapping
|
|
269
|
+
df_decoded[col_name] = df_decoded[col_name].map(reverse_mapping)
|
|
270
|
+
|
|
271
|
+
if verbose >= 2:
|
|
272
|
+
_LOGGER.info(f"Schema reconstruction successful. Final shape: {df_decoded.shape}")
|
|
273
|
+
|
|
274
|
+
return df_decoded
|
|
275
|
+
|
ml_tools/keys/_keys.py
CHANGED
|
@@ -297,7 +297,7 @@ class _EvaluationConfig:
|
|
|
297
297
|
# large sizes for SVG layout to accommodate large fonts
|
|
298
298
|
REGRESSION_PLOT_SIZE = (12, 8)
|
|
299
299
|
SEQUENCE_PLOT_SIZE = (12, 8)
|
|
300
|
-
CLASSIFICATION_PLOT_SIZE = (
|
|
300
|
+
CLASSIFICATION_PLOT_SIZE = (9, 9)
|
|
301
301
|
# Loss plot
|
|
302
302
|
LOSS_PLOT_SIZE = (18, 9)
|
|
303
303
|
LOSS_PLOT_LABEL_SIZE = 24
|
|
@@ -202,13 +202,39 @@ class FeatureSchema(NamedTuple):
|
|
|
202
202
|
filename=DatasetKeys.CATEGORICAL_NAMES,
|
|
203
203
|
verbose=verbose)
|
|
204
204
|
|
|
205
|
-
def
|
|
205
|
+
def save_description(self, directory: Union[str, Path], verbose: bool = False) -> None:
|
|
206
|
+
"""
|
|
207
|
+
Saves the schema's description to a .txt file.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
directory: The directory where the file will be saved.
|
|
211
|
+
verbose: If True, prints a confirmation message upon saving.
|
|
212
|
+
"""
|
|
213
|
+
dir_path = make_fullpath(directory, make=True, enforce="directory")
|
|
214
|
+
filename = "FeatureSchema-description.txt"
|
|
215
|
+
file_path = dir_path / filename
|
|
216
|
+
|
|
217
|
+
try:
|
|
218
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
219
|
+
f.write(str(self))
|
|
220
|
+
|
|
221
|
+
if verbose:
|
|
222
|
+
_LOGGER.info(f"Schema description saved to '{dir_path.name}/{filename}'")
|
|
223
|
+
except IOError as e:
|
|
224
|
+
_LOGGER.error(f"Failed to save schema description: {e}")
|
|
225
|
+
raise e
|
|
226
|
+
|
|
227
|
+
def save_artifacts(self, directory: Union[str,Path], verbose: bool=True):
|
|
206
228
|
"""
|
|
207
229
|
Saves feature names, categorical feature names, continuous feature names to separate text files.
|
|
208
230
|
"""
|
|
209
|
-
self.save_all_features(directory=directory, verbose=
|
|
210
|
-
self.save_continuous_features(directory=directory, verbose=
|
|
211
|
-
self.save_categorical_features(directory=directory, verbose=
|
|
231
|
+
self.save_all_features(directory=directory, verbose=False)
|
|
232
|
+
self.save_continuous_features(directory=directory, verbose=False)
|
|
233
|
+
self.save_categorical_features(directory=directory, verbose=False)
|
|
234
|
+
self.save_description(directory=directory, verbose=False)
|
|
235
|
+
|
|
236
|
+
if verbose:
|
|
237
|
+
_LOGGER.info(f"All FeatureSchema artifacts saved to directory: '{directory}'")
|
|
212
238
|
|
|
213
239
|
def __repr__(self) -> str:
|
|
214
240
|
"""Returns a concise representation of the schema's contents."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|