PyPI - dragon-ml-toolbox - Versions diffs - 20.3.0__py3-none-any.whl → 20.5.0__py3-none-any.whl - Mend

dragon-ml-toolbox 20.3.0-py3-none-any.whl → 20.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

{dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 20.3.0
+Version: 20.5.0
 Summary: Complete pipelines and helper tools for data science and machine learning projects.
 Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-20.3.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-20.3.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+dragon_ml_toolbox-20.5.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-20.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/ETL_cleaning/__init__.py,sha256=8dsHiguUkI6Ix1759IPdGU3IXcjMz4DyaSCkdYhxxg8,490
@@ -11,7 +11,7 @@ ml_tools/ETL_engineering/_dragon_engineering.py,sha256=D-D6tmhyQ3I9-cXgxLVVbQBRT
 ml_tools/ETL_engineering/_transforms.py,sha256=qOxa_vjh3gzS4IiGFqq_0Wnh0ilQO41jRiIp-6Ej4vw,47079
 ml_tools/GUI_tools/_GUI_tools.py,sha256=vjiBbiU3qCxB4rivBWHNBnq-NhpDZZERslkmi_61WxY,48987
 ml_tools/GUI_tools/__init__.py,sha256=zjUFxE3AnYPtd_Ptq7UCGQH5bXAM324t03rGQtcYLo4,372
-ml_tools/IO_tools/_IO_loggers.py,sha256=ytX7toElLb4jzqTk_Lcb3EzeHHWdD82mVXDheKfmML4,9001
+ml_tools/IO_tools/_IO_loggers.py,sha256=wgIuCPl5WzQ0rzYrIovMsIufciXwRr17Dqxu0y9G2xY,9200
 ml_tools/IO_tools/_IO_save_load.py,sha256=xVeQzrd4r-L9ruFPvO8cV3bvzYHhJ0coOfZMrNWq5rs,4426
 ml_tools/IO_tools/_IO_utils.py,sha256=quOqBVSi_z0AI7qznCNAcLRB_f4kaI-abANngXUBcYA,4384
 ml_tools/IO_tools/__init__.py,sha256=Klu0Qf0qNfb7SHG33cs6qqecH5lEnYlXfe_xYdZ1Ry4,474
@@ -39,7 +39,7 @@ ml_tools/ML_datasetmaster/_datasetmaster.py,sha256=Oy2UE3YJpKTaFwQF5TkQLgLB54-BF
 ml_tools/ML_datasetmaster/_sequence_datasetmaster.py,sha256=cW3fuILZWs-7Yuo4T2fgGfTC4vwho3Gp4ohIKJYS7O0,18452
 ml_tools/ML_datasetmaster/_vision_datasetmaster.py,sha256=kvSqXYeNBN1JSRfSEEXYeIcsqy9HsJAl_EwFWClqlsw,67025
 ml_tools/ML_evaluation/__init__.py,sha256=e3c8JNP0tt4Kxc7QSQpGcOgrxf8JAucH4UkJvJxUL2E,1122
-ml_tools/ML_evaluation/_classification.py,sha256=SmhIxJy81iIFrys36LUoKrH5ey9IqzE-UxR2-tdgseI,28396
+ml_tools/ML_evaluation/_classification.py,sha256=xXCh87RE9_VXYalc7l6CbakYfB0rijGrY76RZIrqLBk,28922
 ml_tools/ML_evaluation/_feature_importance.py,sha256=mTwi3LKom_axu6UFKunELj30APDdhG9GQC2w7I9mYhI,17137
 ml_tools/ML_evaluation/_loss.py,sha256=1a4O25i3Ya_3naNZNL7ELLUL46BY86g1scA7d7q2UFM,3625
 ml_tools/ML_evaluation/_regression.py,sha256=hnT2B2_6AnQ7aA7uk-X2lZL9G5JFGCduDXyZbr1gFCA,11037
@@ -76,7 +76,7 @@ ml_tools/ML_models_vision/_image_classification.py,sha256=miwMNoTXpmmZSiqeXvDKpx
 ml_tools/ML_models_vision/_image_segmentation.py,sha256=NRjn91bDD2OJWSJFrrNW9s41qgg5w7pw68Q61-kg-As,4157
 ml_tools/ML_models_vision/_object_detection.py,sha256=AOGER5bx0REc-FfBtspJmyLJxn3GdwDSPwFGveobR94,5608
 ml_tools/ML_optimization/__init__.py,sha256=No18Dsw6Q9zPt8B9fpG0bWomuXmwDC7DiokiaPuwmRI,485
-ml_tools/ML_optimization/_multi_dragon.py,sha256=oUTscfoySSypgX2_7wfkDzjJ60Y93utSW2OZvAGTGE0,37494
+ml_tools/ML_optimization/_multi_dragon.py,sha256=zQhDxFY8FNxUlcbSnHMVArfojzYjgNa21jSE3pJmRW0,38956
 ml_tools/ML_optimization/_single_dragon.py,sha256=jh5-SK6NKAzbheQhquiYoROozk-RzUv1jiFkIzK_AFg,7288
 ml_tools/ML_optimization/_single_manual.py,sha256=h-_k9JmRqPkjTra1nu7AyYbSyWkYZ1R3utiNmW06WFs,21809
 ml_tools/ML_scaler/_ML_scaler.py,sha256=P75X0Sx8N-VxC2Qy8aG7mWaZlkTfjspiZDi1YiMQD1I,8872
@@ -103,12 +103,12 @@ ml_tools/_core/__init__.py,sha256=m-VP0RW0tOTm9N5NI3kFNcpM7WtVgs0RK9pK3ZJRZQQ,14
 ml_tools/_core/_logger.py,sha256=xzhn_FouMDRVNwXGBGlPC9Ruq6i5uCrmNaS5jesguMU,4972
 ml_tools/_core/_schema_load_ops.py,sha256=KLs9vBzANz5ESe2wlP-C41N4VlgGil-ywcfvWKSOGss,1551
 ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHzQ,234
-ml_tools/data_exploration/__init__.py,sha256=ahCjELrum2aIj_cLK-sdGbJjTvvolf3US_oaB97rOQg,1736
+ml_tools/data_exploration/__init__.py,sha256=nYKg1bPBgXibC5nhmNKPw3VaKFeVtlNGL_YpHixW-Pg,1795
 ml_tools/data_exploration/_analysis.py,sha256=H6LryV56FFCHWjvQdkhZbtprZy6aP8EqU_hC2Cf9CLE,7832
 ml_tools/data_exploration/_cleaning.py,sha256=pAZOXgGK35j7O8q6cnyTwYK1GLNnD04A8p2fSyMB1mg,20906
 ml_tools/data_exploration/_features.py,sha256=wW-M8n2aLIy05DR2z4fI8wjpPjn3mOAnm9aSGYbMKwI,23363
 ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
-ml_tools/data_exploration/_schema_ops.py,sha256=PoFeHaS9dXI9gfL0SRD-8uSP4owqmbQFbtfA-HxkLnY,7108
+ml_tools/data_exploration/_schema_ops.py,sha256=Fd6fBGGv4OpxmJ1HG9pith6QL90z0tzssCvzkQxlEEQ,11083
 ml_tools/ensemble_evaluation/__init__.py,sha256=t4Gr8EGEk8RLatyc92-S0BzbQvdvodzoF-qDAH2qjVg,546
 ml_tools/ensemble_evaluation/_ensemble_evaluation.py,sha256=-sX9cLMaa0FOQDikmVv2lsCYtQ56Kftd3tILnNej0Hg,28346
 ml_tools/ensemble_inference/__init__.py,sha256=VMX-Kata2V0UmiURIU2jx6mRuZmvTWf-QXzCpHmVGZA,255
@@ -118,7 +118,7 @@ ml_tools/ensemble_learning/_ensemble_learning.py,sha256=MHDZBR20_nStlSSeThFI3bSu
 ml_tools/excel_handler/__init__.py,sha256=AaWM3n_dqBhJLTs3OEA57ex5YykKXNOwVCyHlVsdnqI,530
 ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8WxDOfQ4sgxxLs,13965
 ml_tools/keys/__init__.py,sha256=-0c2pmrhyfROc-oQpEjJGLBMhSagA3CyFijQaaqZRqU,399
-ml_tools/keys/_keys.py,sha256=DLP0BYibRueM_8Dz9pSbWUpKypcimFL5kmXUl4wSwdU,9292
+ml_tools/keys/_keys.py,sha256=kBcW3euNmD57_4aoRaAeqJP3FtU3iSuvgYv-BZqnEWw,9290
 ml_tools/math_utilities/__init__.py,sha256=K7Obkkc4rPKj4EbRZf1BsXHfiCg7FXYv_aN9Yc2Z_Vg,400
 ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
 ml_tools/optimization_tools/__init__.py,sha256=KD8JXpfGuPndO4AHnjJGu6uV1GRwhOfboD0KZV45kzw,658
@@ -130,14 +130,14 @@ ml_tools/path_manager/_path_tools.py,sha256=LcZE31QlkzZWUR8g1MW_N_mPY2DpKBJLA45V
 ml_tools/plot_fonts/__init__.py,sha256=KIxXRCjQ3SliEoLhEcqs7zDVZbVTn38bmSdL-yR1Q2w,187
 ml_tools/plot_fonts/_plot_fonts.py,sha256=mfjXNT9P59ymHoTI85Q8CcvfxfK5BIFBWtTZH-hNIC4,2209
 ml_tools/schema/__init__.py,sha256=K6uiZ9f0GCQ7etw1yl2-dQVLhU7RkL3KHesO3HNX6v4,334
-ml_tools/schema/_feature_schema.py,sha256=aVY3AJt1j4D2mtusVy2l6lDR2SYzPMyfvG1o9zOn0Kw,8585
+ml_tools/schema/_feature_schema.py,sha256=MuPf6Nf7tDhUTGyX7tcFHZh-lLSNsJkLmlf9IxdF4O4,9660
 ml_tools/schema/_gui_schema.py,sha256=IVwN4THAdFrvh2TpV4SFd_zlzMX3eioF-w-qcSVTndE,7245
 ml_tools/serde/__init__.py,sha256=IDirr8i-qjUHB71hmHO6lGiODhUoOnUcXYrvb_XgrzE,292
 ml_tools/serde/_serde.py,sha256=8QnYK8ZG21zdNaC0v63iSz2bhgwOKRKAWxTVQvMV0A8,5525
 ml_tools/utilities/__init__.py,sha256=iQb-S5JesEjGGI8983Vkj-14LCtchFxdWRhaziyvnoY,808
 ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
 ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
-dragon_ml_toolbox-20.3.0.dist-info/METADATA,sha256=pPP6u48MGLf2mBIMTn6ddq7knczywo8sm9XO3iF9Zzg,7866
-dragon_ml_toolbox-20.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-20.3.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-20.3.0.dist-info/RECORD,,
+dragon_ml_toolbox-20.5.0.dist-info/METADATA,sha256=sf0thvyXG1fpiAdeFpjiTdsZBkdVEECxdTDz0oGFgv8,7866
+dragon_ml_toolbox-20.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-20.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-20.5.0.dist-info/RECORD,,

ml_tools/IO_tools/_IO_loggers.py CHANGED Viewed

@@ -170,7 +170,7 @@ def _log_dict_to_json(data: dict[Any, Any], path: Path) -> None:
 def train_logger(train_config: Union[dict, Any],
-                 model_parameters: Union[dict, Any],
+                 model_parameters: Union[dict, Any, None],
                  train_history: Union[dict, None],
                  save_directory: Union[str, Path],
                  verbose: int = 3) -> None:
@@ -179,7 +179,7 @@ def train_logger(train_config: Union[dict, Any],
     Args:
         train_config (dict | Any): Training configuration parameters. If object, must have a `.to_log()` method returning a dict.
-        model_parameters (dict | Any): Model parameters. If object, must have a `.to_log()` method returning a dict.
+        model_parameters (dict | Any | None): Model parameters. If object, must have a `.to_log()` method returning a dict.
         train_history (dict | None): Training history log.
         save_directory (str | Path): Directory to save the log file.
     """
@@ -201,23 +201,27 @@ def train_logger(train_config: Union[dict, Any],
         train_config_dict = train_config
-    # model_parameters should be a dict or a custom object with the ".to_log()" method
-    if not isinstance(model_parameters, dict):
-        if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
-            model_parameters_dict: dict = model_parameters.to_log()
-            if not isinstance(model_parameters_dict, dict):
-                _LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
+    # model_parameters should be a dict or a custom object with the ".to_log()" method or None
+    model_parameters_dict = {}
+    if model_parameters is not None:
+        if not isinstance(model_parameters, dict):
+            if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
+                params_result: dict = model_parameters.to_log()
+                if not isinstance(params_result, dict):
+                    _LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
+                    raise ValueError()
+                model_parameters_dict = params_result
+            else:
+                _LOGGER.error("'model_parameters' must be a dict, None, or an object with a 'to_log()' method.")
                 raise ValueError()
         else:
-            _LOGGER.error("'model_parameters' must be a dict or an object with a 'to_log()' method.")
-            raise ValueError()
-    else:
-        # check for empty dict
-        if not model_parameters:
-            _LOGGER.error("'model_parameters' dictionary is empty.")
-            raise ValueError()
-        model_parameters_dict = model_parameters
+            # check for empty dict
+            if not model_parameters:
+                _LOGGER.error("'model_parameters' dictionary is empty.")
+                raise ValueError()
+            model_parameters_dict = model_parameters
     # make base dictionary
     data: dict = train_config_dict | model_parameters_dict

ml_tools/ML_evaluation/_classification.py CHANGED Viewed

@@ -329,7 +329,11 @@ def classification_metrics(save_dir: Union[str, Path],
             fig_roc, ax_roc = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
             ax_roc.plot(fpr, tpr, label=f'AUC = {auc:.2f}', color=format_config.ROC_PR_line)
             ax_roc.plot([0, 1], [0, 1], 'k--')
-            ax_roc.set_title(f'Receiver Operating Characteristic{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+            # use "ROC" if extra title, else use "Receiver Operating Characteristic" title
+            if plot_title.strip():
+                ax_roc.set_title(f'ROC{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+            else:
+                ax_roc.set_title(f'Receiver Operating Characteristic', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
             ax_roc.set_xlabel('False Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
             ax_roc.set_ylabel('True Positive Rate', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
@@ -351,7 +355,11 @@ def classification_metrics(save_dir: Union[str, Path],
             ap_score = average_precision_score(y_true_binary, y_score)
             fig_pr, ax_pr = plt.subplots(figsize=CLASSIFICATION_PLOT_SIZE, dpi=DPI_value)
             ax_pr.plot(recall, precision, label=f'Avg Precision = {ap_score:.2f}', color=format_config.ROC_PR_line)
-            ax_pr.set_title(f'Precision-Recall Curve{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+            # Use "PR Curve" if extra title, else use "Precision-Recall Curve" title
+            if plot_title.strip():
+                ax_pr.set_title(f'PR Curve{plot_title}', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
+            else:
+                ax_pr.set_title(f'Precision-Recall Curve', pad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size + 2)
             ax_pr.set_xlabel('Recall', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)
             ax_pr.set_ylabel('Precision', labelpad=_EvaluationConfig.LABEL_PADDING, fontsize=format_config.font_size)

ml_tools/ML_optimization/_multi_dragon.py CHANGED Viewed

@@ -170,9 +170,13 @@ class DragonParetoOptimizer:
             re_evaluate=False # model is deterministic
         )
-    def run(self) -> pd.DataFrame:
+    def run(self,
+            plots_and_log: bool=True) -> pd.DataFrame:
         """
         Execute the optimization with progress tracking and periodic logging.
+        Args:
+            plots_and_log (bool): If True, generates plots and logs during optimization. Disable for multi-run scenarios.
         Returns:
             pd.DataFrame: A DataFrame containing the non-dominated solutions (Pareto Front).
@@ -189,9 +193,10 @@ class DragonParetoOptimizer:
         _LOGGER.info(f"🧬 Starting NSGA-II (GeneticAlgorithm) for {generations} generations...")
         # Initialize log file
-        with open(log_file, "w") as f:
-            f.write(f"Pareto Optimization Log - {generations} Generations\n")
-            f.write("=" * 60 + "\n")
+        if plots_and_log:
+            with open(log_file, "w") as f:
+                f.write(f"Pareto Optimization Log - {generations} Generations\n")
+                f.write("=" * 60 + "\n")
         # History tracking for visualization
         history_records = []
@@ -201,43 +206,44 @@ class DragonParetoOptimizer:
             for gen in range(1, generations + 1):
                 self.algorithm.step()
-                # Capture stats for history (every generation for smooth plots)
-                current_evals = self.algorithm.population.evals.clone() # type: ignore
-                gen_stats = {}
-                for i, target_name in enumerate(self.ordered_target_names):
-                    vals = current_evals[:, i]
-                    v_mean = float(vals.mean())
-                    v_min = float(vals.min())
-                    v_max = float(vals.max())
+                if plots_and_log:
+                    # Capture stats for history (every generation for smooth plots)
+                    current_evals = self.algorithm.population.evals.clone() # type: ignore
-                    # Store for plotting
-                    history_records.append({
-                        "Generation": gen,
-                        "Target": target_name,
-                        "Mean": v_mean,
-                        "Min": v_min,
-                        "Max": v_max
-                    })
+                    gen_stats = {}
+                    for i, target_name in enumerate(self.ordered_target_names):
+                        vals = current_evals[:, i]
+                        v_mean = float(vals.mean())
+                        v_min = float(vals.min())
+                        v_max = float(vals.max())
+                        # Store for plotting
+                        history_records.append({
+                            "Generation": gen,
+                            "Target": target_name,
+                            "Mean": v_mean,
+                            "Min": v_min,
+                            "Max": v_max
+                        })
+                        gen_stats[target_name] = (v_mean, v_min, v_max)
-                    gen_stats[target_name] = (v_mean, v_min, v_max)
-                # Periodic Logging of Population Stats to FILE
-                if gen % log_interval == 0 or gen == generations:
-                    stats_msg = [f"Gen {gen}:"]
-                    for t_name, (v_mean, v_min, v_max) in gen_stats.items():
-                        stats_msg.append(f"{t_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
-                    log_line = " | ".join(stats_msg)
-                    # Write to file
-                    with open(log_file, "a") as f:
-                        f.write(log_line + "\n")
+                    # Periodic Logging of Population Stats to FILE
+                    if gen % log_interval == 0 or gen == generations:
+                        stats_msg = [f"Gen {gen}:"]
+                        for t_name, (v_mean, v_min, v_max) in gen_stats.items():
+                            stats_msg.append(f"{t_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
+                        log_line = " | ".join(stats_msg)
+                        # Write to file
+                        with open(log_file, "a") as f:
+                            f.write(log_line + "\n")
                 pbar.update(1)
         # --- Post-Optimization Visualization ---
-        if history_records:
+        if plots_and_log and history_records:
             _LOGGER.debug("Generating optimization history plots...")
             history_df = pd.DataFrame(history_records)
             self._plot_optimization_history(history_df, save_path)
@@ -308,19 +314,21 @@ class DragonParetoOptimizer:
         _LOGGER.info(f"Optimization complete. Found {len(pareto_df)} non-dominated solutions.")
         # --- Plotting ---
-        self._generate_plots(pareto_df, save_path)
+        if plots_and_log:
+            self._generate_plots(pareto_df, save_path)
         return pareto_df
     def save_solutions(self,
+                       csv_if_exists: Literal['fail', 'replace', 'append'] = 'replace',
                        save_to_sql: bool = False,
                        sql_table_name: Optional[str] = None,
                        sql_if_exists: Literal['fail', 'replace', 'append'] = 'replace') -> None:
         """
-        Saves the current Pareto front to a CSV file, with optional integer rounding
-        for specific continuous columns. Optionally saves to a SQL database.
+        Saves the current Pareto front to a CSV file. Optionally saves to a SQL database.
         Args:
+            csv_if_exists (str): Behavior if CSV file exists ('fail', 'replace', 'append').
             save_to_sql (bool): If True, also writes the results to a SQLite database in the save_dir.
             sql_table_name (str, optional): Specific table name for SQL. If None, uses the solutions filename.
             sql_if_exists (str): Behavior if SQL table exists ('fail', 'replace', 'append').
@@ -377,9 +385,24 @@ class DragonParetoOptimizer:
         # sanitize filename and add extension if missing
         sanitized_filename = sanitize_filename(filename)
         csv_filename = sanitized_filename if sanitized_filename.lower().endswith(".csv") else f"{sanitized_filename}.csv"
+        full_csv_path = save_path / csv_filename
-        save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename, verbose=1)
-        _LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'. Shape: {df_to_save.shape}")
+        # Logic to handle Append/Fail/Replace for CSV
+        if csv_if_exists == 'append' and full_csv_path.exists():
+            try:
+                # Append mode: write without header, index=False to match standard data exports
+                df_to_save.to_csv(full_csv_path, mode='a', header=False, index=False)
+                _LOGGER.info(f"💾 Pareto solutions APPENDED to CSV: '{save_path.name}/{csv_filename}'. Added {len(df_to_save)} rows.")
+            except Exception as e:
+                _LOGGER.error(f"Failed to append CSV: {e}")
+                raise e
+        elif csv_if_exists == 'fail' and full_csv_path.exists():
+            _LOGGER.error(f"File '{full_csv_path}' already exists and csv_if_exists='fail'.")
+            raise FileExistsError()
+        else:
+            # Default 'replace' or new file creation using the existing utility
+            save_dataframe_filename(df=df_to_save, save_dir=save_path, filename=csv_filename, verbose=1)
+            _LOGGER.info(f"💾 Pareto solutions saved to CSV: '{save_path.name}/{csv_filename}'. Shape: {df_to_save.shape}")
         # Save optimization bounds as JSON for reference (debug mode)
         if self._debug:
@@ -404,13 +427,13 @@ class DragonParetoOptimizer:
                 save_json(
                     data=bounds_data,
                     directory=save_path,
-                    filename="all_optimization_bounds.json",
+                    filename="all_debug_optimization_bounds.json",
                     verbose=False
                 )
-                _LOGGER.info(f"💾 Optimization bounds saved to: '{save_path.name}/all_optimization_bounds.json'")
+                _LOGGER.info(f"💾 Optimization bounds saved to: '{save_path.name}/all_debug_optimization_bounds.json'")
             except Exception as e:
-                _LOGGER.warning(f"Failed to save optimization bounds to JSON: {e}")
+                _LOGGER.warning(f"Failed to save debug optimization bounds to JSON: {e}")
         # --- 2. Save SQL (Optional) ---
         if save_to_sql:
@@ -636,7 +659,7 @@ class DragonParetoOptimizer:
                        z_target: Union[int, str],
                        hue_target: Optional[Union[int, str]] = None):
         """
-        Public API to generate 3D visualizations for specific targets.
+        Generate 3D visualizations for specific targets.
         Args:
             x_target (int|str): Index or name of the target for the X axis.

ml_tools/data_exploration/__init__.py CHANGED Viewed

@@ -36,6 +36,7 @@ from ._features import (
 from ._schema_ops import (
     finalize_feature_schema,
     apply_feature_schema,
+    reconstruct_from_schema
 )
 from .._core import _imprimir_disponibles
@@ -62,6 +63,7 @@ __all__ = [
     "encode_categorical_features",
     "finalize_feature_schema",
     "apply_feature_schema",
+    "reconstruct_from_schema",
     "match_and_filter_columns_by_regex",
     "standardize_percentages",
     "reconstruct_one_hot",

ml_tools/data_exploration/_schema_ops.py CHANGED Viewed

@@ -9,6 +9,13 @@ from .._core import get_logger
 _LOGGER = get_logger("Data Exploration: Schema Ops")
+__all__ = [
+    "finalize_feature_schema",
+    "apply_feature_schema",
+    "reconstruct_from_schema",
+]
 def finalize_feature_schema(
     df_features: pd.DataFrame,
     categorical_mappings: Optional[dict[str, dict[str, int]]]
@@ -86,7 +93,7 @@ def apply_feature_schema(
     schema: FeatureSchema,
     targets: Optional[list[str]] = None,
     unknown_value: int = 99999,
-    verbose: bool = True
+    verbose: int = 3
 ) -> pd.DataFrame:
     """
     Aligns the input DataFrame with the provided FeatureSchema.
@@ -100,7 +107,7 @@ def apply_feature_schema(
         targets (list[str] | None): Optional list of target column names.
         unknown_value (int): Integer value to assign to unknown categorical levels.
                              Defaults to 99999 to avoid collision with existing categories.
-        verbose (bool): If True, logs info about dropped extra columns.
+        verbose (int): Verbosity level for logging. Higher values produce more detailed logs.
     Returns:
         pd.DataFrame: A new DataFrame with the exact column order and encoding defined by the schema.
@@ -147,7 +154,8 @@ def apply_feature_schema(
             # Handle Unknown Categories
             if df_processed[col_name].isnull().any():
                 n_missing = df_processed[col_name].isnull().sum()
-                _LOGGER.warning(f"Feature '{col_name}': Found {n_missing} unknown categories. Mapping to {unknown_value}.")
+                if verbose >= 1:
+                    _LOGGER.warning(f"Feature '{col_name}': Found {n_missing} unknown categories. Mapping to {unknown_value}.")
                 # Fill unknowns with the specified integer
                 df_processed[col_name] = df_processed[col_name].fillna(unknown_value)
@@ -159,14 +167,13 @@ def apply_feature_schema(
     extra_cols = set(df_processed.columns) - set(final_column_order)
     if extra_cols:
-        _LOGGER.info(f"Dropping {len(extra_cols)} extra columns not present in schema.")
-        if verbose:
-            for extra_column in extra_cols:
-                print(f"  - Dropping column: '{extra_column}'")
+        if verbose >= 1:
+            _LOGGER.warning(f"Dropping {len(extra_cols)} extra columns not present in schema: {extra_cols}")
     df_final = df_processed[final_column_order]
-    _LOGGER.info(f"Schema applied successfully. Final shape: {df_final.shape}")
+    if verbose >= 2:
+        _LOGGER.info(f"Schema applied successfully. Final shape: {df_final.shape}")
     # df_final should be a dataframe
     if isinstance(df_final, pd.Series):
@@ -174,3 +181,95 @@ def apply_feature_schema(
     return df_final
+def reconstruct_from_schema(
+    df: pd.DataFrame,
+    schema: FeatureSchema,
+    targets: Optional[list[str]] = None,
+    verbose: int = 3
+) -> pd.DataFrame:
+    """
+    Reverses the schema application to make data human-readable.
+    This function decodes categorical features back to their string representations
+    using the schema's mappings. It strictly enforces the schema structure,
+    ignoring extra columns (unless they are specified as targets).
+    Args:
+        df (pd.DataFrame): The input DataFrame containing encoded features.
+        schema (FeatureSchema): The schema defining feature names and reverse mappings.
+        targets (list[str] | None): Optional list of target column names to preserve. These are not decoded and kept in the order specified here.
+        verbose (int): Verbosity level for logging info about the process.
+    Returns:
+        pd.DataFrame: A new DataFrame with the exact column order (features + targets),
+                      with categorical features decoded to strings.
+    Raises:
+        ValueError: If any required feature or target column is missing.
+    """
+    # 1. Setup
+    df_decoded = df.copy()
+    targets = targets if targets is not None else []
+    # 2. Validation: Strict Column Presence
+    # Check Features
+    missing_features = [col for col in schema.feature_names if col not in df_decoded.columns]
+    if missing_features:
+        _LOGGER.error(f"Schema Reconstruction Mismatch: Missing required features: {missing_features}")
+        raise ValueError()
+    # Check Targets
+    if targets:
+        missing_targets = [col for col in targets if col not in df_decoded.columns]
+        if missing_targets:
+            _LOGGER.error(f"Schema Reconstruction Mismatch: Missing required targets: {missing_targets}")
+            raise ValueError()
+    # 3. Reorder and Filter (Drop extra columns early)
+    # The valid columns are Features + Targets
+    valid_columns = list(schema.feature_names) + targets
+    extra_cols = set(df_decoded.columns) - set(valid_columns)
+    if extra_cols:
+        if verbose >= 1:
+            _LOGGER.warning(f"Dropping extra columns not present in schema or targets: {extra_cols}")
+    # Enforce order: Features first, then Targets
+    df_decoded = df_decoded[valid_columns]
+    # 4. Reverse Categorical Encoding
+    if schema.categorical_feature_names and schema.categorical_mappings:
+        for col_name in schema.categorical_feature_names:
+            if col_name not in schema.categorical_mappings:
+                continue
+            forward_mapping = schema.categorical_mappings[col_name]
+            # Create reverse map: {int: str}
+            reverse_mapping = {v: k for k, v in forward_mapping.items()}
+            # --- SAFE TYPE CASTING ---
+            # Ensure values are Integers before mapping (handle 5.0 vs 5).
+            try:
+                if pd.api.types.is_numeric_dtype(df_decoded[col_name]):
+                    df_decoded[col_name] = df_decoded[col_name].astype("Int64")
+            except (TypeError, ValueError):
+                # casted to NaN later during mapping
+                pass
+            # -------------------------
+            # Check for unknown codes before mapping
+            if verbose >= 1:
+                unique_codes = df_decoded[col_name].dropna().unique()
+                unknown_codes = [code for code in unique_codes if code not in reverse_mapping]
+                if unknown_codes:
+                    _LOGGER.warning(f"Feature '{col_name}': Found unknown encoded values {unknown_codes}. These will be mapped to NaN.")
+            # Apply reverse mapping
+            df_decoded[col_name] = df_decoded[col_name].map(reverse_mapping)
+    if verbose >= 2:
+        _LOGGER.info(f"Schema reconstruction successful. Final shape: {df_decoded.shape}")
+    return df_decoded

ml_tools/keys/_keys.py CHANGED Viewed

@@ -297,7 +297,7 @@ class _EvaluationConfig:
     # large sizes for SVG layout to accommodate large fonts
     REGRESSION_PLOT_SIZE = (12, 8)
     SEQUENCE_PLOT_SIZE = (12, 8)
-    CLASSIFICATION_PLOT_SIZE = (10, 10)
+    CLASSIFICATION_PLOT_SIZE = (9, 9)
     # Loss plot
     LOSS_PLOT_SIZE = (18, 9)
     LOSS_PLOT_LABEL_SIZE = 24

ml_tools/schema/_feature_schema.py CHANGED Viewed

@@ -202,13 +202,39 @@ class FeatureSchema(NamedTuple):
                           filename=DatasetKeys.CATEGORICAL_NAMES,
                           verbose=verbose)
-    def save_artifacts(self, directory: Union[str,Path]):
+    def save_description(self, directory: Union[str, Path], verbose: bool = False) -> None:
+        """
+        Saves the schema's description to a .txt file.
+        Args:
+            directory: The directory where the file will be saved.
+            verbose: If True, prints a confirmation message upon saving.
+        """
+        dir_path = make_fullpath(directory, make=True, enforce="directory")
+        filename = "FeatureSchema-description.txt"
+        file_path = dir_path / filename
+        try:
+            with open(file_path, "w", encoding="utf-8") as f:
+                f.write(str(self))
+            if verbose:
+                _LOGGER.info(f"Schema description saved to '{dir_path.name}/{filename}'")
+        except IOError as e:
+            _LOGGER.error(f"Failed to save schema description: {e}")
+            raise e
+    def save_artifacts(self, directory: Union[str,Path], verbose: bool=True):
         """
         Saves feature names, categorical feature names, continuous feature names to separate text files.
         """
-        self.save_all_features(directory=directory, verbose=True)
-        self.save_continuous_features(directory=directory, verbose=True)
-        self.save_categorical_features(directory=directory, verbose=True)
+        self.save_all_features(directory=directory, verbose=False)
+        self.save_continuous_features(directory=directory, verbose=False)
+        self.save_categorical_features(directory=directory, verbose=False)
+        self.save_description(directory=directory, verbose=False)
+        if verbose:
+            _LOGGER.info(f"All FeatureSchema artifacts saved to directory: '{directory}'")
     def __repr__(self) -> str:
         """Returns a concise representation of the schema's contents."""

{dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.3.0.dist-info → dragon_ml_toolbox-20.5.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 20.3.0__py3-none-any.whl → 20.5.0__py3-none-any.whl

dragon-ml-toolbox 20.3.0-py3-none-any.whl → 20.5.0-py3-none-any.whl