PyPI - dragon-ml-toolbox - Versions diffs - 19.12.0__py3-none-any.whl → 19.12.2__py3-none-any.whl - Mend

dragon-ml-toolbox 19.12.0__py3-none-any.whl → 19.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{dragon_ml_toolbox-19.12.0.dist-info → dragon_ml_toolbox-19.12.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 19.12.0
+Version: 19.12.2
 Summary: Complete pipelines and helper tools for data science and machine learning projects.
 Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-19.12.0.dist-info → dragon_ml_toolbox-19.12.2.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-19.12.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-19.12.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+dragon_ml_toolbox-19.12.2.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-19.12.2.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
 ml_tools/ETL_cleaning.py,sha256=cKXyRFaaFs_beAGDnQM54xnML671kq-yJEGjHafW-20,351
 ml_tools/ETL_engineering.py,sha256=cwh1FhtNdUHllUDvho-x3SIVj4KwG_rFQR6VYzWUg0U,898
 ml_tools/GUI_tools.py,sha256=O89rG8WQv6GY1DiphQjIsPzXFCQID6te7q_Sgt1iTkQ,294
@@ -38,7 +38,7 @@ ml_tools/SQL.py,sha256=ZYlY5L-k2mkDckOhNPtJEof2L7ePe_KBpgx55WG5NKs,84
 ml_tools/VIF_factor.py,sha256=xGUbnfhh1eqUiHX-tIpJBn_3Y_h3SOuNfVKkpsQXc7w,184
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
-ml_tools/data_exploration.py,sha256=qgo4hfuLnj-GzEpnOi4AzaX5xFjwM5Ox-uRjMaR4dug,1468
+ml_tools/data_exploration.py,sha256=vwCgOHhpPku2uuRVscrco6sXwkjc3ruOlfO002OQ74M,1468
 ml_tools/ensemble_evaluation.py,sha256=P26vyS2fMV3Pm_4w2MN1z1eS7aVJzYagsyLmqC-Io6Q,468
 ml_tools/ensemble_inference.py,sha256=sl_Dq9KaN0SrtZmyiVrrhWd6lSjdQangSIUUUIFvfj4,178
 ml_tools/ensemble_learning.py,sha256=BLPnpfJWCly-D75mkRP1FE5TExoWAAlAHR89KAzW9iU,336
@@ -54,18 +54,18 @@ ml_tools/utilities.py,sha256=dHNjGPH3Ck9V41IRFbRojE_RW6lACdxrNZz0FxI5SQY,691
 ml_tools/_core/_ETL_cleaning.py,sha256=_pTNKuapNHgWErmxvsXW-2YzCm4BaTshKV627A38RuA,28748
 ml_tools/_core/_ETL_engineering.py,sha256=JgIWrQGyNjmLrbyv5Kh0EHKBLmYlyrGKSnKRxGzxSco,57930
 ml_tools/_core/_GUI_tools.py,sha256=kpvk18Eb4vdLzo-I5mBV1yuwPXs-NJJ01rn-iCXHvIY,49079
-ml_tools/_core/_IO_tools.py,sha256=oWaYa_OVO-8ANVt_a9F1QPMvyOcI2yLbtq7LoVHlqek,16625
+ml_tools/_core/_IO_tools.py,sha256=sEbtzDHkc9GNkXvsFS9ic038LzAW-rxXPuLwtoHXzGw,17107
 ml_tools/_core/_MICE_imputation.py,sha256=64l20duGWt93Q2MbqcWqrA1s99JPRf5AJACb1CZi2xI,21149
 ml_tools/_core/_ML_callbacks.py,sha256=T0PjptlpC75_Tp3bWIMPTYhxsMX-8z4YtDT4FJ3p8jg,27988
 ml_tools/_core/_ML_chaining_inference.py,sha256=vXUPZzuQ2yKU71kkvUsE0xPo0hN-Yu6gfnL0JbXoRjI,7783
 ml_tools/_core/_ML_chaining_utilities.py,sha256=nsYowgRbkIYuzRiHlqsM3tnC3c-8O73CY8DHUF14XL0,19248
-ml_tools/_core/_ML_configuration.py,sha256=hwnDCo9URsFqRCgLuFJhGTtoOqbE1XJreNY8B_3spTg,52693
+ml_tools/_core/_ML_configuration.py,sha256=olRcam2s-Y5oUr8BAcmhwIBQDmaQZm2RHTv5sK2HeOU,53151
 ml_tools/_core/_ML_configuration_pytab.py,sha256=C3e4iScqdRePVDoqnic6xXMOW7DNYqpgTCeaFDyMdL4,3286
 ml_tools/_core/_ML_datasetmaster.py,sha256=yU1BMtzz6XumMWCetVACrRLk7WJQwmYhaQ-VAWu9Ots,32043
 ml_tools/_core/_ML_evaluation.py,sha256=bu8qlYzhWSC1B7wNfCC5TSF-oed-uP8EF7TV45VTiBM,37325
 ml_tools/_core/_ML_evaluation_captum.py,sha256=a69jnghIzE9qppuw2vzTBMdTErnZkDkTA3MPUUYjsS4,19212
 ml_tools/_core/_ML_evaluation_multi.py,sha256=n_AJbKF58DMUrYqJutwPFV5z6sNssDPA1Gl05IfPG5s,23647
-ml_tools/_core/_ML_finalize_handler.py,sha256=0eZ_0N2L5aUUIJUgvhAQ-rbd8XbE9UmNqTKSJq09uTI,6987
+ml_tools/_core/_ML_finalize_handler.py,sha256=1__wG3Jcr9h1a99F-CmHezhEw1_Ojxh3aDHNyJN2S5w,7127
 ml_tools/_core/_ML_inference.py,sha256=5swm2lnsrDLalBnCm7gZPlDucX4yNCq5vn7ck3SW_4Q,29791
 ml_tools/_core/_ML_models.py,sha256=8FUx4-TVghlBF9srh1_5UxovrWPU7YEZ6XXLqwJei88,27974
 ml_tools/_core/_ML_models_advanced.py,sha256=oU6M5FEBMQ9yPp32cziWh3bz8SXRho07vFMC8ZDVcuU,45002
@@ -77,7 +77,7 @@ ml_tools/_core/_ML_sequence_datasetmaster.py,sha256=0YVOPf-y4ZNdgUxropXUWrmInNyG
 ml_tools/_core/_ML_sequence_evaluation.py,sha256=AiPHtZ9DRpE6zL9n3Tp5eGGD9vrYRkLbZ0Nc274mL7I,8069
 ml_tools/_core/_ML_sequence_inference.py,sha256=zd3hBwOtLmjAV4JtdB2qFY9GxhysajFufATdy8fjGTE,16316
 ml_tools/_core/_ML_sequence_models.py,sha256=5qcEYLU6wDePBITnikBrj_H9mCvyJmElKa3HiWGXhZs,5639
-ml_tools/_core/_ML_trainer.py,sha256=EeNqZ0pCWrBxGaYgOVmDxofMBQhV56Bvsj-VuBwBgHQ,117580
+ml_tools/_core/_ML_trainer.py,sha256=ZYDH-P8GJhFe0vpeMtgLS0O3Fz0d4qr8zcTm-C30T1I,117595
 ml_tools/_core/_ML_utilities.py,sha256=elLGD0QYh148_9iNLlqGe1vz-wCFspJa6CWtWTfA3jY,35594
 ml_tools/_core/_ML_vision_datasetmaster.py,sha256=8EsE7luzphVlwBXdOsOwsFfz1D4UIUSEQtqHlM0Vf-o,67084
 ml_tools/_core/_ML_vision_evaluation.py,sha256=BSLf9xrGpaR02Dhkf-fAbgxSpwRjf7DruNIcQadl7qg,11631
@@ -88,7 +88,7 @@ ml_tools/_core/_PSO_optimization.py,sha256=W3g5xw2v2eOUQadv8KHFkt5HNm9AiY3ZUk-Te
 ml_tools/_core/_SQL.py,sha256=zX_8EgYfmLmvvrnL851KMkI4w9kdkjHJ997BTvS5aig,11556
 ml_tools/_core/_VIF_factor.py,sha256=BM0mTowBqt45PXFy9oJLhT9C-CTWWo0TQhgCyWYLHtQ,10457
 ml_tools/_core/__init__.py,sha256=d4IG0OxUXj2HffepzQcYixHlZeuuuDMAFa09H_6LtmU,12
-ml_tools/_core/_data_exploration.py,sha256=-g_e4Lox4LN8c2AfhpcPmnI9TNIZGl84O8hWEVH5asA,77438
+ml_tools/_core/_data_exploration.py,sha256=uynIjMppbr5nFJ-7wag0R0HDQCp2rTXqz_IpgNKKNOM,77508
 ml_tools/_core/_ensemble_evaluation.py,sha256=17lWl4bWLT1BAMv_fhGf2D3wy-F4jx0HgnJ79lYkRuE,28419
 ml_tools/_core/_ensemble_inference.py,sha256=9UpARSETzmqPdQmxqizD768tjkqldxHw1ER_hM9Kx9M,8631
 ml_tools/_core/_ensemble_learning.py,sha256=X8ghbjDOLMENCWdISXLhDlHQtR3C6SW1tkTBAcfRRPY,22016
@@ -104,8 +104,8 @@ ml_tools/_core/_plot_fonts.py,sha256=CjYXW2gZ9AUaGkyX8_WOXXNYs6d1PTK-nEJBrv_Zb2o
 ml_tools/_core/_schema.py,sha256=TM5WVVMoKOvr_Bc2z34sU_gzKlM465PRKTgdZaEOkGY,14076
 ml_tools/_core/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/_core/_serde.py,sha256=tsI4EO2Y7jrBMmbQ1pinDsPOrOg-SaPuB-Dt40q0taE,5609
-ml_tools/_core/_utilities.py,sha256=D7FGyEszcMHxGkMW4aqN7JUwabTICCcQz9qsGtOj97o,22787
-dragon_ml_toolbox-19.12.0.dist-info/METADATA,sha256=MoUoxvRMHfPhedd87f68yWWlPJFEGVFCOa1OpgKwpHE,8193
-dragon_ml_toolbox-19.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-19.12.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-19.12.0.dist-info/RECORD,,
+ml_tools/_core/_utilities.py,sha256=oU-0hBipE96bXox66NG-hFuEMMNkKa9MkAy1yJGCSIA,22779
+dragon_ml_toolbox-19.12.2.dist-info/METADATA,sha256=Nuk7YVRdDotD_TURCpIFKqcDuTlkBs9fpTv-8jCm5aU,8193
+dragon_ml_toolbox-19.12.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-19.12.2.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-19.12.2.dist-info/RECORD,,

ml_tools/_core/_IO_tools.py CHANGED Viewed

@@ -434,8 +434,8 @@ def train_logger(train_config: Union[dict, Any],
     Logs training data to JSON, adding a timestamp to the filename.
     Args:
-        train_config (dict | Any): Training configuration parameters.
-        model_parameters (dict | Any): Model parameters.
+        train_config (dict | Any): Training configuration parameters. If object, must have a `.to_log()` method returning a dict.
+        model_parameters (dict | Any): Model parameters. If object, must have a `.to_log()` method returning a dict.
         train_history (dict | None): Training history log.
         save_directory (str | Path): Directory to save the log file.
     """
@@ -443,6 +443,9 @@ def train_logger(train_config: Union[dict, Any],
     if not isinstance(train_config, dict):
         if hasattr(train_config, "to_log") and callable(getattr(train_config, "to_log")):
             train_config_dict: dict = train_config.to_log()
+            if not isinstance(train_config_dict, dict):
+                _LOGGER.error("'train_config.to_log()' did not return a dictionary.")
+                raise ValueError()
         else:
             _LOGGER.error("'train_config' must be a dict or an object with a 'to_log()' method.")
             raise ValueError()
@@ -458,6 +461,9 @@ def train_logger(train_config: Union[dict, Any],
     if not isinstance(model_parameters, dict):
         if hasattr(model_parameters, "to_log") and callable(getattr(model_parameters, "to_log")):
             model_parameters_dict: dict = model_parameters.to_log()
+            if not isinstance(model_parameters_dict, dict):
+                _LOGGER.error("'model_parameters.to_log()' did not return a dictionary.")
+                raise ValueError()
         else:
             _LOGGER.error("'model_parameters' must be a dict or an object with a 'to_log()' method.")
             raise ValueError()

ml_tools/_core/_ML_configuration.py CHANGED Viewed

@@ -660,18 +660,27 @@ class DragonTrainingConfig(_BaseModelParams):
                  initial_learning_rate: float,
                  batch_size: int,
                  random_state: int = 101,
-                 early_stop_patience: Optional[int] = None,
-                 scheduler_patience: Optional[int] = None,
-                 scheduler_lr_factor: Optional[float] = None,
+                #  early_stop_patience: Optional[int] = None,
+                #  scheduler_patience: Optional[int] = None,
+                #  scheduler_lr_factor: Optional[float] = None,
                  **kwargs: Any) -> None:
+        """
+        Args:
+            validation_size (float): Proportion of data for validation set.
+            test_size (float): Proportion of data for test set.
+            initial_learning_rate (float): Starting learning rate.
+            batch_size (int): Number of samples per training batch.
+            random_state (int): Seed for reproducibility.
+            **kwargs: Additional training parameters as key-value pairs.
+        """
         self.validation_size = validation_size
         self.test_size = test_size
         self.initial_learning_rate = initial_learning_rate
         self.batch_size = batch_size
         self.random_state = random_state
-        self.early_stop_patience = early_stop_patience
-        self.scheduler_patience = scheduler_patience
-        self.scheduler_lr_factor = scheduler_lr_factor
+        # self.early_stop_patience = early_stop_patience
+        # self.scheduler_patience = scheduler_patience
+        # self.scheduler_lr_factor = scheduler_lr_factor
         # Process kwargs with validation
         for key, value in kwargs.items():

ml_tools/_core/_ML_finalize_handler.py CHANGED Viewed

@@ -51,7 +51,7 @@ class FinalizedFileHandler:
         self._initial_sequence: Optional[np.ndarray] = None
         self._target_name: Optional[str] = None
         self._target_names: Optional[list[str]] = None
-        self._model_state_dict: Optional[Any] = None
+        self._model_state_dict: Optional[dict[str, Any]] = None
         # Set warning outputs
         self._verbose: bool=True
@@ -90,7 +90,7 @@ class FinalizedFileHandler:
         else:
             # It is a dict, but missing the keys, assume it is the raw state dict
-            _LOGGER.info(f"File '{pth_path.name}' does not have the required keys for a finalized-file. Treating it as raw PyTorch state dictionary.")
+            _LOGGER.warning(f"File '{pth_path.name}' does not have the required keys for a Dragon-ML finalized-file. Keys found:\n    {list(pth_file_content.keys())}")
             self._model_state_dict = pth_file_content
@@ -113,9 +113,10 @@ class FinalizedFileHandler:
         return self._task
     @property
-    def model_state_dict(self):
+    def model_state_dict(self) -> dict[str, Any]:
         """Returns the model state dictionary."""
-        return self._model_state_dict
+        # No need to check for None, as it is guaranteed to be set in __init__
+        return self._model_state_dict # type: ignore
     @property
     def epoch(self) -> Optional[int]:

ml_tools/_core/_ML_trainer.py CHANGED Viewed

@@ -663,7 +663,7 @@ class DragonTrainer(_BaseDragonTrainer):
         Evaluates the model, routing to the correct evaluation function based on task `kind`.
         Args:
-            model_checkpoint ('auto' | Path | None):
+            model_checkpoint (Path | "best" | "current"):
                 - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
                 - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
                 - If 'current', use the current state of the trained model up the latest trained epoch.
@@ -1608,7 +1608,7 @@ class DragonDetectionTrainer(_BaseDragonTrainer):
         Args:
             save_dir (str | Path): Directory to save all reports and plots.
-            model_checkpoint ('auto' | Path | None):
+            model_checkpoint (Path | "best" | "current"):
                 - Path to a valid checkpoint for the model. The state of the trained model will be overwritten in place.
                 - If 'best', the best checkpoint will be loaded if a DragonModelCheckpoint was provided. The state of the trained model will be overwritten in place.
                 - If 'current', use the current state of the trained model up the latest trained epoch.
@@ -2046,7 +2046,7 @@ class DragonSequenceTrainer(_BaseDragonTrainer):
         Evaluates the model, routing to the correct evaluation function.
         Args:
-            model_checkpoint ('auto' | Path | None):
+            model_checkpoint (Path | "best" | "current"):
                 - Path to a valid checkpoint for the model.
                 - If 'best', the best checkpoint will be loaded.
                 - If 'current', use the current state of the trained model.

ml_tools/_core/_data_exploration.py CHANGED Viewed

@@ -17,7 +17,6 @@ from ._schema import FeatureSchema
 _LOGGER = get_logger("Data Exploration")
-# Keep track of all available tools, show using `info()`
 __all__ = [
     "summarize_dataframe",
     "drop_constant_columns",
@@ -754,6 +753,8 @@ def plot_categorical_vs_target(
     This function is a core EDA step for regression tasks to understand the
     relationship between a categorical independent variable and a continuous
     dependent variable.
+    Plots are saved as individual .svg files in a structured way, with a subdirectory created for each target.
     Args:
         df (pd.DataFrame): The input DataFrame.
@@ -1167,7 +1168,7 @@ def clip_outliers_single(
 def clip_outliers_multi(
     df: pd.DataFrame,
-    clip_dict: Dict[str, Tuple[Union[int, float], Union[int, float]]],
+    clip_dict: Union[Dict[str, Tuple[int, int]], Dict[str, Tuple[float, float]]],
     verbose: bool=False
 ) -> pd.DataFrame:
     """

ml_tools/_core/_utilities.py CHANGED Viewed

@@ -396,7 +396,7 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
     if not isinstance(full_path, Path) or not full_path.suffix.endswith(".csv"):
         _LOGGER.error('A path object pointing to a .csv file must be provided.')
         raise ValueError()
     save_dataframe_filename(df=df,
                             save_dir=full_path.parent,
                             filename=full_path.name)

ml_tools/data_exploration.py CHANGED Viewed

@@ -37,18 +37,18 @@ __all__ = [
     "plot_value_distributions",
     "plot_continuous_vs_target",
     "plot_categorical_vs_target",
-    "encode_categorical_features",
     "split_features_targets",
-    "split_continuous_binary",
+    "encode_categorical_features",
     "clip_outliers_single",
     "clip_outliers_multi",
     "drop_outlier_samples",
     "plot_correlation_heatmap",
+    "finalize_feature_schema",
     "match_and_filter_columns_by_regex",
     "standardize_percentages",
     "reconstruct_one_hot",
     "reconstruct_binary",
     "reconstruct_multibinary",
-    "finalize_feature_schema",
+    "split_continuous_binary",
     "apply_feature_schema",
 ]

{dragon_ml_toolbox-19.12.0.dist-info → dragon_ml_toolbox-19.12.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-19.12.0.dist-info → dragon_ml_toolbox-19.12.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-19.12.0.dist-info → dragon_ml_toolbox-19.12.2.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-19.12.0.dist-info → dragon_ml_toolbox-19.12.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 19.12.0__py3-none-any.whl → 19.12.2__py3-none-any.whl

dragon-ml-toolbox 19.12.0__py3-none-any.whl → 19.12.2__py3-none-any.whl