PyPI - dragon-ml-toolbox - Versions diffs - 20.8.0__py3-none-any.whl → 20.9.0__py3-none-any.whl - Mend

dragon-ml-toolbox 20.8.0-py3-none-any.whl → 20.9.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

{dragon_ml_toolbox-20.8.0.dist-info → dragon_ml_toolbox-20.9.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 20.8.0
+Version: 20.9.0
 Summary: Complete pipelines and helper tools for data science and machine learning projects.
 Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
 License-Expression: MIT

{dragon_ml_toolbox-20.8.0.dist-info → dragon_ml_toolbox-20.9.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-dragon_ml_toolbox-20.8.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-20.8.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
+dragon_ml_toolbox-20.9.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-20.9.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/ETL_cleaning/__init__.py,sha256=gLRHF-qzwpqKTvbbn9chIQELeUDh_XGpBRX28j-5IqI,545
@@ -34,15 +34,16 @@ ml_tools/ML_configuration/_metrics.py,sha256=KJM7HQeoEmJUUUrxNa4wYf2N9NawGPJoy7A
 ml_tools/ML_configuration/_models.py,sha256=lvuuqvD6DWUzOa3i06NZfrdfOi9bu2e26T_QO6BGMSw,7629
 ml_tools/ML_configuration/_training.py,sha256=_M_TwouHFNbGrZQtQNAvyG_poSVpmN99cbyUonZsHhk,8969
 ml_tools/ML_datasetmaster/__init__.py,sha256=UltQzuXnlXVCkD-aeA5TW4IcMVLnQf1_aglawg4WyrI,580
-ml_tools/ML_datasetmaster/_base_datasetmaster.py,sha256=lmqo9CN09xMu-YKYtKEnC2ZEzkxcZFJ0rS1B7K2-PKY,14691
+ml_tools/ML_datasetmaster/_base_datasetmaster.py,sha256=IgyVzRY3mlKDyBDklawvPF9SMjZFu8T2red6M-3MlQ4,16074
 ml_tools/ML_datasetmaster/_datasetmaster.py,sha256=Oy2UE3YJpKTaFwQF5TkQLgLB54-BFw_5b8wIPTxZIKU,19157
 ml_tools/ML_datasetmaster/_sequence_datasetmaster.py,sha256=cW3fuILZWs-7Yuo4T2fgGfTC4vwho3Gp4ohIKJYS7O0,18452
 ml_tools/ML_datasetmaster/_vision_datasetmaster.py,sha256=kvSqXYeNBN1JSRfSEEXYeIcsqy9HsJAl_EwFWClqlsw,67025
 ml_tools/ML_evaluation/__init__.py,sha256=e3c8JNP0tt4Kxc7QSQpGcOgrxf8JAucH4UkJvJxUL2E,1122
-ml_tools/ML_evaluation/_classification.py,sha256=Te5ckLfBCUyb3QO9vZ_mlJF5wS5LoajXC54k1Fkct-U,33938
+ml_tools/ML_evaluation/_classification.py,sha256=0URqIhNEgWedy-SYRmIJ2ejLKqatiuOU7qelJ6Cv3OE,33939
 ml_tools/ML_evaluation/_feature_importance.py,sha256=mTwi3LKom_axu6UFKunELj30APDdhG9GQC2w7I9mYhI,17137
+ml_tools/ML_evaluation/_helpers.py,sha256=kE1TSYIOAAcYI1EjdudyTfFeU47Wrl0E9eNL1EOwbKg,1217
 ml_tools/ML_evaluation/_loss.py,sha256=1a4O25i3Ya_3naNZNL7ELLUL46BY86g1scA7d7q2UFM,3625
-ml_tools/ML_evaluation/_regression.py,sha256=hnT2B2_6AnQ7aA7uk-X2lZL9G5JFGCduDXyZbr1gFCA,11037
+ml_tools/ML_evaluation/_regression.py,sha256=UZA7_fg85ZKJQWszioWDtmkplSiXeHJk2fBYR5bRXHY,11225
 ml_tools/ML_evaluation/_sequence.py,sha256=gUk9Uvmy7MrXkfrriMnfypkgJU5XERHdqekTa2gBaOM,8004
 ml_tools/ML_evaluation/_vision.py,sha256=abBHQ6Z2GunHNusL3wcLgfI1FVNA6hBUBTq1eOA8FSA,11489
 ml_tools/ML_evaluation_captum/_ML_evaluation_captum.py,sha256=6g3ymSxJGHXxwIN7WCD2Zi9zxKWEv-Qskd2cCGQQJ5Y,18439
@@ -118,7 +119,7 @@ ml_tools/ensemble_learning/_ensemble_learning.py,sha256=MHDZBR20_nStlSSeThFI3bSu
 ml_tools/excel_handler/__init__.py,sha256=AaWM3n_dqBhJLTs3OEA57ex5YykKXNOwVCyHlVsdnqI,530
 ml_tools/excel_handler/_excel_handler.py,sha256=TODudmeQgDSdxUKzLfAzizs--VL-g8WxDOfQ4sgxxLs,13965
 ml_tools/keys/__init__.py,sha256=-0c2pmrhyfROc-oQpEjJGLBMhSagA3CyFijQaaqZRqU,399
-ml_tools/keys/_keys.py,sha256=jBhw99SRTlBkb9EFMDLZA86_kaHT4YLxkljDYRCTarE,9389
+ml_tools/keys/_keys.py,sha256=56hlyPl2VUMsq7cFFLBypWHr-JU6ehWGwZG38l6IjI0,9389
 ml_tools/math_utilities/__init__.py,sha256=K7Obkkc4rPKj4EbRZf1BsXHfiCg7FXYv_aN9Yc2Z_Vg,400
 ml_tools/math_utilities/_math_utilities.py,sha256=BYHIVcM9tuKIhVrkgLLiM5QalJ39zx7dXYy_M9aGgiM,9012
 ml_tools/optimization_tools/__init__.py,sha256=KD8JXpfGuPndO4AHnjJGu6uV1GRwhOfboD0KZV45kzw,658
@@ -142,7 +143,7 @@ ml_tools/utilities/__init__.py,sha256=h4lE3SQstg-opcQj6QSKhu-HkqSbmHExsWoM9vC5D9
 ml_tools/utilities/_translate.py,sha256=U8hRPa3PmTpIf9n9yR3gBGmp_hkcsjQLwjAHSHc0WHs,10325
 ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
 ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
-dragon_ml_toolbox-20.8.0.dist-info/METADATA,sha256=EVzUhpCzHarcTicuqc_t4prSuJdXGuCppSX7wnIv1JY,7888
-dragon_ml_toolbox-20.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-20.8.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-20.8.0.dist-info/RECORD,,
+dragon_ml_toolbox-20.9.0.dist-info/METADATA,sha256=ehKhp6BpCkHcZnWpcoZU53rn4T0yI0Dboq3eH2vx8LU,7888
+dragon_ml_toolbox-20.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-20.9.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-20.9.0.dist-info/RECORD,,

ml_tools/ML_datasetmaster/_base_datasetmaster.py CHANGED Viewed

@@ -133,7 +133,7 @@ class _BaseDatasetMaker(ABC):
         # Get continuous feature indices *from the schema*
         if schema.continuous_feature_names:
-            if verbose >= 2:
+            if verbose >= 3:
                 _LOGGER.info("Getting continuous feature indices from schema.")
             try:
                 # Convert columns to a standard list for .index()
@@ -189,7 +189,7 @@ class _BaseDatasetMaker(ABC):
         # ------------------------------------------------------------------
         if self.target_scaler is None:
-            if verbose >= 2:
+            if verbose >= 3:
                 _LOGGER.info("Fitting a new DragonScaler on training targets.")
             # Convert to float tensor for calculation
             y_train_tensor = torch.tensor(y_train_arr, dtype=torch.float32)
@@ -202,6 +202,9 @@ class _BaseDatasetMaker(ABC):
             y_val_tensor = self.target_scaler.transform(torch.tensor(y_val_arr, dtype=torch.float32))
             y_test_tensor = self.target_scaler.transform(torch.tensor(y_test_arr, dtype=torch.float32))
             return y_train_tensor.numpy(), y_val_tensor.numpy(), y_test_tensor.numpy()
+        if verbose >= 2:
+            _LOGGER.info("Target scaling transformation complete.")
         return y_train_arr, y_val_arr, y_test_arr
@@ -214,6 +217,9 @@ class _BaseDatasetMaker(ABC):
     @property
     def train_dataset(self) -> Dataset:
+        """
+        Returns the training dataset.
+        """
         if self._train_ds is None:
             _LOGGER.error("Train Dataset not yet created.")
             raise RuntimeError()
@@ -221,6 +227,9 @@ class _BaseDatasetMaker(ABC):
     @property
     def validation_dataset(self) -> Dataset:
+        """
+        Returns the validation dataset.
+        """
         if self._val_ds is None:
             _LOGGER.error("Validation Dataset not yet created.")
             raise RuntimeError()
@@ -228,6 +237,9 @@ class _BaseDatasetMaker(ABC):
     @property
     def test_dataset(self) -> Dataset:
+        """
+        Returns the test dataset.
+        """
         if self._test_ds is None:
             _LOGGER.error("Test Dataset not yet created.")
             raise RuntimeError()
@@ -235,30 +247,50 @@ class _BaseDatasetMaker(ABC):
     @property
     def feature_names(self) -> list[str]:
+        """
+        Returns a list with the feature names.
+        """
         return self._feature_names
     @property
     def target_names(self) -> list[str]:
+        """
+        Returns a list with the target names.
+        """
         return self._target_names
     @property
     def number_of_features(self) -> int:
+        """
+        Returns the number of features.
+        """
         return len(self._feature_names)
     @property
     def number_of_targets(self) -> int:
+        """
+        Returns the number of targets.
+        """
         return len(self._target_names)
     @property
     def id(self) -> Optional[str]:
+        """
+        Returns the dataset ID if set, otherwise None.
+        """
         return self._id
     @id.setter
     def id(self, dataset_id: str):
-        if not isinstance(dataset_id, str): raise ValueError("ID must be a string.")
+        if not isinstance(dataset_id, str):
+            _LOGGER.error("Dataset ID must be a string.")
+            raise ValueError()
         self._id = dataset_id
     def dataframes_info(self) -> None:
+        """
+        Prints the shapes of the dataframes after the split.
+        """
         print("--- DataFrame Shapes After Split ---")
         print(f"  X_train shape: {self._X_train_shape}, y_train shape: {self._y_train_shape}")
         print(f"  X_val shape:   {self._X_val_shape}, y_val shape:   {self._y_val_shape}")
@@ -266,12 +298,26 @@ class _BaseDatasetMaker(ABC):
         print("------------------------------------")
     def save_feature_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """
+        Saves the feature names to a text file.
+        Args:
+            directory (str | Path): Directory to save the feature names.
+            verbose (bool): Whether to print log messages.
+        """
         save_list_strings(list_strings=self._feature_names,
                           directory=directory,
                           filename=DatasetKeys.FEATURE_NAMES,
                           verbose=verbose)
     def save_target_names(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """
+        Saves the target names to a text file.
+        Args:
+            directory (str | Path): Directory to save the target names.
+            verbose (bool): Whether to print log messages.
+        """
         save_list_strings(list_strings=self._target_names,
                           directory=directory,
                           filename=DatasetKeys.TARGET_NAMES,
@@ -281,6 +327,10 @@ class _BaseDatasetMaker(ABC):
         """
         Saves both feature and target scalers (if they exist) to a single .pth file
         using a dictionary structure.
+        Args:
+            directory (str | Path): Directory to save the scaler.
+            verbose (bool): Whether to print log messages.
         """
         if self.feature_scaler is None and self.target_scaler is None:
             _LOGGER.warning("No scalers (feature or target) were fitted. Nothing to save.")

ml_tools/ML_evaluation/_classification.py CHANGED Viewed

@@ -28,6 +28,8 @@ from ..path_manager import make_fullpath, sanitize_filename
 from .._core import get_logger
 from ..keys._keys import _EvaluationConfig
+from ._helpers import check_and_abbreviate_name
 _LOGGER = get_logger("Classification Metrics")
@@ -85,7 +87,8 @@ def classification_metrics(save_dir: Union[str, Path],
         try:
             sorted_items = sorted(class_map.items(), key=lambda item: item[1])
             map_labels = [item[1] for item in sorted_items]
-            map_display_labels = [item[0] for item in sorted_items]
+            # Abbreviate display labels if needed
+            map_display_labels = [check_and_abbreviate_name(item[0]) for item in sorted_items]
         except Exception as e:
             _LOGGER.warning(f"Could not parse 'class_map': {e}")
             map_labels = None
@@ -397,6 +400,10 @@ def classification_metrics(save_dir: Union[str, Path],
             # --- Step 1: Get binned data directly ---
             # calculates reliability diagram data without needing a temporary plot
             prob_true, prob_pred = calibration_curve(y_true_binary, y_score, n_bins=dynamic_bins)
+            # Anchor the plot to (0,0) and (1,1) to ensure the line spans the full diagonal
+            prob_true = np.concatenate(([0.0], prob_true, [1.0]))
+            prob_pred = np.concatenate(([0.0], prob_pred, [1.0]))
             # --- Step 2: Plot ---
             ax_cal.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
@@ -467,6 +474,9 @@ def multi_label_classification_metrics(
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+    # --- Pre-process target names for abbreviation ---
+    target_names = [check_and_abbreviate_name(name) for name in target_names]
     # --- Parse Config or use defaults ---
     if config is None:
         # Create a default config if one wasn't provided
@@ -502,7 +512,7 @@ def multi_label_classification_metrics(
     # print(overall_report)
     overall_report_path = save_dir_path / "classification_report.txt"
     overall_report_path.write_text(overall_report)
     # --- Save Classification Report Heatmap (Multi-label) ---
     try:
          # Generate full report as dict
@@ -566,18 +576,6 @@ def multi_label_classification_metrics(
         pred_i = y_pred[:, i] # Use passed-in y_pred
         prob_i = y_prob[:, i] # Use passed-in y_prob
         sanitized_name = sanitize_filename(name)
-        # if name is too long, just take the first letter of each word. Each word might be separated by space or underscore
-        if len(name) >= _EvaluationConfig.NAME_LIMIT:
-            parts = [w for w in name.replace("_", " ").split() if w]
-            abbr = "".join(p[0].upper() for p in parts)
-            # keep only alpha numeric chars
-            abbr = "".join(ch for ch in abbr if ch.isalnum())
-            if not abbr:
-                # fallback to a sanitized, truncated version of the original name
-                abbr = sanitize_filename(name)[: _EvaluationConfig.NAME_LIMIT]
-            _LOGGER.warning(f"Using abbreviated name '{abbr}' for '{name}' plots.")
-            name = abbr
         # --- Save Classification Report for the label (uses y_pred) ---
         report_text = classification_report(true_i, pred_i)
@@ -726,6 +724,11 @@ def multi_label_classification_metrics(
         # Calculate calibration curve for this specific label
         prob_true, prob_pred = calibration_curve(true_i, prob_i, n_bins=dynamic_bins)
+        # Anchor the plot to (0,0) and (1,1)
+        prob_true = np.concatenate(([0.0], prob_true, [1.0]))
+        prob_pred = np.concatenate(([0.0], prob_pred, [1.0]))
+        # Plot the calibration curve
         ax_cal.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
         ax_cal.plot(prob_pred,
                     prob_true,

ml_tools/ML_evaluation/_helpers.py ADDED Viewed

@@ -0,0 +1,41 @@
+from ..keys._keys import _EvaluationConfig
+from ..path_manager import sanitize_filename
+from .._core import get_logger
+_LOGGER = get_logger("Metrics Helper")
+def check_and_abbreviate_name(name: str) -> str:
+    """
+    Checks if a name exceeds the NAME_LIMIT. If it does, creates an abbreviation
+    (initials of words) or truncates it if the abbreviation is empty.
+    Args:
+        name (str): The original label or target name.
+    Returns:
+        str: The potentially abbreviated name.
+    """
+    limit = _EvaluationConfig.NAME_LIMIT
+    # Strip whitespace
+    name = name.strip()
+    if len(name) <= limit:
+        return name
+    # Attempt abbreviation: First letter of each word (split by space or underscore)
+    parts = [w for w in name.replace("_", " ").split() if w]
+    abbr = "".join(p[0].upper() for p in parts)
+    # Keep only alphanumeric characters
+    abbr = "".join(ch for ch in abbr if ch.isalnum())
+    # Fallback if abbreviation failed or is empty
+    if not abbr:
+        sanitized = sanitize_filename(name)
+        abbr = sanitized[:limit]
+    _LOGGER.warning(f"Label '{name}' is too long. Abbreviating to '{abbr}'.")
+    return abbr

ml_tools/ML_evaluation/_regression.py CHANGED Viewed

@@ -19,6 +19,8 @@ from ..path_manager import make_fullpath, sanitize_filename
 from .._core import get_logger
 from ..keys._keys import _EvaluationConfig
+from ._helpers import check_and_abbreviate_name
 _LOGGER = get_logger("Regression Metrics")
@@ -180,6 +182,9 @@ def multi_target_regression_metrics(
     if y_true.shape[1] != len(target_names):
         _LOGGER.error("Number of target names must match the number of columns in y_true.")
         raise ValueError()
+    # --- Pre-process target names for abbreviation ---
+    target_names = [check_and_abbreviate_name(name) for name in target_names]
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
     metrics_summary = []

ml_tools/keys/_keys.py CHANGED Viewed

@@ -306,7 +306,7 @@ class _EvaluationConfig:
     LOSS_PLOT_LEGEND_SIZE = 24
     # CM settings
     CM_SIZE = (9, 8)    # used for multi label binary classification confusion matrix
-    NAME_LIMIT = 20  # max number of characters for feature/label names in plots
+    NAME_LIMIT = 15  # max number of characters for feature/label names in plots
 class _OneHotOtherPlaceholder:
     """Used internally by GUI_tools."""

{dragon_ml_toolbox-20.8.0.dist-info → dragon_ml_toolbox-20.9.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.8.0.dist-info → dragon_ml_toolbox-20.9.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.8.0.dist-info → dragon_ml_toolbox-20.9.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

File without changes

{dragon_ml_toolbox-20.8.0.dist-info → dragon_ml_toolbox-20.9.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

dragon-ml-toolbox 20.8.0__py3-none-any.whl → 20.9.0__py3-none-any.whl

dragon-ml-toolbox 20.8.0-py3-none-any.whl → 20.9.0-py3-none-any.whl