PyPI - dragon-ml-toolbox - Versions diffs - 14.7.0__py3-none-any.whl → 16.2.0__py3-none-any.whl - Mend

dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

{dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/METADATA +9 -5
dragon_ml_toolbox-16.2.0.dist-info/RECORD +51 -0
ml_tools/ETL_cleaning.py +20 -20
ml_tools/ETL_engineering.py +23 -25
ml_tools/GUI_tools.py +20 -20
ml_tools/MICE_imputation.py +3 -3
ml_tools/ML_callbacks.py +43 -26
ml_tools/ML_configuration.py +704 -24
ml_tools/ML_datasetmaster.py +235 -280
ml_tools/ML_evaluation.py +144 -39
ml_tools/ML_evaluation_multi.py +103 -35
ml_tools/ML_inference.py +290 -208
ml_tools/ML_models.py +13 -102
ml_tools/ML_models_advanced.py +1 -1
ml_tools/ML_optimization.py +12 -12
ml_tools/ML_scaler.py +11 -11
ml_tools/ML_sequence_datasetmaster.py +341 -0
ml_tools/ML_sequence_evaluation.py +219 -0
ml_tools/ML_sequence_inference.py +391 -0
ml_tools/ML_sequence_models.py +139 -0
ml_tools/ML_trainer.py +1342 -386
ml_tools/ML_utilities.py +1 -1
ml_tools/ML_vision_datasetmaster.py +120 -72
ml_tools/ML_vision_evaluation.py +30 -6
ml_tools/ML_vision_inference.py +129 -152
ml_tools/ML_vision_models.py +1 -1
ml_tools/ML_vision_transformers.py +121 -40
ml_tools/PSO_optimization.py +6 -6
ml_tools/SQL.py +4 -4
ml_tools/{keys.py → _keys.py} +45 -0
ml_tools/_schema.py +1 -1
ml_tools/ensemble_evaluation.py +1 -1
ml_tools/ensemble_inference.py +7 -33
ml_tools/ensemble_learning.py +1 -1
ml_tools/optimization_tools.py +2 -2
ml_tools/path_manager.py +5 -5
ml_tools/utilities.py +1 -2
dragon_ml_toolbox-14.7.0.dist-info/RECORD +0 -49
ml_tools/RNN_forecast.py +0 -56
ml_tools/_ML_vision_recipe.py +0 -88
{dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/WHEEL +0 -0
{dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE +0 -0
{dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
{dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/top_level.txt +0 -0

ml_tools/ML_configuration.py CHANGED Viewed

@@ -1,20 +1,45 @@
-from typing import Optional
+from typing import Union, Optional
+import numpy as np
 from ._script_info import _script_info
+from ._logger import _LOGGER
+from .path_manager import sanitize_filename
 __all__ = [
-    "ClassificationMetricsFormat",
-    "MultiClassificationMetricsFormat"
+    "RegressionMetricsFormat",
+    "MultiTargetRegressionMetricsFormat",
+    "BinaryClassificationMetricsFormat",
+    "MultiClassClassificationMetricsFormat",
+    "BinaryImageClassificationMetricsFormat",
+    "MultiClassImageClassificationMetricsFormat",
+    "MultiLabelBinaryClassificationMetricsFormat",
+    "BinarySegmentationMetricsFormat",
+    "MultiClassSegmentationMetricsFormat",
+    "SequenceValueMetricsFormat",
+    "SequenceSequenceMetricsFormat",
+    "FinalizeBinaryClassification",
+    "FinalizeBinarySegmentation",
+    "FinalizeBinaryImageClassification",
+    "FinalizeMultiClassClassification",
+    "FinalizeMultiClassImageClassification",
+    "FinalizeMultiClassSegmentation",
+    "FinalizeMultiLabelBinaryClassification",
+    "FinalizeMultiTargetRegression",
+    "FinalizeRegression",
+    "FinalizeObjectDetection",
+    "FinalizeSequencePrediction"
 ]
+# --- Private base classes ---
-class ClassificationMetricsFormat:
+class _BaseClassificationFormat:
     """
-    Optional configuration for classification tasks, use in the '.evaluate()' method of the MLTrainer.
+    [PRIVATE] Base configuration for single-label classification metrics.
     """
     def __init__(self,
                  cmap: str="Blues",
-                 class_map: Optional[dict[str,int]]=None,
                  ROC_PR_line: str='darkorange',
                  calibration_bins: int=15,
                  font_size: int=16) -> None:
@@ -27,11 +52,6 @@ class ClassificationMetricsFormat:
                 - Sequential options: 'Blues', 'Greens', 'Reds', 'Oranges', 'Purples'
                 - Diverging options: 'coolwarm', 'viridis', 'plasma', 'inferno'
-            class_map (dict[str,int] | None): A dictionary mapping
-                class string names to their integer indices (e.g., {'cat': 0, 'dog': 1}).
-                This is used to label the axes of the confusion matrix and classification
-                report correctly. Defaults to None.
             ROC_PR_line (str): The color name or hex code for the line plotted
                 on the ROC and Precision-Recall curves. Defaults to 'darkorange'.
                 - Common color names: 'darkorange', 'cornflowerblue', 'crimson', 'forestgreen'
@@ -41,9 +61,12 @@ class ClassificationMetricsFormat:
                 creating the calibration (reliability) plot. Defaults to 15.
             font_size (int): The base font size to apply to the plots. Defaults to 16.
+        <br>
+        ## [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
         """
         self.cmap = cmap
-        self.class_map = class_map
         self.ROC_PR_line = ROC_PR_line
         self.calibration_bins = calibration_bins
         self.font_size = font_size
@@ -51,20 +74,18 @@ class ClassificationMetricsFormat:
     def __repr__(self) -> str:
         parts = [
             f"cmap='{self.cmap}'",
-            f"class_map={self.class_map}",
             f"ROC_PR_line='{self.ROC_PR_line}'",
             f"calibration_bins={self.calibration_bins}",
             f"font_size={self.font_size}"
         ]
-        return f"ClassificationMetricsFormat({', '.join(parts)})"
+        return f"{self.__class__.__name__}({', '.join(parts)})"
-class MultiClassificationMetricsFormat:
+class _BaseMultiLabelFormat:
     """
-    Optional configuration for multi-label classification tasks, use in the '.evaluate()' method of the MLTrainer.
+    [PRIVATE] Base configuration for multi-label binary classification metrics.
     """
     def __init__(self,
-                 threshold: float=0.5,
                  ROC_PR_line: str='darkorange',
                  cmap: str = "Blues",
                  font_size: int = 16) -> None:
@@ -72,10 +93,6 @@ class MultiClassificationMetricsFormat:
         Initializes the formatting configuration for multi-label classification metrics.
         Args:
-            threshold (float): The probability threshold (0.0 to 1.0) used
-                to convert sigmoid outputs into binary (0 or 1) predictions for
-                calculating the confusion matrix and overall metrics. Defaults to 0.5.
             ROC_PR_line (str): The color name or hex code for the line plotted
                 on the ROC and Precision-Recall curves (one for each label).
                 Defaults to 'darkorange'.
@@ -88,21 +105,684 @@ class MultiClassificationMetricsFormat:
                 - Diverging options: 'coolwarm', 'viridis', 'plasma', 'inferno'
             font_size (int): The base font size to apply to the plots. Defaults to 16.
+        <br>
+        ## [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
         """
-        self.threshold = threshold
         self.cmap = cmap
         self.ROC_PR_line = ROC_PR_line
         self.font_size = font_size
     def __repr__(self) -> str:
         parts = [
-            f"threshold={self.threshold}",
             f"ROC_PR_line='{self.ROC_PR_line}'",
             f"cmap='{self.cmap}'",
             f"font_size={self.font_size}"
         ]
-        return f"MultiClassificationMetricsFormat({', '.join(parts)})"
+        return f"{self.__class__.__name__}({', '.join(parts)})"
+class _BaseRegressionFormat:
+    """
+    [PRIVATE] Base configuration for regression metrics.
+    Holds plot-styling options as plain attributes; values are stored as given, without validation.
+    """
+    def __init__(self,
+                 font_size: int=16,
+                 scatter_color: str='tab:blue',
+                 scatter_alpha: float=0.6,
+                 ideal_line_color: str='k',
+                 residual_line_color: str='red',
+                 hist_bins: Union[int, str] = 'auto') -> None:
+        """
+        Initializes the formatting configuration for regression metrics.
+        Args:
+            font_size (int): The base font size to apply to the plots. Defaults to 16.
+            scatter_color (str): Matplotlib color for the scatter plot points. Defaults to 'tab:blue'.
+                - Common color names: 'tab:blue', 'crimson', 'forestgreen', '#4682B4'
+            scatter_alpha (float): Alpha transparency for scatter plot points. Defaults to 0.6.
+            ideal_line_color (str): Matplotlib color for the 'ideal' y=x line in the
+                True vs. Predicted plot. Defaults to 'k' (black).
+                - Common color names: 'k', 'red', 'darkgrey', '#FF6347'
+            residual_line_color (str): Matplotlib color for the y=0 line in the
+                Residual plot. Defaults to 'red'.
+                - Common color names: 'red', 'blue', 'k', '#4682B4'
+            hist_bins (int | str): The number of bins for the residuals histogram.
+                Defaults to 'auto' to use seaborn's automatic bin selection.
+                - Options: 'auto', 'sqrt', 10, 20
+        <br>
+        ## [Matplotlib Colors](https://matplotlib.org/stable/users/explain/colors/colors.html)
+        """
+        self.font_size = font_size
+        self.scatter_color = scatter_color
+        self.scatter_alpha = scatter_alpha
+        self.ideal_line_color = ideal_line_color
+        self.residual_line_color = residual_line_color
+        self.hist_bins = hist_bins
+    def __repr__(self) -> str:
+        """Return a constructor-style summary of the stored options."""
+        parts = [
+            f"font_size={self.font_size}",
+            f"scatter_color='{self.scatter_color}'",
+            f"scatter_alpha={self.scatter_alpha}",
+            f"ideal_line_color='{self.ideal_line_color}'",
+            f"residual_line_color='{self.residual_line_color}'",
+            # hist_bins may be an int or a str; !r quotes only the str form,
+            # so hist_bins=20 is no longer rendered as the string '20'.
+            f"hist_bins={self.hist_bins!r}"
+        ]
+        return f"{self.__class__.__name__}({', '.join(parts)})"
+class _BaseSegmentationFormat:
+    """
+    [PRIVATE] Shared plot-styling options for segmentation metrics.
+    """
+    def __init__(self,
+                 heatmap_cmap: str = 'viridis',
+                 cm_cmap: str = "Blues",
+                 font_size: int = 16) -> None:
+        """
+        Store the formatting options used when plotting segmentation metrics.
+        Args:
+            heatmap_cmap (str): Matplotlib colormap name for the per-class
+                metrics heatmap. Defaults to "viridis".
+                - Sequential options: 'viridis', 'plasma', 'inferno', 'cividis'
+                - Diverging options: 'coolwarm', 'bwr', 'seismic'
+            cm_cmap (str): Matplotlib colormap name for the pixel-level
+                confusion matrix. Defaults to "Blues".
+                - Sequential options: 'Blues', 'Greens', 'Reds', 'Oranges'
+            font_size (int): Base font size applied to the plots. Defaults to 16.
+        <br>
+        ## [Matplotlib Colormaps](https://matplotlib.org/stable/users/explain/colors/colormaps.html)
+        """
+        # Values are kept exactly as passed; nothing is validated here.
+        self.heatmap_cmap, self.cm_cmap, self.font_size = heatmap_cmap, cm_cmap, font_size
+    def __repr__(self) -> str:
+        """Return a constructor-style summary using the concrete class name."""
+        class_name = type(self).__name__
+        return (f"{class_name}(heatmap_cmap='{self.heatmap_cmap}', "
+                f"cm_cmap='{self.cm_cmap}', "
+                f"font_size={self.font_size})")
+class _BaseSequenceValueFormat:
+    """
+    [PRIVATE] Base configuration for sequence to value metrics.
+    Holds plot-styling options as plain attributes; values are stored as given, without validation.
+    """
+    def __init__(self,
+                 font_size: int=16,
+                 scatter_color: str='tab:blue',
+                 scatter_alpha: float=0.6,
+                 ideal_line_color: str='k',
+                 residual_line_color: str='red',
+                 hist_bins: Union[int, str] = 'auto') -> None:
+        """
+        Initializes the formatting configuration for sequence to value metrics.
+        Args:
+            font_size (int): The base font size to apply to the plots. Defaults to 16.
+            scatter_color (str): Matplotlib color for the scatter plot points. Defaults to 'tab:blue'.
+                - Common color names: 'tab:blue', 'crimson', 'forestgreen', '#4682B4'
+            scatter_alpha (float): Alpha transparency for scatter plot points. Defaults to 0.6.
+            ideal_line_color (str): Matplotlib color for the 'ideal' y=x line in the
+                True vs. Predicted plot. Defaults to 'k' (black).
+                - Common color names: 'k', 'red', 'darkgrey', '#FF6347'
+            residual_line_color (str): Matplotlib color for the y=0 line in the
+                Residual plot. Defaults to 'red'.
+                - Common color names: 'red', 'blue', 'k', '#4682B4'
+            hist_bins (int | str): The number of bins for the residuals histogram.
+                Defaults to 'auto' to use seaborn's automatic bin selection.
+                - Options: 'auto', 'sqrt', 10, 20
+        <br>
+        ## [Matplotlib Colors](https://matplotlib.org/stable/users/explain/colors/colors.html)
+        """
+        self.font_size = font_size
+        self.scatter_color = scatter_color
+        self.scatter_alpha = scatter_alpha
+        self.ideal_line_color = ideal_line_color
+        self.residual_line_color = residual_line_color
+        self.hist_bins = hist_bins
+    def __repr__(self) -> str:
+        """Return a constructor-style summary of the stored options."""
+        parts = [
+            f"font_size={self.font_size}",
+            f"scatter_color='{self.scatter_color}'",
+            f"scatter_alpha={self.scatter_alpha}",
+            f"ideal_line_color='{self.ideal_line_color}'",
+            f"residual_line_color='{self.residual_line_color}'",
+            # hist_bins may be an int or a str; !r quotes only the str form,
+            # so hist_bins=20 is no longer rendered as the string '20'.
+            f"hist_bins={self.hist_bins!r}"
+        ]
+        return f"{self.__class__.__name__}({', '.join(parts)})"
+class _BaseSequenceSequenceFormat:
+    """
+    [PRIVATE] Base configuration for sequence-to-sequence metrics.
+    Holds plot-styling options as plain attributes; values are stored as given, without validation.
+    """
+    def __init__(self,
+                 font_size: int = 16,
+                 plot_figsize: tuple[int, int] = (10, 6),
+                 grid_style: str = '--',
+                 rmse_color: str = 'tab:blue',
+                 rmse_marker: str = 'o-',
+                 mae_color: str = 'tab:orange',
+                 mae_marker: str = 's--') -> None:
+        """
+        Initializes the formatting configuration for seq-to-seq metrics.
+        Args:
+            font_size (int): The base font size to apply to the plots. Defaults to 16.
+            plot_figsize (Tuple[int, int]): Figure size for the plot. Defaults to (10, 6).
+            grid_style (str): Matplotlib linestyle for the plot grid. Defaults to '--'.
+                - Options: '--' (dashed), ':' (dotted), '-.' (dash-dot), '-' (solid)
+            rmse_color (str): Matplotlib color for the RMSE line. Defaults to 'tab:blue'.
+                - Common color names: 'tab:blue', 'crimson', 'forestgreen', '#4682B4'
+            rmse_marker (str): Matplotlib marker style for the RMSE line. Defaults to 'o-'.
+                - Options: 'o-' (circle), 's--' (square), '^:' (triangle), 'x' (x marker)
+            mae_color (str): Matplotlib color for the MAE line. Defaults to 'tab:orange'.
+                - Common color names: 'tab:orange', 'purple', 'black', '#FF6347'
+            mae_marker (str): Matplotlib marker style for the MAE line. Defaults to 's--'.
+                - Options: 's--', 'o-', 'v:', '+' (plus marker)
+        <br>
+        ## [Matplotlib Colors](https://matplotlib.org/stable/users/explain/colors/colors.html)
+        ## [Matplotlib Linestyles](https://matplotlib.org/stable/gallery/lines_bars_and_markers/linestyles.html)
+        ## [Matplotlib Markers](https://matplotlib.org/stable/api/markers_api.html)
+        """
+        self.font_size = font_size
+        self.plot_figsize = plot_figsize
+        self.grid_style = grid_style
+        self.rmse_color = rmse_color
+        self.rmse_marker = rmse_marker
+        self.mae_color = mae_color
+        self.mae_marker = mae_marker
+    def __repr__(self) -> str:
+        """Return a constructor-style summary listing every stored option."""
+        # Listed in constructor order; the marker fields were previously missing.
+        parts = [
+            f"font_size={self.font_size}",
+            f"plot_figsize={self.plot_figsize}",
+            f"grid_style='{self.grid_style}'",
+            f"rmse_color='{self.rmse_color}'",
+            f"rmse_marker='{self.rmse_marker}'",
+            f"mae_color='{self.mae_color}'",
+            f"mae_marker='{self.mae_marker}'"
+        ]
+        return f"{self.__class__.__name__}({', '.join(parts)})"
+# --- Public API classes ---
+# Regression
+class RegressionMetricsFormat(_BaseRegressionFormat):
+    """
+    Plot-formatting configuration for single-target regression.
+    Every argument is forwarded unchanged to `_BaseRegressionFormat.__init__`.
+    """
+    def __init__(self,
+                 font_size: int=16,
+                 scatter_color: str='tab:blue',
+                 scatter_alpha: float=0.6,
+                 ideal_line_color: str='k',
+                 residual_line_color: str='red',
+                 hist_bins: Union[int, str] = 'auto') -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            font_size=font_size,
+            scatter_color=scatter_color,
+            scatter_alpha=scatter_alpha,
+            ideal_line_color=ideal_line_color,
+            residual_line_color=residual_line_color,
+            hist_bins=hist_bins,
+        )
+        super().__init__(**forwarded)
+# Multitarget regression
+class MultiTargetRegressionMetricsFormat(_BaseRegressionFormat):
+    """
+    Plot-formatting configuration for multi-target regression.
+    Every argument is forwarded unchanged to `_BaseRegressionFormat.__init__`.
+    """
+    def __init__(self,
+                 font_size: int=16,
+                 scatter_color: str='tab:blue',
+                 scatter_alpha: float=0.6,
+                 ideal_line_color: str='k',
+                 residual_line_color: str='red',
+                 hist_bins: Union[int, str] = 'auto') -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            font_size=font_size,
+            scatter_color=scatter_color,
+            scatter_alpha=scatter_alpha,
+            ideal_line_color=ideal_line_color,
+            residual_line_color=residual_line_color,
+            hist_bins=hist_bins,
+        )
+        super().__init__(**forwarded)
+# Classification
+class BinaryClassificationMetricsFormat(_BaseClassificationFormat):
+    """
+    Plot-formatting configuration for binary classification.
+    Every argument is forwarded unchanged to `_BaseClassificationFormat.__init__`.
+    """
+    def __init__(self,
+                 cmap: str="Blues",
+                 ROC_PR_line: str='darkorange',
+                 calibration_bins: int=15,
+                 font_size: int=16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            cmap=cmap,
+            ROC_PR_line=ROC_PR_line,
+            calibration_bins=calibration_bins,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+class MultiClassClassificationMetricsFormat(_BaseClassificationFormat):
+    """
+    Plot-formatting configuration for multi-class classification.
+    Every argument is forwarded unchanged to `_BaseClassificationFormat.__init__`.
+    """
+    def __init__(self,
+                 cmap: str="Blues",
+                 ROC_PR_line: str='darkorange',
+                 calibration_bins: int=15,
+                 font_size: int=16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            cmap=cmap,
+            ROC_PR_line=ROC_PR_line,
+            calibration_bins=calibration_bins,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+class BinaryImageClassificationMetricsFormat(_BaseClassificationFormat):
+    """
+    Plot-formatting configuration for binary image classification.
+    Every argument is forwarded unchanged to `_BaseClassificationFormat.__init__`.
+    """
+    def __init__(self,
+                 cmap: str="Blues",
+                 ROC_PR_line: str='darkorange',
+                 calibration_bins: int=15,
+                 font_size: int=16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            cmap=cmap,
+            ROC_PR_line=ROC_PR_line,
+            calibration_bins=calibration_bins,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+class MultiClassImageClassificationMetricsFormat(_BaseClassificationFormat):
+    """
+    Plot-formatting configuration for multi-class image classification.
+    Every argument is forwarded unchanged to `_BaseClassificationFormat.__init__`.
+    """
+    def __init__(self,
+                 cmap: str="Blues",
+                 ROC_PR_line: str='darkorange',
+                 calibration_bins: int=15,
+                 font_size: int=16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            cmap=cmap,
+            ROC_PR_line=ROC_PR_line,
+            calibration_bins=calibration_bins,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+# Multi-Label classification
+class MultiLabelBinaryClassificationMetricsFormat(_BaseMultiLabelFormat):
+    """
+    Plot-formatting configuration for multi-label binary classification.
+    Every argument is forwarded unchanged to `_BaseMultiLabelFormat.__init__`.
+    """
+    def __init__(self,
+                 ROC_PR_line: str='darkorange',
+                 cmap: str = "Blues",
+                 font_size: int = 16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            ROC_PR_line=ROC_PR_line,
+            cmap=cmap,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+# Segmentation
+class BinarySegmentationMetricsFormat(_BaseSegmentationFormat):
+    """
+    Plot-formatting configuration for binary segmentation.
+    Every argument is forwarded unchanged to `_BaseSegmentationFormat.__init__`.
+    """
+    def __init__(self,
+                 heatmap_cmap: str = 'viridis',
+                 cm_cmap: str = "Blues",
+                 font_size: int = 16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            heatmap_cmap=heatmap_cmap,
+            cm_cmap=cm_cmap,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+class MultiClassSegmentationMetricsFormat(_BaseSegmentationFormat):
+    """
+    Plot-formatting configuration for multi-class segmentation.
+    Every argument is forwarded unchanged to `_BaseSegmentationFormat.__init__`.
+    """
+    def __init__(self,
+                 heatmap_cmap: str = 'viridis',
+                 cm_cmap: str = "Blues",
+                 font_size: int = 16) -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            heatmap_cmap=heatmap_cmap,
+            cm_cmap=cm_cmap,
+            font_size=font_size,
+        )
+        super().__init__(**forwarded)
+# Sequence
+class SequenceValueMetricsFormat(_BaseSequenceValueFormat):
+    """
+    Plot-formatting configuration for sequence-to-value prediction.
+    Every argument is forwarded unchanged to `_BaseSequenceValueFormat.__init__`.
+    """
+    def __init__(self,
+                 font_size: int=16,
+                 scatter_color: str='tab:blue',
+                 scatter_alpha: float=0.6,
+                 ideal_line_color: str='k',
+                 residual_line_color: str='red',
+                 hist_bins: Union[int, str] = 'auto') -> None:
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            font_size=font_size,
+            scatter_color=scatter_color,
+            scatter_alpha=scatter_alpha,
+            ideal_line_color=ideal_line_color,
+            residual_line_color=residual_line_color,
+            hist_bins=hist_bins,
+        )
+        super().__init__(**forwarded)
+class SequenceSequenceMetricsFormat(_BaseSequenceSequenceFormat):
+    """
+    Plot-formatting configuration for sequence-to-sequence prediction.
+    Every argument is forwarded unchanged to `_BaseSequenceSequenceFormat.__init__`.
+    """
+    def __init__(self,
+                 font_size: int = 16,
+                 plot_figsize: tuple[int, int] = (10, 6),
+                 grid_style: str = '--',
+                 rmse_color: str = 'tab:blue',
+                 rmse_marker: str = 'o-',
+                 mae_color: str = 'tab:orange',
+                 mae_marker: str = 's--'):
+        # Gather the options once, then hand them to the shared base initializer.
+        forwarded = dict(
+            font_size=font_size,
+            plot_figsize=plot_figsize,
+            grid_style=grid_style,
+            rmse_color=rmse_color,
+            rmse_marker=rmse_marker,
+            mae_color=mae_color,
+            mae_marker=mae_marker,
+        )
+        super().__init__(**forwarded)
+# -------- Finalize classes --------
+class _FinalizeModelTraining:
+    """
+    Common base for the Finalize* parameter holders.
+    Not meant to be instantiated directly; use one of the task-specific subclasses instead.
+    """
+    def __init__(self,
+                 filename: str,
+                 ) -> None:
+        # The filename is checked first: it is sanitized and given a ".pth" suffix if missing.
+        checked_filename = _validate_string(filename, "filename", ".pth")
+        self.filename = checked_filename
+        # Task-specific fields all start unset; subclasses fill in the ones they use.
+        self.target_name: Optional[str] = None
+        self.target_names: Optional[list[str]] = None
+        self.classification_threshold: Optional[float] = None
+        self.class_map: Optional[dict[str,int]] = None
+        self.initial_sequence: Optional[np.ndarray] = None
+        self.sequence_length: Optional[int] = None
+class FinalizeRegression(_FinalizeModelTraining):
+    """Finalization settings for a single-target regression model."""
+    def __init__(self,
+                 filename: str,
+                 target_name: str,
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            target_name (str): Name of the target variable.
+        """
+        super().__init__(filename=filename)
+        # Only the type is checked here; the name itself is stored unchanged.
+        checked_name = _validate_string(target_name, "Target name")
+        self.target_name = checked_name
+class FinalizeMultiTargetRegression(_FinalizeModelTraining):
+    """Finalization settings for a multi-target regression model."""
+    def __init__(self,
+                 filename: str,
+                 target_names: list[str],
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            target_names (list[str]): Names of the target variables.
+        """
+        super().__init__(filename=filename)
+        # Check every name before storing any; the first non-string aborts construction.
+        checked_names = []
+        for name in target_names:
+            checked_names.append(_validate_string(string=name, attribute_name="All target names"))
+        self.target_names = checked_names
+class FinalizeBinaryClassification(_FinalizeModelTraining):
+    """Finalization settings for a binary classification model."""
+    def __init__(self,
+                 filename: str,
+                 target_name: str,
+                 classification_threshold: float,
+                 class_map: dict[str,int]
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            target_name (str): Name of the target variable.
+            classification_threshold (float): Cutoff for assigning the positive class.
+            class_map (dict[str,int]): Class names mapped to their integer
+                representations (e.g., {'cat': 0, 'dog': 1}).
+        """
+        super().__init__(filename=filename)
+        # Arguments are validated in declaration order; the first invalid one raises.
+        checked_name = _validate_string(target_name, "Target name")
+        self.target_name = checked_name
+        checked_threshold = _validate_threshold(classification_threshold)
+        self.classification_threshold = checked_threshold
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeMultiClassClassification(_FinalizeModelTraining):
+    """Finalization settings for a multi-class classification model."""
+    def __init__(self,
+                 filename: str,
+                 target_name: str,
+                 class_map: dict[str,int]
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            target_name (str): Name of the target variable.
+            class_map (dict[str,int]): Class names mapped to their integer
+                representations (e.g., {'cat': 0, 'dog': 1}).
+        """
+        super().__init__(filename=filename)
+        # Arguments are validated in declaration order; the first invalid one raises.
+        checked_name = _validate_string(target_name, "Target name")
+        self.target_name = checked_name
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeBinaryImageClassification(_FinalizeModelTraining):
+    """Finalization settings for a binary image classification model."""
+    def __init__(self,
+                 filename: str,
+                 classification_threshold: float,
+                 class_map: dict[str,int]
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            classification_threshold (float): Cutoff for assigning the positive class.
+            class_map (dict[str,int]): Class names mapped to their integer
+                representations (e.g., {'cat': 0, 'dog': 1}).
+        """
+        super().__init__(filename=filename)
+        # Threshold is validated before the class map; the first invalid one raises.
+        checked_threshold = _validate_threshold(classification_threshold)
+        self.classification_threshold = checked_threshold
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeMultiClassImageClassification(_FinalizeModelTraining):
+    """Finalization settings for a multi-class image classification model."""
+    def __init__(self,
+                 filename: str,
+                 class_map: dict[str,int]
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            class_map (dict[str,int]): Class names mapped to their integer
+                representations (e.g., {'cat': 0, 'dog': 1}).
+        """
+        super().__init__(filename=filename)
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeMultiLabelBinaryClassification(_FinalizeModelTraining):
+    """Finalization settings for a multi-label binary classification model."""
+    def __init__(self,
+                 filename: str,
+                 target_names: list[str],
+                 classification_threshold: float,
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            target_names (list[str]): Names of the target variables.
+            classification_threshold (float): Cutoff for assigning the positive class.
+        """
+        super().__init__(filename=filename)
+        # Check every name before storing any; the first non-string aborts construction.
+        checked_names = []
+        for name in target_names:
+            checked_names.append(_validate_string(string=name, attribute_name="All target names"))
+        self.target_names = checked_names
+        checked_threshold = _validate_threshold(classification_threshold)
+        self.classification_threshold = checked_threshold
+class FinalizeBinarySegmentation(_FinalizeModelTraining):
+    """Finalization settings for a binary segmentation model."""
+    def __init__(self,
+                 filename: str,
+                 class_map: dict[str,int],
+                 classification_threshold: float,
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            class_map (dict[str,int]): Class map, checked with `_validate_class_map`.
+            classification_threshold (float): Cutoff for assigning the positive class (mask).
+        """
+        super().__init__(filename=filename)
+        # The threshold is validated before the class map, despite the parameter order.
+        checked_threshold = _validate_threshold(classification_threshold)
+        self.classification_threshold = checked_threshold
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeMultiClassSegmentation(_FinalizeModelTraining):
+    """Finalization settings for a multi-class segmentation model."""
+    def __init__(self,
+                 filename: str,
+                 class_map: dict[str,int]
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            class_map (dict[str,int]): Class map, checked with `_validate_class_map`.
+        """
+        super().__init__(filename=filename)
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeObjectDetection(_FinalizeModelTraining):
+    """Finalization settings for an object detection model."""
+    def __init__(self,
+                 filename: str,
+                 class_map: dict[str,int]
+                 ) -> None:
+        """Validate and store the finalization settings.
+        Args:
+            filename (str): Name of the file to be saved.
+            class_map (dict[str,int]): Class map, checked with `_validate_class_map`.
+        """
+        super().__init__(filename=filename)
+        checked_map = _validate_class_map(class_map)
+        self.class_map = checked_map
+class FinalizeSequencePrediction(_FinalizeModelTraining):
+    """Parameters for finalizing a sequence prediction model."""
+    def __init__(self,
+                 filename: str,
+                 last_training_sequence: np.ndarray,
+                 ) -> None:
+        """Initializes the finalization parameters.
+        Args:
+            filename (str): The name of the file to be saved.
+            last_training_sequence (np.ndarray): The last sequence from the training data, needed to start predictions.
+                Accepted shapes are (N,) and (1, N); a (1, N) array is flattened to (N,).
+        Raises:
+            TypeError: If `last_training_sequence` is not a numpy array.
+            ValueError: If `last_training_sequence` has any other shape.
+        """
+        super().__init__(filename=filename)
+        if not isinstance(last_training_sequence, np.ndarray):
+            message = f"The last training sequence must be a 1D numpy array, got {type(last_training_sequence)}."
+            _LOGGER.error(message)
+            # Carry the message on the exception too, so callers see it without the log.
+            raise TypeError(message)
+        if last_training_sequence.ndim == 1:
+            # Already (N,): store as-is.
+            self.initial_sequence = last_training_sequence
+        elif last_training_sequence.ndim == 2 and last_training_sequence.shape[0] == 1:
+            # (1, N): flatten to (N,).
+            self.initial_sequence = last_training_sequence.flatten()
+        else:
+            # 0-D, (N, 1), (N, M), or 3-D and above are all rejected.
+            message = f"The last training sequence must be a 1D numpy array, got shape {last_training_sequence.shape}."
+            _LOGGER.error(message)
+            raise ValueError(message)
+        # Save the length of the validated 1D sequence
+        self.sequence_length = len(self.initial_sequence)
+def _validate_string(string: str, attribute_name: str, extension: Optional[str]=None) -> str:
+    """Helper for finalize classes: type-check a string and optionally make it a safe filename.
+    Args:
+        string (str): The value to check.
+        attribute_name (str): Label used in the error message.
+        extension (str | None): If given, the value is passed through `sanitize_filename`
+            and the extension is appended when not already present.
+    Returns:
+        str: The input unchanged when no extension is given, otherwise the sanitized filename.
+    Raises:
+        TypeError: If `string` is not a `str`.
+    """
+    if not isinstance(string, str):
+        message = f"{attribute_name} must be a string."
+        _LOGGER.error(message)
+        # Carry the message on the exception too, so callers see it without the log.
+        raise TypeError(message)
+    if not extension:
+        return string
+    safe_name = sanitize_filename(string)
+    if not safe_name.endswith(extension):
+        safe_name += extension
+    return safe_name
+def _validate_threshold(threshold: float) -> float:
+    """Helper for finalize classes: check that a classification threshold lies strictly between 0 and 1.
+    Args:
+        threshold (float): The cutoff to validate.
+    Returns:
+        float: The threshold, unchanged.
+    Raises:
+        TypeError: If `threshold` is not a `float`.
+        ValueError: If `threshold` is not strictly between 0.0 and 1.0 (NaN included).
+    """
+    if not isinstance(threshold, float):
+        message = "Classification threshold must be a float."
+        _LOGGER.error(message)
+        raise TypeError(message)
+    # A chained comparison is False for NaN, so NaN is rejected; the previous
+    # `<= 0.0 or >= 1.0` test let NaN through.
+    if not 0.0 < threshold < 1.0:
+        message = "Classification threshold must be strictly between 0.0 and 1.0."
+        _LOGGER.error(message)
+        raise ValueError(message)
+    return threshold
+def _validate_class_map(map: dict[str,int]) -> dict[str,int]:
+    """Helper for finalize classes: check that a class map is a dict of string keys and integer values.
+    Args:
+        map (dict[str,int]): The class map to validate. The name shadows the builtin
+            `map`; it is kept so existing keyword callers continue to work.
+    Returns:
+        dict[str,int]: The same dictionary object, unchanged.
+    Raises:
+        TypeError: If `map` is not a dict, or any key is not a `str`, or any value is not an `int`.
+    """
+    # Values must be ints; the previous check tested for `str` values, which rejected
+    # every valid class map and accepted {'cat': 'dog'}.
+    is_valid = (
+        isinstance(map, dict)
+        and all(isinstance(key, str) for key in map)
+        and all(isinstance(val, int) for val in map.values())
+    )
+    if not is_valid:
+        message = "Class map must be a dictionary of string keys and integer values."
+        _LOGGER.error(message)
+        raise TypeError(message)
+    return map
 def info():
     """Pass this module's `__all__` list to `_script_info`."""
     _script_info(__all__)

dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.0__py3-none-any.whl

dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.0__py3-none-any.whl