dragon-ml-toolbox 13.3.0__py3-none-any.whl → 16.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/METADATA +20 -6
- dragon_ml_toolbox-16.2.0.dist-info/RECORD +51 -0
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +10 -0
- ml_tools/ETL_cleaning.py +20 -20
- ml_tools/ETL_engineering.py +23 -25
- ml_tools/GUI_tools.py +20 -20
- ml_tools/MICE_imputation.py +207 -5
- ml_tools/ML_callbacks.py +43 -26
- ml_tools/ML_configuration.py +788 -0
- ml_tools/ML_datasetmaster.py +303 -448
- ml_tools/ML_evaluation.py +351 -93
- ml_tools/ML_evaluation_multi.py +139 -42
- ml_tools/ML_inference.py +290 -209
- ml_tools/ML_models.py +33 -106
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_optimization.py +12 -12
- ml_tools/ML_scaler.py +11 -11
- ml_tools/ML_sequence_datasetmaster.py +341 -0
- ml_tools/ML_sequence_evaluation.py +219 -0
- ml_tools/ML_sequence_inference.py +391 -0
- ml_tools/ML_sequence_models.py +139 -0
- ml_tools/ML_trainer.py +1604 -179
- ml_tools/ML_utilities.py +351 -4
- ml_tools/ML_vision_datasetmaster.py +1540 -0
- ml_tools/ML_vision_evaluation.py +284 -0
- ml_tools/ML_vision_inference.py +405 -0
- ml_tools/ML_vision_models.py +641 -0
- ml_tools/ML_vision_transformers.py +284 -0
- ml_tools/PSO_optimization.py +6 -6
- ml_tools/SQL.py +4 -4
- ml_tools/_keys.py +171 -0
- ml_tools/_schema.py +1 -1
- ml_tools/custom_logger.py +37 -14
- ml_tools/data_exploration.py +502 -93
- ml_tools/ensemble_evaluation.py +54 -11
- ml_tools/ensemble_inference.py +7 -33
- ml_tools/ensemble_learning.py +1 -1
- ml_tools/math_utilities.py +1 -1
- ml_tools/optimization_tools.py +2 -2
- ml_tools/path_manager.py +5 -5
- ml_tools/serde.py +2 -2
- ml_tools/utilities.py +192 -4
- dragon_ml_toolbox-13.3.0.dist-info/RECORD +0 -41
- ml_tools/RNN_forecast.py +0 -56
- ml_tools/keys.py +0 -87
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-13.3.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_scaler.py
CHANGED
@@ -9,11 +9,11 @@ from .path_manager import make_fullpath
 
 
 __all__ = [
-    "PytorchScaler"
+    "DragonScaler"
 ]
 
 
-class PytorchScaler:
+class DragonScaler:
     """
     Standardizes continuous features in a PyTorch dataset by subtracting the
     mean and dividing by the standard deviation.
@@ -38,7 +38,7 @@ class PytorchScaler:
         self.continuous_feature_indices = continuous_feature_indices
 
     @classmethod
-    def fit(cls, dataset: Dataset, continuous_feature_indices: List[int], batch_size: int = 64) -> 'PytorchScaler':
+    def fit(cls, dataset: Dataset, continuous_feature_indices: List[int], batch_size: int = 64) -> 'DragonScaler':
         """
         Fits the scaler by computing the mean and std dev from a dataset using a
         fast, single-pass, vectorized algorithm.
@@ -50,7 +50,7 @@ class PytorchScaler:
             batch_size (int): The batch size for iterating through the dataset.
 
         Returns:
-            PytorchScaler: A new, fitted instance of the scaler.
+            DragonScaler: A new, fitted instance of the scaler.
         """
         if not continuous_feature_indices:
             _LOGGER.error("No continuous feature indices provided. Scaler will not be fitted.")
@@ -167,10 +167,10 @@ class PytorchScaler:
         }
         torch.save(state, path_obj)
         if verbose:
-            _LOGGER.info(f"PytorchScaler state saved as '{path_obj.name}'.")
+            _LOGGER.info(f"DragonScaler state saved as '{path_obj.name}'.")
 
     @staticmethod
-    def load(filepath: Union[str, Path], verbose: bool=True) -> 'PytorchScaler':
+    def load(filepath: Union[str, Path], verbose: bool=True) -> 'DragonScaler':
         """
         Loads a scaler's state from a .pth file.
 
@@ -178,13 +178,13 @@ class PytorchScaler:
             filepath (str | Path): The path to the saved scaler file.
 
         Returns:
-            PytorchScaler: An instance of the scaler with the loaded state.
+            DragonScaler: An instance of the scaler with the loaded state.
         """
         path_obj = make_fullpath(filepath, enforce="file")
         state = torch.load(path_obj)
         if verbose:
-            _LOGGER.info(f"PytorchScaler state loaded from '{path_obj.name}'.")
-        return PytorchScaler(
+            _LOGGER.info(f"DragonScaler state loaded from '{path_obj.name}'.")
+        return DragonScaler(
             mean=state['mean'],
             std=state['std'],
             continuous_feature_indices=state['continuous_feature_indices']
@@ -194,8 +194,8 @@ class PytorchScaler:
         """Returns the developer-friendly string representation of the scaler."""
         if self.continuous_feature_indices:
            num_features = len(self.continuous_feature_indices)
-            return f"PytorchScaler(fitted for {num_features} features)"
-        return "PytorchScaler(not fitted)"
+            return f"DragonScaler(fitted for {num_features} features)"
+        return "DragonScaler(not fitted)"
 
 
 def info():
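The rename is purely mechanical: the class previously published as PytorchScaler keeps its fit/save/load/transform API under the new DragonScaler name. A minimal usage sketch follows, assuming DragonScaler.fit accepts any map-style Dataset that yields (features, labels) pairs (this package pairs it with its own _PytorchDataset; the TensorDataset, file path, and feature indices below are illustrative, not from the package):

import torch
from torch.utils.data import TensorDataset

from ml_tools.ML_scaler import DragonScaler

# Toy data: 100 samples, 3 features; features 0 and 2 treated as continuous.
X = torch.randn(100, 3)
y = torch.zeros(100)  # dummy labels; the scaler only reads the features
dataset = TensorDataset(X, y)

# Single-pass mean/std computed over the chosen feature columns.
scaler = DragonScaler.fit(dataset, continuous_feature_indices=[0, 2], batch_size=32)

scaler.save("dragon_scaler.pth")                  # logs "DragonScaler state saved as ..."
restored = DragonScaler.load("dragon_scaler.pth")
print(restored)                                   # DragonScaler(fitted for 2 features)

Because fit/load both return a DragonScaler instance, downstream code (such as the sequence dataset maker below) can treat a freshly fitted and a deserialized scaler identically.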
ml_tools/ML_sequence_datasetmaster.py
ADDED
@@ -0,0 +1,341 @@
+import torch
+from torch.utils.data import Dataset
+import pandas
+import numpy
+from typing import Literal, Union, Tuple
+import matplotlib.pyplot as plt
+from pathlib import Path
+
+from .path_manager import make_fullpath
+from ._logger import _LOGGER
+from ._script_info import _script_info
+from .ML_scaler import DragonScaler
+from .ML_datasetmaster import _PytorchDataset
+from ._keys import DatasetKeys, MLTaskKeys
+
+
+__all__ = [
+    "DragonDatasetSequence"
+]
+
+
+# --- SequenceMaker ---
+class DragonDatasetSequence:
+    """
+    Creates windowed PyTorch datasets from a univariate (one feature) sequential data.
+
+    Automatic Pipeline:
+
+    1. Split Data: Separate data into training, validation, and testing portions.
+    2. Normalize Data: Normalize the data. The scaler will be fitted on the training portion.
+    3. Generate Windows: Create the windowed sequences from the normalized splits.
+    """
+    def __init__(self,
+                 prediction_mode: Literal["sequence-to-sequence", "sequence-to-value"],
+                 data: Union[pandas.DataFrame, pandas.Series, numpy.ndarray],
+                 sequence_length: int,
+                 validation_size: float = 0.2,
+                 test_size: float = 0.1):
+        """
+        Initializes the dataset manager and automatically processes the data.
+
+        The constructor runs the full pipeline:
+        1. Splits the data chronologically (train, validation, test).
+        2. Fits a DragonScaler on the training split.
+        3. Normalizes all splits using the fitted scaler.
+        4. Generates windowed datasets for training, validation, and testing.
+
+        Args:
+            prediction_mode: The type of sequence task.
+            data: The input univariate time-series data.
+                - If pandas.DataFrame: The index is used for the time axis
+                  and the *first column* is used as the sequence.
+                - If pandas.Series: The index is used for the time axis.
+                - If numpy.ndarray: A simple integer range is used for the time axis.
+            sequence_length (int): The number of time steps in each input window (X).
+            validation_size (float): The fraction of data to hold out for validation.
+            test_size (float): The fraction of data to hold out for testing.
+        """
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
+        self.sequence_length = sequence_length
+        self.scaler = None
+
+        if not prediction_mode in [MLTaskKeys.SEQUENCE_SEQUENCE, MLTaskKeys.SEQUENCE_VALUE]:
+            _LOGGER.error(f"Unrecognized prediction mode: '{prediction_mode}'.")
+            raise ValueError()
+        else:
+            self.prediction_mode = prediction_mode
+
+        if isinstance(data, pandas.DataFrame):
+            self.time_axis = data.index.values
+            self.sequence = data.iloc[:, 0].values.astype(numpy.float32)
+        elif isinstance(data, pandas.Series):
+            self.time_axis = data.index.values
+            self.sequence = data.values.astype(numpy.float32)
+        elif isinstance(data, numpy.ndarray):
+            self.time_axis = numpy.arange(len(data))
+            self.sequence = data.astype(numpy.float32)
+        else:
+            _LOGGER.error("Data must be a pandas DataFrame/Series or a numpy array.")
+            raise TypeError()
+
+        self.train_sequence = None
+        self.val_sequence = None
+        self.test_sequence = None
+
+        self.train_time_axis = None
+        self.val_time_axis = None
+        self.test_time_axis = None
+
+        self._is_split = False
+        self._is_normalized = False
+        self._are_windows_generated = False
+
+        # Automation
+        self._split_data(validation_size=validation_size, test_size=test_size)
+        self._normalize_data()
+        self._generate_windows()
+
+    def _split_data(self, validation_size: float = 0.2, test_size: float = 0.1) -> None:
+        """
+        Splits the sequence chronologically into training, validation, and testing portions.
+
+        To prevent windowing errors, the validation and test sets include an overlap of `sequence_length` from the preceding data.
+        """
+        if self._is_split:
+            _LOGGER.warning("Data has already been split.")
+            return
+
+        if (validation_size + test_size) >= 1.0:
+            _LOGGER.error(f"The sum of validation_size ({validation_size}) and test_size ({test_size}) must be less than 1.0.")
+            raise ValueError("validation_size and test_size sum must be < 1.0")
+
+        total_size = len(self.sequence)
+
+        # Calculate split indices
+        test_split_idx = int(total_size * (1 - test_size))
+        val_split_idx = int(total_size * (1 - test_size - validation_size))
+
+        # --- Create sequences ---
+        # Train sequence is from the beginning to the validation index
+        self.train_sequence = self.sequence[:val_split_idx]
+
+        # Validation sequence starts `sequence_length` before its split index for windowing
+        self.val_sequence = self.sequence[val_split_idx - self.sequence_length : test_split_idx]
+
+        # Test sequence starts `sequence_length` before its split index for windowing
+        self.test_sequence = self.sequence[test_split_idx - self.sequence_length:]
+
+        # --- Create time axes ---
+        self.train_time_axis = self.time_axis[:val_split_idx]
+        # The "plottable" validation/test time axes start from their respective split indices
+        self.val_time_axis = self.time_axis[val_split_idx : test_split_idx]
+        self.test_time_axis = self.time_axis[test_split_idx:]
+
+        self._is_split = True
+        _LOGGER.info(f"Sequence split into training ({len(self.train_sequence)}), validation ({len(self.val_sequence)}), and testing ({len(self.test_sequence)}) points.")
+
+    def _normalize_data(self) -> None:
+        """
+        Normalizes the sequence data using DragonScaler. Must be called AFTER splitting to prevent data leakage from the test set.
+        """
+        if not self._is_split:
+            _LOGGER.error("Data must be split BEFORE normalizing.")
+            raise RuntimeError()
+
+        if self.scaler:
+            _LOGGER.warning("Data has already been normalized.")
+            return
+
+        # 1. DragonScaler requires a Dataset to fit. Create a temporary one.
+        # The scaler expects 2D data [n_samples, n_features].
+        train_features = self.train_sequence.reshape(-1, 1) # type: ignore
+
+        # _PytorchDataset needs labels, so we create dummy ones.
+        dummy_labels = numpy.zeros(len(train_features))
+        temp_train_ds = _PytorchDataset(train_features, dummy_labels, labels_dtype=torch.float32)
+
+        # 2. Fit the DragonScaler on the temporary training dataset.
+        # The sequence is a single feature, so its index is [0].
+        _LOGGER.info("Fitting DragonScaler on the training data...")
+        self.scaler = DragonScaler.fit(temp_train_ds, continuous_feature_indices=[0])
+
+        # 3. Transform sequences using the fitted scaler.
+        # The transform method requires a tensor, so we convert, transform, and convert back.
+        train_tensor = torch.tensor(self.train_sequence.reshape(-1, 1), dtype=torch.float32) # type: ignore
+        val_tensor = torch.tensor(self.val_sequence.reshape(-1, 1), dtype=torch.float32) # type: ignore
+        test_tensor = torch.tensor(self.test_sequence.reshape(-1, 1), dtype=torch.float32) # type: ignore
+
+        self.train_sequence = self.scaler.transform(train_tensor).numpy().flatten()
+        self.val_sequence = self.scaler.transform(val_tensor).numpy().flatten()
+        self.test_sequence = self.scaler.transform(test_tensor).numpy().flatten()
+
+        self._is_normalized = True
+        _LOGGER.info("Sequence data normalized using DragonScaler.")
+
+    def _generate_windows(self) -> None:
+        """
+        Generates overlapping windows for features and labels.
+        """
+        if not self._is_split:
+            _LOGGER.error("Cannot generate windows before splitting data.")
+            raise RuntimeError()
+
+        if not self._is_normalized:
+            _LOGGER.error("Cannot generate windows before normalizing data.")
+            raise RuntimeError()
+
+        if self._are_windows_generated:
+            _LOGGER.warning("Windows have already been generated.")
+            return
+
+        self._train_dataset = self._create_windowed_dataset(self.train_sequence) # type: ignore
+        self._val_dataset = self._create_windowed_dataset(self.val_sequence) # type: ignore
+        self._test_dataset = self._create_windowed_dataset(self.test_sequence) # type: ignore
+
+        self._are_windows_generated = True
+        _LOGGER.info("Feature and label windows generated for train, validation, and test sets.")
+
+    def _create_windowed_dataset(self, data: numpy.ndarray) -> Dataset:
+        """Efficiently creates windowed features and labels using numpy."""
+        if len(data) <= self.sequence_length:
+            # Validation/Test sets of size 0 might be passed
+            _LOGGER.warning(f"Data length ({len(data)}) is not greater than sequence_length ({self.sequence_length}). Cannot create windows. Returning empty dataset.")
+            return _PytorchDataset(numpy.array([]), numpy.array([]), labels_dtype=torch.float32)
+
+        if self.prediction_mode == MLTaskKeys.SEQUENCE_VALUE:
+            # sequence-to-value
+            features = data[:-1]
+            labels = data[self.sequence_length:]
+
+            n_windows = len(features) - self.sequence_length + 1
+            bytes_per_item = features.strides[0]
+            strided_features = numpy.lib.stride_tricks.as_strided(
+                features, shape=(n_windows, self.sequence_length), strides=(bytes_per_item, bytes_per_item)
+            )
+            # Ensure labels align with the end of each feature window
+            aligned_labels = labels[:n_windows]
+            return _PytorchDataset(strided_features, aligned_labels, labels_dtype=torch.float32)
+
+        else:
+            # Sequence-to-sequence
+            x_data = data[:-1]
+            y_data = data[1:]
+
+            n_windows = len(x_data) - self.sequence_length + 1
+            bytes_per_item = x_data.strides[0]
+
+            strided_x = numpy.lib.stride_tricks.as_strided(x_data, shape=(n_windows, self.sequence_length), strides=(bytes_per_item, bytes_per_item))
+            strided_y = numpy.lib.stride_tricks.as_strided(y_data, shape=(n_windows, self.sequence_length), strides=(bytes_per_item, bytes_per_item))
+
+            return _PytorchDataset(strided_x, strided_y, labels_dtype=torch.float32)
+
+    def plot_splits(self, save_dir: Union[str, Path]):
+        """Plots the training, validation and testing data."""
+        if not self._is_split:
+            _LOGGER.error("Cannot plot before splitting data.")
+            raise RuntimeError()
+
+        if self.scaler is None:
+            _LOGGER.error("Cannot plot: data has not been normalized, or scaler is missing.")
+            return
+
+        save_path = make_fullpath(save_dir, make=True, enforce="directory")
+        full_path = save_path / "SequenceSplits.svg"
+
+        plt.figure(figsize=(15, 6))
+        plt.title("Sequential Data")
+        plt.grid(True)
+        plt.xlabel("Sequence")
+        plt.ylabel("Value")
+
+        # Plot denormalized training data
+        plt.plot(self.train_time_axis, self.scaler.inverse_transform(self.train_sequence.reshape(-1, 1)), label='Train Data') # type: ignore
+
+        # Plot denormalized validation data
+        # We must skip the overlapping 'sequence_length' part for plotting
+        val_plot_data = self.val_sequence[self.sequence_length:] # type: ignore
+        plt.plot(self.val_time_axis, self.scaler.inverse_transform(val_plot_data.reshape(-1, 1)), label='Validation Data', c='orange') # type: ignore
+
+        # Plot denormalized test data
+        # We must skip the overlapping 'sequence_length' part for plotting
+        test_plot_data = self.test_sequence[self.sequence_length:] # type: ignore
+        plt.plot(self.test_time_axis, self.scaler.inverse_transform(test_plot_data.reshape(-1, 1)), label='Test Data', c='green') # type: ignore
+
+        plt.legend()
+
+        plt.tight_layout()
+        plt.savefig(full_path)
+        _LOGGER.info(f"📈 Sequence data splits saved as '{full_path.name}'.")
+        plt.close()
+
+    def get_datasets(self) -> Tuple[Dataset, Dataset, Dataset]:
+        """Returns the final train, validation, and test datasets."""
+        if not self._are_windows_generated:
+            _LOGGER.error("Windows have not been generated. Call .generate_windows() first.")
+            raise RuntimeError()
+        return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+
+    def save_scaler(self, directory: Union[str, Path], verbose: bool=True) -> None:
+        """
+        Saves the fitted DragonScaler's state to a .pth file.
+
+        Args:
+            directory (str | Path): The directory where the scaler will be saved.
+        """
+        if not self.scaler:
+            _LOGGER.error("No scaler was fitted or provided.")
+            raise RuntimeError()
+
+        save_path = make_fullpath(directory, make=True, enforce="directory")
+
+        filename = f"{DatasetKeys.SCALER_PREFIX}{self.prediction_mode}.pth"
+        filepath = save_path / filename
+        self.scaler.save(filepath, verbose=False)
+        if verbose:
+            _LOGGER.info(f"Scaler saved as '{filepath.name}'.")
+
+    def get_last_training_sequence(self) -> numpy.ndarray:
+        """
+        Returns the final, un-scaled sequence from the training data.
+        """
+        if not self._is_split:
+            _LOGGER.error("Data has not been split. Cannot get last training sequence.")
+            raise RuntimeError()
+
+        # The length of train_time_axis is our validation split index
+        val_split_idx = len(self.train_time_axis) # type: ignore
+
+        if val_split_idx < self.sequence_length:
+            _LOGGER.error(f"Training data length ({val_split_idx}) is less than sequence_length ({self.sequence_length}).")
+            raise ValueError()
+
+        # Get the slice from the *original* sequence
+        start_idx = val_split_idx - self.sequence_length
+        end_idx = val_split_idx
+
+        return self.sequence[start_idx:end_idx]
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f" Prediction Mode: {self.prediction_mode}\n"
+        s += f" Sequence Length (Window): {self.sequence_length}\n"
+        s += f" Total Data Points: {len(self.sequence)}\n"
+        s += " --- Status ---\n"
+        s += f" Split: {self._is_split}\n"
+        s += f" Normalized: {self._is_normalized}\n"
+        s += f" Windows Generated: {self._are_windows_generated}\n"
+
+        if self._are_windows_generated:
+            train_len = len(self._train_dataset) if self._train_dataset else 0 # type: ignore
+            val_len = len(self._val_dataset) if self._val_dataset else 0 # type: ignore
+            test_len = len(self._test_dataset) if self._test_dataset else 0 # type: ignore
+            s += f" Datasets (Train | Validation | Test): {train_len} | {val_len} | {test_len} windows\n"
+
+        return s
+
+
+def info():
+    _script_info(__all__)
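The windowing step is the only subtle part of this new module: in sequence-to-value mode, `sequence_length` consecutive points form the input window and the next point is the label, so with `sequence_length=3` the series [1, 2, 3, 4, 5] yields [1, 2, 3] → 4 and [2, 3, 4] → 5; in sequence-to-sequence mode the label window is simply the input window shifted one step forward. A hypothetical end-to-end sketch of the constructor pipeline (the sine-wave data, sizes, and output directory are illustrative, and passing the "sequence-to-value" literal assumes it matches MLTaskKeys.SEQUENCE_VALUE, as the Literal type hint suggests):

import numpy as np

from ml_tools.ML_sequence_datasetmaster import DragonDatasetSequence

# Illustrative univariate series: a noisy sine wave with 1,000 points.
rng = np.random.default_rng(42)
series = np.sin(np.linspace(0.0, 40.0, 1000)) + rng.normal(0.0, 0.1, 1000)

# The constructor runs the whole pipeline: chronological 70/20/10 split,
# DragonScaler fitted on the training portion only, normalization of all
# splits, then stride-tricks windowing of each split.
sequence_data = DragonDatasetSequence(
    prediction_mode="sequence-to-value",
    data=series,
    sequence_length=32,
    validation_size=0.2,
    test_size=0.1,
)

train_ds, val_ds, test_ds = sequence_data.get_datasets()
sequence_data.save_scaler("artifacts")  # filename prefix comes from DatasetKeys.SCALER_PREFIX
sequence_data.plot_splits("artifacts")  # writes 'SequenceSplits.svg'

Note the design choice in _split_data: the validation and test slices deliberately reach back `sequence_length` points into the preceding split so that their first windows can be formed, while the plotting time axes skip that overlap to avoid drawing the same points twice.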
ml_tools/ML_sequence_evaluation.py
ADDED
@@ -0,0 +1,219 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import (
+    mean_squared_error,
+    mean_absolute_error,
+    r2_score,
+    median_absolute_error,
+)
+from pathlib import Path
+from typing import Union, Optional
+
+from .path_manager import make_fullpath
+from ._logger import _LOGGER
+from ._script_info import _script_info
+from .ML_configuration import SequenceValueMetricsFormat, SequenceSequenceMetricsFormat, _BaseSequenceValueFormat, _BaseSequenceSequenceFormat
+
+
+__all__ = [
+    "sequence_to_value_metrics",
+    "sequence_to_sequence_metrics"
+]
+
+DPI_value = 250
+
+
+def sequence_to_value_metrics(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    save_dir: Union[str, Path],
+    config: Optional[SequenceValueMetricsFormat] = None
+):
+    """
+    Saves regression metrics and plots for sequence-to-value (many-to-one) tasks.
+
+    Args:
+        y_true (np.ndarray): Ground truth values (1D array).
+        y_pred (np.ndarray): Predicted values (1D array).
+        save_dir (str | Path): Directory to save plots and report.
+        config (object): Formatting configuration object.
+    """
+
+    # --- Ensure 1D input ---
+    if y_true.ndim > 1: y_true = y_true.flatten()
+    if y_pred.ndim > 1: y_pred = y_pred.flatten()
+
+    # --- Parse Config or use defaults ---
+    if config is None:
+        # Create a default config if one wasn't provided
+        format_config = _BaseSequenceValueFormat()
+    else:
+        format_config = config
+
+    # --- Set Matplotlib font size ---
+    original_rc_params = plt.rcParams.copy()
+    plt.rcParams.update({'font.size': format_config.font_size})
+
+    # --- Calculate Metrics ---
+    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
+    mae = mean_absolute_error(y_true, y_pred)
+    r2 = r2_score(y_true, y_pred)
+    medae = median_absolute_error(y_true, y_pred)
+
+    report_lines = [
+        "--- Sequence-to-Value Regression Report ---",
+        f" Root Mean Squared Error (RMSE): {rmse:.4f}",
+        f" Mean Absolute Error (MAE): {mae:.4f}",
+        f" Median Absolute Error (MedAE): {medae:.4f}",
+        f" Coefficient of Determination (R²): {r2:.4f}"
+    ]
+    report_string = "\n".join(report_lines)
+
+    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+    # Save text report
+    report_path = save_dir_path / "sequence_to_value_report.txt"
+    report_path.write_text(report_string)
+    _LOGGER.info(f"📝 Seq-to-Value report saved as '{report_path.name}'")
+
+    # --- Save residual plot ---
+    residuals = y_true - y_pred
+    fig_res, ax_res = plt.subplots(figsize=(8, 6), dpi=DPI_value)
+    ax_res.scatter(y_pred, residuals,
+                   alpha=format_config.scatter_alpha,
+                   color=format_config.scatter_color)
+    ax_res.axhline(0, color=format_config.residual_line_color, linestyle='--')
+    ax_res.set_xlabel("Predicted Values")
+    ax_res.set_ylabel("Residuals")
+    ax_res.set_title("Sequence-to-Value Residual Plot")
+    ax_res.grid(True)
+    plt.tight_layout()
+    res_path = save_dir_path / "sequence_to_value_residual_plot.svg"
+    plt.savefig(res_path)
+    _LOGGER.info(f"📈 Seq-to-Value residual plot saved as '{res_path.name}'")
+    plt.close(fig_res)
+
+    # --- Save true vs predicted plot ---
+    fig_tvp, ax_tvp = plt.subplots(figsize=(8, 6), dpi=DPI_value)
+    ax_tvp.scatter(y_true, y_pred,
+                   alpha=format_config.scatter_alpha,
+                   color=format_config.scatter_color)
+    ax_tvp.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()],
+                linestyle='--',
+                lw=2,
+                color=format_config.ideal_line_color)
+    ax_tvp.set_xlabel('True Values')
+    ax_tvp.set_ylabel('Predictions')
+    ax_tvp.set_title('Sequence-to-Value: True vs. Predicted')
+    ax_tvp.grid(True)
+    plt.tight_layout()
+    tvp_path = save_dir_path / "sequence_to_value_true_vs_predicted_plot.svg"
+    plt.savefig(tvp_path)
+    _LOGGER.info(f"📉 Seq-to-Value True vs. Predicted plot saved as '{tvp_path.name}'")
+    plt.close(fig_tvp)
+
+    # --- Restore RC params ---
+    plt.rcParams.update(original_rc_params)
+
+
+def sequence_to_sequence_metrics(
+    y_true: np.ndarray,
+    y_pred: np.ndarray,
+    save_dir: Union[str, Path],
+    config: Optional[SequenceSequenceMetricsFormat] = None
+):
+    """
+    Saves per-step regression metrics for sequence-to-sequence (many-to-many) tasks.
+
+    Args:
+        y_true (np.ndarray): Ground truth sequences (n_samples, sequence_length).
+        y_pred (np.ndarray): Predicted sequences (n_samples, sequence_length).
+        save_dir (str | Path): Directory to save plots and report.
+        config (object): Formatting configuration object.
+    """
+
+    if y_true.ndim != 2 or y_pred.ndim != 2:
+        _LOGGER.error(f"Input arrays must be 2D (n_samples, sequence_length). Got y_true: {y_true.shape}, y_pred: {y_pred.shape}")
+        raise ValueError("Invalid input dimensions for sequence-to-sequence metrics.")
+
+    if y_true.shape != y_pred.shape:
+        _LOGGER.error(f"Input shapes do not match. Got y_true: {y_true.shape}, y_pred: {y_pred.shape}")
+        raise ValueError("Mismatched input shapes.")
+
+    # --- Parse Config or use defaults ---
+    if config is None:
+        format_config = _BaseSequenceSequenceFormat()
+    else:
+        format_config = config
+
+    # --- Set Matplotlib font size ---
+    original_rc_params = plt.rcParams.copy()
+    plt.rcParams.update({'font.size': format_config.font_size})
+
+    sequence_length = y_true.shape[1]
+    steps = list(range(1, sequence_length + 1))
+    per_step_rmse = []
+    per_step_mae = []
+
+    # --- Calculate metrics for each time step ---
+    for i in range(sequence_length):
+        y_true_step = y_true[:, i]
+        y_pred_step = y_pred[:, i]
+
+        rmse = np.sqrt(mean_squared_error(y_true_step, y_pred_step))
+        mae = mean_absolute_error(y_true_step, y_pred_step)
+
+        per_step_rmse.append(rmse)
+        per_step_mae.append(mae)
+
+    # --- Create and save DataFrame ---
+    report_df = pd.DataFrame({
+        "step": steps,
+        "rmse": per_step_rmse,
+        "mae": per_step_mae
+    })
+
+    save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
+    report_path = save_dir_path / "sequence_to_sequence_report.csv"
+    report_df.to_csv(report_path, index=False)
+    _LOGGER.info(f"📝 Seq-to-Seq per-step report saved as '{report_path.name}'")
+
+    # --- Create and save plot ---
+    fig, ax1 = plt.subplots(figsize=format_config.plot_figsize, dpi=DPI_value)
+
+    # Plot RMSE
+    color_rmse = format_config.rmse_color
+    ax1.set_xlabel('Prediction Step')
+    ax1.set_ylabel('RMSE', color=color_rmse)
+    ax1.plot(steps, per_step_rmse, format_config.rmse_marker, color=color_rmse, label='RMSE')
+    ax1.tick_params(axis='y', labelcolor=color_rmse)
+    ax1.grid(True, linestyle=format_config.grid_style)
+
+    # Create a second y-axis for MAE
+    ax2 = ax1.twinx()
+    color_mae = format_config.mae_color
+    ax2.set_ylabel('MAE', color=color_mae)
+    ax2.plot(steps, per_step_mae, format_config.mae_marker, color=color_mae, label='MAE')
+    ax2.tick_params(axis='y', labelcolor=color_mae)
+
+    fig.suptitle('Sequence-to-Sequence Metrics (Per-Step)')
+
+    # Add a single legend
+    lines, labels = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    ax2.legend(lines + lines2, labels + labels2, loc='best')
+
+    fig.tight_layout(rect=(0, 0.03, 1, 0.95)) # Adjust for suptitle
+
+    plot_path = save_dir_path / "sequence_to_sequence_metrics_plot.svg"
+    plt.savefig(plot_path)
+    _LOGGER.info(f"📈 Seq-to-Seq per-step metrics plot saved as '{plot_path.name}'")
+    plt.close(fig)
+
+    # --- Restore RC params ---
+    plt.rcParams.update(original_rc_params)
+
+
+def info():
+    _script_info(__all__)
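Both helpers write their text/CSV reports and SVG plots into `save_dir`, falling back to the `_BaseSequenceValueFormat` / `_BaseSequenceSequenceFormat` defaults when `config` is None. A hypothetical sketch with synthetic predictions (array shapes follow the docstrings above; the RNG seed and directory names are illustrative):

import numpy as np

from ml_tools.ML_sequence_evaluation import (
    sequence_to_value_metrics,
    sequence_to_sequence_metrics,
)

rng = np.random.default_rng(0)

# Many-to-one: 1D arrays of true targets and model predictions.
y_true = rng.normal(size=200)
y_pred = y_true + rng.normal(scale=0.2, size=200)
sequence_to_value_metrics(y_true, y_pred, save_dir="eval/seq_to_value")

# Many-to-many: 2D arrays shaped (n_samples, sequence_length);
# a per-step RMSE/MAE curve shows how error grows along the horizon.
y_true_seq = rng.normal(size=(200, 16))
y_pred_seq = y_true_seq + rng.normal(scale=0.2, size=(200, 16))
sequence_to_sequence_metrics(y_true_seq, y_pred_seq, save_dir="eval/seq_to_seq")

The per-step report is the notable design choice here: rather than one pooled score, sequence_to_sequence_metrics computes RMSE and MAE separately at each prediction step, which exposes the usual degradation of accuracy at later steps of the forecast horizon.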