dragon-ml-toolbox 13.1.0__py3-none-any.whl → 14.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/METADATA +11 -2
- dragon_ml_toolbox-14.3.1.dist-info/RECORD +48 -0
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +10 -0
- ml_tools/MICE_imputation.py +207 -5
- ml_tools/ML_datasetmaster.py +63 -205
- ml_tools/ML_evaluation.py +23 -15
- ml_tools/ML_evaluation_multi.py +5 -6
- ml_tools/ML_inference.py +0 -1
- ml_tools/ML_models.py +22 -6
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_trainer.py +463 -20
- ml_tools/ML_utilities.py +302 -4
- ml_tools/ML_vision_datasetmaster.py +1395 -0
- ml_tools/ML_vision_evaluation.py +260 -0
- ml_tools/ML_vision_inference.py +428 -0
- ml_tools/ML_vision_models.py +627 -0
- ml_tools/ML_vision_transformers.py +58 -0
- ml_tools/_ML_vision_recipe.py +88 -0
- ml_tools/__init__.py +1 -0
- ml_tools/_schema.py +79 -2
- ml_tools/custom_logger.py +37 -14
- ml_tools/data_exploration.py +502 -93
- ml_tools/keys.py +42 -1
- ml_tools/math_utilities.py +1 -1
- ml_tools/serde.py +77 -15
- ml_tools/utilities.py +192 -3
- dragon_ml_toolbox-13.1.0.dist-info/RECORD +0 -41
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/top_level.txt +0 -0
ml_tools/ML_datasetmaster.py
CHANGED

@@ -1,13 +1,10 @@
 import torch
-from torch.utils.data import Dataset
+from torch.utils.data import Dataset
 import pandas
 import numpy
 from sklearn.model_selection import train_test_split
 from typing import Literal, Union, Tuple, List, Optional
 from abc import ABC, abstractmethod
-from PIL import Image, ImageOps
-from torchvision.datasets import ImageFolder
-from torchvision import transforms
 import matplotlib.pyplot as plt
 from pathlib import Path
 

@@ -23,9 +20,7 @@ from ._schema import FeatureSchema
 __all__ = [
     "DatasetMaker",
     "DatasetMakerMulti",
-    "
-    "SequenceMaker",
-    "ResizeAspectFill",
+    "SequenceMaker"
 ]
 
 

@@ -126,8 +121,8 @@ class _BaseDatasetMaker(ABC):
         else:
             _LOGGER.info("No continuous features listed in schema. Scaler will not be fitted.")
 
-        X_train_values = X_train.
-        X_test_values = X_test.
+        X_train_values = X_train.to_numpy()
+        X_test_values = X_test.to_numpy()
 
         # continuous_feature_indices is derived
         if self.scaler is None and continuous_feature_indices:

@@ -253,26 +248,42 @@ class DatasetMaker(_BaseDatasetMaker):
                  pandas_df: pandas.DataFrame,
                  schema: FeatureSchema,
                  kind: Literal["regression", "classification"],
+                 scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
                  test_size: float = 0.2,
-                 random_state: int = 42
-                 scaler: Optional[PytorchScaler] = None):
+                 random_state: int = 42):
         """
         Args:
             pandas_df (pandas.DataFrame):
                 The pre-processed input DataFrame containing all columns. (features and single target).
             schema (FeatureSchema):
                 The definitive schema object from data_exploration.
-            kind (
+            kind ("regression" | "classification"):
                 The type of ML task. This determines the data type of the labels.
+            scaler ("fit" | "none" | PytorchScaler):
+                Strategy for data scaling:
+                - "fit": Fit a new PytorchScaler on continuous features.
+                - "none": Do not scale data (e.g., for TabularTransformer).
+                - PytorchScaler instance: Use a pre-fitted scaler to transform data.
             test_size (float):
                 The proportion of the dataset to allocate to the test split.
             random_state (int):
                 The seed for the random number of generator for reproducibility.
-
-                A pre-fitted PytorchScaler instance, if None a new scaler will be created.
+
         """
         super().__init__()
-
+
+        _apply_scaling: bool = False
+        if scaler == "fit":
+            self.scaler = None # To be created
+            _apply_scaling = True
+        elif scaler == "none":
+            self.scaler = None
+        elif isinstance(scaler, PytorchScaler):
+            self.scaler = scaler # Use the provided one
+            _apply_scaling = True
+        else:
+            _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
+            raise ValueError()
 
         # --- 1. Identify features (from schema) ---
         self._feature_names = list(schema.feature_names)

@@ -310,9 +321,14 @@ class DatasetMaker(_BaseDatasetMaker):
         label_dtype = torch.float32 if kind == "regression" else torch.int64
 
         # --- 4. Scale (using the schema) ---
-
-
-
+        if _apply_scaling:
+            X_train_final, X_test_final = self._prepare_scaler(
+                X_train, y_train, X_test, label_dtype, schema
+            )
+        else:
+            _LOGGER.info("Features have not been scaled as specified.")
+            X_train_final = X_train.to_numpy()
+            X_test_final = X_test.to_numpy()
 
         # --- 5. Create Datasets ---
         self._train_ds = _PytorchDataset(X_train_final, y_train, labels_dtype=label_dtype, feature_names=self._feature_names, target_names=self._target_names)

@@ -336,9 +352,9 @@ class DatasetMakerMulti(_BaseDatasetMaker):
                  pandas_df: pandas.DataFrame,
                  target_columns: List[str],
                  schema: FeatureSchema,
+                 scaler: Union[Literal["fit"], Literal["none"], PytorchScaler],
                  test_size: float = 0.2,
-                 random_state: int = 42
-                 scaler: Optional[PytorchScaler] = None):
+                 random_state: int = 42):
         """
         Args:
             pandas_df (pandas.DataFrame):

@@ -348,20 +364,35 @@ class DatasetMakerMulti(_BaseDatasetMaker):
                 List of target column names.
             schema (FeatureSchema):
                 The definitive schema object from data_exploration.
+            scaler ("fit" | "none" | PytorchScaler):
+                Strategy for data scaling:
+                - "fit": Fit a new PytorchScaler on continuous features.
+                - "none": Do not scale data (e.g., for TabularTransformer).
+                - PytorchScaler instance: Use a pre-fitted scaler to transform data.
             test_size (float):
                 The proportion of the dataset to allocate to the test split.
             random_state (int):
                 The seed for the random number generator for reproducibility.
-            scaler (PytorchScaler | None):
-                A pre-fitted PytorchScaler instance.
 
         ## Note:
         For multi-binary classification, the most common PyTorch loss function is nn.BCEWithLogitsLoss.
         This loss function requires the labels to be torch.float32 which is the same type required for regression (multi-regression) tasks.
         """
         super().__init__()
-
-
+
+        _apply_scaling: bool = False
+        if scaler == "fit":
+            self.scaler = None
+            _apply_scaling = True
+        elif scaler == "none":
+            self.scaler = None
+        elif isinstance(scaler, PytorchScaler):
+            self.scaler = scaler # Use the provided one
+            _apply_scaling = True
+        else:
+            _LOGGER.error(f"Invalid 'scaler' argument. Must be 'fit', 'none', or a PytorchScaler instance.")
+            raise ValueError()
+
         # --- 1. Get features and targets from schema/args ---
         self._feature_names = list(schema.feature_names)
         self._target_names = target_columns

@@ -403,9 +434,14 @@ class DatasetMakerMulti(_BaseDatasetMaker):
         label_dtype = torch.float32
 
         # --- 4. Scale (using the schema) ---
-
-
-
+        if _apply_scaling:
+            X_train_final, X_test_final = self._prepare_scaler(
+                X_train, y_train, X_test, label_dtype, schema
+            )
+        else:
+            _LOGGER.info("Features have not been scaled as specified.")
+            X_train_final = X_train.to_numpy()
+            X_test_final = X_test.to_numpy()
 
         # --- 5. Create Datasets ---
         # _PytorchDataset now correctly handles y_train (a DataFrame)

@@ -432,149 +468,6 @@ class _BaseMaker(ABC):
         pass
 
 
-# --- VisionDatasetMaker ---
-class VisionDatasetMaker(_BaseMaker):
-    """
-    Creates processed PyTorch datasets for computer vision tasks from an
-    image folder directory.
-
-    Uses online augmentations per epoch (image augmentation without creating new files).
-    """
-    def __init__(self, full_dataset: ImageFolder):
-        super().__init__()
-        self.full_dataset = full_dataset
-        self.labels = [s[1] for s in self.full_dataset.samples]
-        self.class_map = full_dataset.class_to_idx
-
-        self._is_split = False
-        self._are_transforms_configured = False
-
-    @classmethod
-    def from_folder(cls, root_dir: str) -> 'VisionDatasetMaker':
-        """Creates a maker instance from a root directory of images."""
-        initial_transform = transforms.Compose([transforms.ToTensor()])
-        full_dataset = ImageFolder(root=root_dir, transform=initial_transform)
-        _LOGGER.info(f"Found {len(full_dataset)} images in {len(full_dataset.classes)} classes.")
-        return cls(full_dataset)
-
-    @staticmethod
-    def inspect_folder(path: Union[str, Path]):
-        """
-        Logs a report of the types, sizes, and channels of image files
-        found in the directory and its subdirectories.
-        """
-        path_obj = make_fullpath(path)
-
-        non_image_files = set()
-        img_types = set()
-        img_sizes = set()
-        img_channels = set()
-        img_counter = 0
-
-        _LOGGER.info(f"Inspecting folder: {path_obj}...")
-        # Use rglob to recursively find all files
-        for filepath in path_obj.rglob('*'):
-            if filepath.is_file():
-                try:
-                    # Using PIL to open is a more reliable check
-                    with Image.open(filepath) as img:
-                        img_types.add(img.format)
-                        img_sizes.add(img.size)
-                        img_channels.update(img.getbands())
-                        img_counter += 1
-                except (IOError, SyntaxError):
-                    non_image_files.add(filepath.name)
-
-        if non_image_files:
-            _LOGGER.warning(f"Non-image or corrupted files found and ignored: {non_image_files}")
-
-        report = (
-            f"\n--- Inspection Report for '{path_obj.name}' ---\n"
-            f"Total images found: {img_counter}\n"
-            f"Image formats: {img_types or 'None'}\n"
-            f"Image sizes (WxH): {img_sizes or 'None'}\n"
-            f"Image channels (bands): {img_channels or 'None'}\n"
-            f"--------------------------------------"
-        )
-        print(report)
-
-    def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
-        """Splits the dataset into training, validation, and optional test sets."""
-        if self._is_split:
-            _LOGGER.warning("Data has already been split.")
-            return self
-
-        if val_size + test_size >= 1.0:
-            _LOGGER.error("The sum of val_size and test_size must be less than 1.")
-            raise ValueError()
-
-        indices = list(range(len(self.full_dataset)))
-        labels_for_split = self.labels if stratify else None
-
-        train_indices, val_test_indices = train_test_split(
-            indices, test_size=(val_size + test_size), random_state=random_state, stratify=labels_for_split
-        )
-
-        if test_size > 0:
-            val_test_labels = [self.labels[i] for i in val_test_indices]
-            stratify_val_test = val_test_labels if stratify else None
-            val_indices, test_indices = train_test_split(
-                val_test_indices, test_size=(test_size / (val_size + test_size)),
-                random_state=random_state, stratify=stratify_val_test
-            )
-            self._test_dataset = Subset(self.full_dataset, test_indices)
-            _LOGGER.info(f"Test set created with {len(self._test_dataset)} images.")
-        else:
-            val_indices = val_test_indices
-
-        self._train_dataset = Subset(self.full_dataset, train_indices)
-        self._val_dataset = Subset(self.full_dataset, val_indices)
-        self._is_split = True
-
-        _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
-        return self
-
-    def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225],
-                             extra_train_transforms: Optional[List] = None) -> 'VisionDatasetMaker':
-        """Configures and applies the image transformations (augmentations)."""
-        if not self._is_split:
-            _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
-            raise RuntimeError()
-
-        base_train_transforms = [transforms.RandomResizedCrop(crop_size), transforms.RandomHorizontalFlip()]
-        if extra_train_transforms:
-            base_train_transforms.extend(extra_train_transforms)
-
-        final_transforms = [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]
-
-        val_transform = transforms.Compose([transforms.Resize(resize_size), transforms.CenterCrop(crop_size), *final_transforms])
-        train_transform = transforms.Compose([*base_train_transforms, *final_transforms])
-
-        self._train_dataset.dataset.transform = train_transform # type: ignore
-        self._val_dataset.dataset.transform = val_transform # type: ignore
-        if self._test_dataset:
-            self._test_dataset.dataset.transform = val_transform # type: ignore
-
-        self._are_transforms_configured = True
-        _LOGGER.info("Image transforms configured and applied.")
-        return self
-
-    def get_datasets(self) -> Tuple[Dataset, ...]:
-        """Returns the final train, validation, and optional test datasets."""
-        if not self._is_split:
-            _LOGGER.error("Data has not been split. Call .split_data() first.")
-            raise RuntimeError()
-        if not self._are_transforms_configured:
-            _LOGGER.warning("Transforms have not been configured. Using default ToTensor only.")
-
-        if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
-
-
 # --- SequenceMaker ---
 class SequenceMaker(_BaseMaker):
     """

@@ -763,40 +656,5 @@ class SequenceMaker(_BaseMaker):
         return self._train_dataset, self._test_dataset
 
 
-# --- Custom Vision Transform Class ---
-class ResizeAspectFill:
-    """
-    Custom transformation to make an image square by padding it to match the
-    longest side, preserving the aspect ratio. The image is finally centered.
-
-    Args:
-        pad_color (Union[str, int]): Color to use for the padding.
-            Defaults to "black".
-    """
-    def __init__(self, pad_color: Union[str, int] = "black") -> None:
-        self.pad_color = pad_color
-
-    def __call__(self, image: Image.Image) -> Image.Image:
-        if not isinstance(image, Image.Image):
-            _LOGGER.error(f"Expected PIL.Image.Image, got {type(image).__name__}")
-            raise TypeError()
-
-        w, h = image.size
-        if w == h:
-            return image
-
-        # Determine padding to center the image
-        if w > h:
-            top_padding = (w - h) // 2
-            bottom_padding = w - h - top_padding
-            padding = (0, top_padding, 0, bottom_padding)
-        else: # h > w
-            left_padding = (h - w) // 2
-            right_padding = h - w - left_padding
-            padding = (left_padding, 0, right_padding, 0)
-
-        return ImageOps.expand(image, padding, fill=self.pad_color)
-
-
 def info():
     _script_info(__all__)
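The `scaler` argument shown above replaces the old optional `PytorchScaler` parameter and is now required. A minimal usage sketch based only on the signature and docstring in this diff; the import path, the `df` DataFrame, and the `schema` object are placeholders I am assuming, not something this diff defines:

```python
# Hypothetical sketch: `df` and `schema` must come from your own pipeline.
from ml_tools.ML_datasetmaster import DatasetMaker

maker = DatasetMaker(
    pandas_df=df,        # pre-processed DataFrame (features + single target)
    schema=schema,       # FeatureSchema from data_exploration
    kind="regression",
    scaler="fit",        # "fit", "none", or a pre-fitted PytorchScaler
    test_size=0.2,
    random_state=42,
)
```

Per the new docstring, passing `"none"` skips scaling entirely, which is suggested for TabularTransformer-style models.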
ml_tools/ML_evaluation.py
CHANGED

@@ -24,7 +24,7 @@ import warnings
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .keys import SHAPKeys
+from .keys import SHAPKeys, PyTorchLogKeys
 
 
 __all__ = [

@@ -44,8 +44,8 @@ def plot_losses(history: dict, save_dir: Union[str, Path]):
         history (dict): A dictionary containing 'train_loss' and 'val_loss'.
         save_dir (str | Path): Directory to save the plot image.
     """
-    train_loss = history.get(
-    val_loss = history.get(
+    train_loss = history.get(PyTorchLogKeys.TRAIN_LOSS, [])
+    val_loss = history.get(PyTorchLogKeys.VAL_LOSS, [])
 
     if not train_loss and not val_loss:
         print("Warning: Loss history is empty or incomplete. Cannot plot.")

@@ -258,7 +258,7 @@ def shap_summary_plot(model,
                       feature_names: Optional[list[str]],
                       save_dir: Union[str, Path],
                       device: torch.device = torch.device('cpu'),
-                      explainer_type: Literal['deep', 'kernel'] = '
+                      explainer_type: Literal['deep', 'kernel'] = 'kernel'):
     """
     Calculates SHAP values and saves summary plots and data.
 

@@ -270,7 +270,7 @@ def shap_summary_plot(model,
         save_dir (str | Path): Directory to save SHAP artifacts.
         device (torch.device): The torch device for SHAP calculations.
         explainer_type (Literal['deep', 'kernel']): The explainer to use.
-            - 'deep':
+            - 'deep': Uses shap.DeepExplainer. Fast and efficient for
               PyTorch models.
            - 'kernel': Uses shap.KernelExplainer. Model-agnostic but EXTREMELY
              slow and memory-intensive.

@@ -285,7 +285,7 @@ def shap_summary_plot(model,
     instances_to_explain_np = None
 
     if explainer_type == 'deep':
-        # --- 1. Use DeepExplainer
+        # --- 1. Use DeepExplainer ---
 
         # Ensure data is torch.Tensor
         if isinstance(background_data, np.ndarray):

@@ -309,10 +309,9 @@ def shap_summary_plot(model,
         instances_to_explain_np = instances_to_explain.cpu().numpy()
 
     elif explainer_type == 'kernel':
-        # --- 2. Use KernelExplainer
+        # --- 2. Use KernelExplainer ---
         _LOGGER.warning(
-            "
-            "Consider reducing 'n_samples' if the process terminates unexpectedly."
+            "KernelExplainer is memory-intensive and slow. Consider reducing the number of instances to explain if the process terminates unexpectedly."
         )
 
         # Ensure data is np.ndarray

@@ -348,14 +347,26 @@ def shap_summary_plot(model,
     else:
         _LOGGER.error(f"Invalid explainer_type: '{explainer_type}'. Must be 'deep' or 'kernel'.")
         raise ValueError()
+
+    if not isinstance(shap_values, list) and shap_values.ndim == 3 and shap_values.shape[2] == 1:
+        # _LOGGER.info("Squeezing SHAP values from (N, F, 1) to (N, F) for regression plot.")
+        shap_values = shap_values.squeeze(-1)
 
     # --- 3. Plotting and Saving ---
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
     plt.ioff()
 
+    # Convert instances to a DataFrame. robust way to ensure SHAP correctly maps values to feature names.
+    if feature_names is None:
+        # Create generic names if none were provided
+        num_features = instances_to_explain_np.shape[1]
+        feature_names = [f'feature_{i}' for i in range(num_features)]
+
+    instances_df = pd.DataFrame(instances_to_explain_np, columns=feature_names)
+
     # Save Bar Plot
     bar_path = save_dir_path / "shap_bar_plot.svg"
-    shap.summary_plot(shap_values,
+    shap.summary_plot(shap_values, instances_df, plot_type="bar", show=False)
     ax = plt.gca()
     ax.set_xlabel("SHAP Value Impact", labelpad=10)
     plt.title("SHAP Feature Importance")

@@ -366,7 +377,7 @@ def shap_summary_plot(model,
 
     # Save Dot Plot
     dot_path = save_dir_path / "shap_dot_plot.svg"
-    shap.summary_plot(shap_values,
+    shap.summary_plot(shap_values, instances_df, plot_type="dot", show=False)
     ax = plt.gca()
     ax.set_xlabel("SHAP Value Impact", labelpad=10)
     if plt.gcf().axes and len(plt.gcf().axes) > 1:

@@ -389,9 +400,6 @@ def shap_summary_plot(model,
     mean_abs_shap = np.abs(shap_values).mean(axis=0)
 
     mean_abs_shap = mean_abs_shap.flatten()
-
-    if feature_names is None:
-        feature_names = [f'feature_{i}' for i in range(len(mean_abs_shap))]
 
     summary_df = pd.DataFrame({
         SHAPKeys.FEATURE_COLUMN: feature_names,

@@ -401,7 +409,7 @@ def shap_summary_plot(model,
     summary_df.to_csv(summary_path, index=False)
 
     _LOGGER.info(f"📝 SHAP summary data saved as '{summary_path.name}'")
-    plt.ion()
+    plt.ion()
 
 
 def plot_attention_importance(weights: List[torch.Tensor], feature_names: Optional[List[str]], save_dir: Union[str, Path], top_n: int = 10):
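The new guard in `shap_summary_plot` normalizes single-output SHAP arrays before plotting. A self-contained sketch of just that shape handling, in plain NumPy so it runs without `shap` or a model:

```python
import numpy as np

# Fake SHAP output for a single-target regression model: (N samples, F features, 1 output).
shap_values = np.random.rand(100, 5, 1)

# Same normalization the diff adds: squeeze (N, F, 1) down to (N, F) before plotting.
if not isinstance(shap_values, list) and shap_values.ndim == 3 and shap_values.shape[2] == 1:
    shap_values = shap_values.squeeze(-1)

print(shap_values.shape)  # (100, 5)
```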
ml_tools/ML_evaluation_multi.py
CHANGED

@@ -235,7 +235,7 @@ def multi_target_shap_summary_plot(
    target_names: List[str],
    save_dir: Union[str, Path],
    device: torch.device = torch.device('cpu'),
-    explainer_type: Literal['deep', 'kernel'] = '
+    explainer_type: Literal['deep', 'kernel'] = 'kernel'
    ):
    """
    Calculates SHAP values for a multi-target model and saves summary plots and data for each target.

@@ -249,7 +249,7 @@ def multi_target_shap_summary_plot(
        save_dir (str | Path): Directory to save SHAP artifacts.
        device (torch.device): The torch device for SHAP calculations.
        explainer_type (Literal['deep', 'kernel']): The explainer to use.
-            - 'deep':
+            - 'deep': Uses shap.DeepExplainer. Fast and efficient.
            - 'kernel': Uses shap.KernelExplainer. Model-agnostic but slow and memory-intensive.
    """
    _LOGGER.info(f"--- Multi-Target SHAP Value Explanation (Using: {explainer_type.upper()}Explainer) ---")

@@ -260,7 +260,7 @@ def multi_target_shap_summary_plot(
    instances_to_explain_np = None
 
    if explainer_type == 'deep':
-        # --- 1. Use DeepExplainer
+        # --- 1. Use DeepExplainer ---
 
        # Ensure data is torch.Tensor
        if isinstance(background_data, np.ndarray):

@@ -285,10 +285,9 @@ def multi_target_shap_summary_plot(
        instances_to_explain_np = instances_to_explain.cpu().numpy()
 
    elif explainer_type == 'kernel':
-        # --- 2. Use KernelExplainer
+        # --- 2. Use KernelExplainer ---
        _LOGGER.warning(
-            "
-            "Consider reducing 'n_samples' if the process terminates."
+            "KernelExplainer is memory-intensive and slow. Consider reducing the number of instances to explain if the process terminates unexpectedly."
        )
 
        # Convert all data to numpy
ml_tools/ML_inference.py
CHANGED

@@ -82,7 +82,6 @@ class _BaseInferenceHandler(ABC):
             _LOGGER.warning("CUDA not available, switching to CPU.")
             device_lower = "cpu"
         elif device_lower == "mps" and not torch.backends.mps.is_available():
-            # Your M-series Mac will appreciate this check!
             _LOGGER.warning("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
             device_lower = "cpu"
         return torch.device(device_lower)
ml_tools/ML_models.py
CHANGED

@@ -306,10 +306,10 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
     def __init__(self, *,
                  schema: FeatureSchema,
                  out_targets: int,
-                 embedding_dim: int =
+                 embedding_dim: int = 256,
                  num_heads: int = 8,
                  num_layers: int = 6,
-                 dropout: float = 0.
+                 dropout: float = 0.2):
         """
         Args:
             schema (FeatureSchema):

@@ -317,14 +317,28 @@ class TabularTransformer(nn.Module, _ArchitectureHandlerMixin):
             out_targets (int):
                 Number of output targets (1 for regression).
             embedding_dim (int):
-                The dimension for all feature embeddings. Must be divisible
-                by num_heads.
+                The dimension for all feature embeddings. Must be divisible by num_heads. Common values: (64, 128, 192, 256, etc.)
             num_heads (int):
-                The number of heads in the multi-head attention mechanism.
+                The number of heads in the multi-head attention mechanism. Common values: (4, 8, 16)
             num_layers (int):
-                The number of sub-encoder-layers in the transformer encoder.
+                The number of sub-encoder-layers in the transformer encoder. Common values: (4, 8, 12)
             dropout (float):
                 The dropout value.
+
+        ## Note:
+
+        **Embedding Dimension:** "Width" of the model. It's the N-dimension vector that will be used to represent each one of the features.
+        - Each continuous feature gets its own learnable N-dimension vector.
+        - Each categorical feature gets an embedding table that maps every category (e.g., "color=red", "color=blue") to a unique N-dimension vector.
+
+        **Attention Heads:** Controls the "Multi-Head Attention" mechanism. Instead of looking at all the feature interactions at once, the model splits its attention into N parallel heads.
+        - Embedding Dimensions get divided by the number of Attention Heads, resulting in the dimensions assigned per head.
+
+        **Number of Layers:** "Depth" of the model. Number of identical `TransformerEncoderLayer` blocks that are stacked on top of each other.
+        - Layer 1: The attention heads find simple, direct interactions between the features.
+        - Layer 2: Takes the output of Layer 1 and finds interactions between those interactions and so on.
+        - Trade-off: More layers are more powerful but are slower to train and more prone to overfitting. If the training loss goes down but the validation loss goes up, you might have too many layers (or need more dropout).
+
         """
         super().__init__()
 

@@ -734,5 +748,7 @@ class SequencePredictorLSTM(nn.Module, _ArchitectureHandlerMixin):
         )
 
 
+# ---- PyTorch models ---
+
 def info():
     _script_info(__all__)
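The new TabularTransformer docstring notes boil down to one constraint: `embedding_dim` must split evenly across `num_heads`. A hedged sketch of that idea using plain PyTorch building blocks, not the library's own `TabularTransformer` (which also needs a `FeatureSchema`):

```python
import torch
import torch.nn as nn

# Defaults shown in the diff: width 256, 8 heads, 6 stacked layers, dropout 0.2.
embedding_dim, num_heads, num_layers, dropout = 256, 8, 6, 0.2

# The docstring's constraint: the per-head dimension must be an integer.
assert embedding_dim % num_heads == 0, "embedding_dim must be divisible by num_heads"
print(f"dimensions per head: {embedding_dim // num_heads}")  # 32

# A stack of identical encoder layers -> the "depth" described in the docstring.
encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=num_heads,
                                           dropout=dropout, batch_first=True)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

# One batch of 4 rows, each row tokenized into 10 feature embeddings of size 256.
tokens = torch.randn(4, 10, embedding_dim)
print(encoder(tokens).shape)  # torch.Size([4, 10, 256])
```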