dragon-ml-toolbox 13.7.0 → 14.0.0 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of dragon-ml-toolbox has been flagged as possibly problematic by the registry.

dragon_ml_toolbox-13.7.0.dist-info/METADATA → dragon_ml_toolbox-14.0.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 13.7.0
+Version: 14.0.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
@@ -34,6 +34,7 @@ Requires-Dist: Pillow; extra == "ml"
 Requires-Dist: evotorch; extra == "ml"
 Requires-Dist: pyarrow; extra == "ml"
 Requires-Dist: colorlog; extra == "ml"
+Requires-Dist: torchmetrics; extra == "ml"
 Provides-Extra: mice
 Requires-Dist: numpy<2.0; extra == "mice"
 Requires-Dist: pandas; extra == "mice"
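
The only dependency change is the new torchmetrics requirement under the "ml" extra. How the package uses it is not visible in this diff; the snippet below is a generic torchmetrics sketch, not code from dragon-ml-toolbox:

import torch
import torchmetrics

# Generic torchmetrics pattern: accumulate a metric over batches, then compute.
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=3, average="micro")
accuracy.update(torch.tensor([0, 2, 1, 2]), torch.tensor([0, 1, 1, 2]))
print(accuracy.compute())  # tensor(0.7500): 3 of 4 predictions correct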
dragon_ml_toolbox-13.7.0.dist-info/RECORD → dragon_ml_toolbox-14.0.0.dist-info/RECORD RENAMED
@@ -1,41 +1,48 @@
-dragon_ml_toolbox-13.7.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-13.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-14.0.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-14.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
 ml_tools/MICE_imputation.py,sha256=KLJXGQLKJ6AuWWttAG-LCCaxpS-ygM4dXPiguHDaL6Y,20815
 ml_tools/ML_callbacks.py,sha256=elD2Yr030sv_6gX_m9GVd6HTyrbmt34nFS8lrgS4HtM,15808
-ml_tools/ML_datasetmaster.py,sha256=6caWbq6eu1RE9V51gmceD71PtMctJRjFuLvkkK5ChiY,36271
-ml_tools/ML_evaluation.py,sha256=li77AuP53pCzgrj6p-jTCNtPFgS9Y9XnMWIZn1ulTBM,18946
+ml_tools/ML_datasetmaster.py,sha256=rsJgZEGBJmfeKF6cR8CQZzfEx4T7Y-p1wUnR15_nNw0,28400
+ml_tools/ML_evaluation.py,sha256=4GU86rUWMIGbkXrvN6PyjfGwKtWvXKE7pMlWpWeBq14,18988
 ml_tools/ML_evaluation_multi.py,sha256=rJKdgtq-9I7oaI7PRzq7aIZ84XdNV0xzlVePZW4nj0k,16095
-ml_tools/ML_inference.py,sha256=yq2gdN6s_OUYC5ZLQrIJC5BA5H33q8UKODXwb-_0M2c,23549
-ml_tools/ML_models.py,sha256=UVWJHPLVIvFno_csCHH1FwBfTwQ5nX0V8F1TbOByZ4I,31388
+ml_tools/ML_inference.py,sha256=YJ953bhNWsdlPRtJQh3h2ACfMIgp8dQ9KtL9Azar-5s,23489
+ml_tools/ML_models.py,sha256=B_6cUMI0-CnVKcd9BoabvMOG-xIZz-eqP9G9sfOceHc,31434
 ml_tools/ML_optimization.py,sha256=P0zkhKAwTpkorIBtR0AOIDcyexo5ngmvFUzo3DfNO-E,22692
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_trainer.py,sha256=ZxeOagXW5adFhYIH-oMTlcrLU6VHe4R1EROI7yypNwQ,29665
-ml_tools/ML_utilities.py,sha256=EnKpPTnJ2qjZmz7kvows4Uu5CfSA7ByRmI1v2-KarKw,9337
+ml_tools/ML_trainer.py,sha256=ZWI4MbUcLeBxyfoUTL96l5tjHHMp9I64h4SdXnjYmBE,49795
+ml_tools/ML_utilities.py,sha256=z6LbpbZwhn8F__fWlKi-g-cAJQXSxwg1NHfC5FBoAyc,21139
+ml_tools/ML_vision_datasetmaster.py,sha256=tOrdatuq_AP8-GDiTrtARvSJdpc8h7dT-OhDJtRQnsk,54433
+ml_tools/ML_vision_evaluation.py,sha256=t12R7i1RkOCt9zu1_lxSBr8OH6A6Get0k8ftDLctn6I,10486
+ml_tools/ML_vision_inference.py,sha256=He3KV3VJAm8PwO-fOq4b9VO8UXFr-GmpuCnoHXf4VZI,20588
+ml_tools/ML_vision_models.py,sha256=G3S4jB9AE9wMpU9ZygOgOx9q1K6t6LAXBYcJ-U2XQ1M,25600
+ml_tools/ML_vision_transformers.py,sha256=95e0aBkHY5VDGE8i5xy57COU7NvSNIgFknnhBubwE40,1832
 ml_tools/PSO_optimization.py,sha256=T-HWHMRJUnPvPwixdU5jif3_rnnI36TzcL8u3oSCwuA,22960
 ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
 ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
 ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
+ml_tools/_ML_pytorch_tabular.py,sha256=G9ZkqtjfYe1KAZmkmlL2bCg5s4EIK_wPBBbDmgeEI8k,21753
+ml_tools/_ML_vision_recipe.py,sha256=zrgxFUvTJqQVuwR7jWlbIC2FD29u6eNFPkTRoJ7yEZI,3178
 ml_tools/__init__.py,sha256=kJiankjz9_qXu7gU92mYqYg_anLvt-B6RtW0mMH8uGo,76
 ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
 ml_tools/_schema.py,sha256=yu6aWmn_2Z4_AxAtJGDDCIa96y6JcUp-vgnCS013Qmw,3908
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
-ml_tools/custom_logger.py,sha256=7tSAgRL7e-Ekm7rS1FLDocaPLCnaoKc7VSrtfwCtCEg,10067
+ml_tools/custom_logger.py,sha256=TGc0Ww2Xlqj2XE3q4bP43hV7T3qnb5ci9f0pYHXF5TY,11226
 ml_tools/data_exploration.py,sha256=-BbWO7BBFapPi_7ZuWo65VqguJXaBfgFSptrXyoWrDk,51902
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
 ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
 ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
-ml_tools/keys.py,sha256=oykUVLB4Wos3AZomowjtI8AFFC5xnMUH-icNHydRpOk,2275
+ml_tools/keys.py,sha256=wZOBuEnnHc54vlOZiimnrxfk-sZh6f6suPppJW8rbPQ,3326
 ml_tools/math_utilities.py,sha256=xeKq1quR_3DYLgowcp4Uam_4s3JltUyOnqMOGuAiYWU,8802
 ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJRs,12763
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
 ml_tools/utilities.py,sha256=aWqvYzmxlD74PD5Yqu1VuTekDJeYLQrmPIU_VeVyRp0,22526
-dragon_ml_toolbox-13.7.0.dist-info/METADATA,sha256=Rk5n5BbicDim5Qg6AzpyG8MwJAqlu5MSclAiLP_V-Vc,6166
-dragon_ml_toolbox-13.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-13.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-13.7.0.dist-info/RECORD,,
+dragon_ml_toolbox-14.0.0.dist-info/METADATA,sha256=2efDLOcX7Wx5p_9LfMVX78CK_CP63GT93vH9rX2fa4Y,6209
+dragon_ml_toolbox-14.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-14.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-14.0.0.dist-info/RECORD,,
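
The RECORD changes summarize the major-version bump: ml_tools/ML_datasetmaster.py shrinks from 36,271 to 28,400 bytes while five new ml_tools/ML_vision_* modules appear, so the vision tooling removed in the source diff below has evidently been split into dedicated modules rather than dropped. A hedged sketch of the resulting import (the module path is confirmed by the RECORD above; the imported name is only an assumption carried over from the removed API):

# Module path taken from the RECORD; the class name is an assumption
# based on the VisionDatasetMaker removed from ML_datasetmaster.py below.
from ml_tools.ML_vision_datasetmaster import VisionDatasetMaker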
ml_tools/ML_datasetmaster.py CHANGED
@@ -1,13 +1,10 @@
 import torch
-from torch.utils.data import Dataset, Subset
+from torch.utils.data import Dataset
 import pandas
 import numpy
 from sklearn.model_selection import train_test_split
 from typing import Literal, Union, Tuple, List, Optional
 from abc import ABC, abstractmethod
-from PIL import Image, ImageOps
-from torchvision.datasets import ImageFolder
-from torchvision import transforms
 import matplotlib.pyplot as plt
 from pathlib import Path
 
@@ -23,9 +20,7 @@ from ._schema import FeatureSchema
 __all__ = [
     "DatasetMaker",
     "DatasetMakerMulti",
-    "VisionDatasetMaker",
-    "SequenceMaker",
-    "ResizeAspectFill",
+    "SequenceMaker"
 ]
 
 
@@ -473,149 +468,6 @@ class _BaseMaker(ABC):
         pass
 
 
-# --- VisionDatasetMaker ---
-class VisionDatasetMaker(_BaseMaker):
-    """
-    Creates processed PyTorch datasets for computer vision tasks from an
-    image folder directory.
-
-    Uses online augmentations per epoch (image augmentation without creating new files).
-    """
-    def __init__(self, full_dataset: ImageFolder):
-        super().__init__()
-        self.full_dataset = full_dataset
-        self.labels = [s[1] for s in self.full_dataset.samples]
-        self.class_map = full_dataset.class_to_idx
-
-        self._is_split = False
-        self._are_transforms_configured = False
-
-    @classmethod
-    def from_folder(cls, root_dir: str) -> 'VisionDatasetMaker':
-        """Creates a maker instance from a root directory of images."""
-        initial_transform = transforms.Compose([transforms.ToTensor()])
-        full_dataset = ImageFolder(root=root_dir, transform=initial_transform)
-        _LOGGER.info(f"Found {len(full_dataset)} images in {len(full_dataset.classes)} classes.")
-        return cls(full_dataset)
-
-    @staticmethod
-    def inspect_folder(path: Union[str, Path]):
-        """
-        Logs a report of the types, sizes, and channels of image files
-        found in the directory and its subdirectories.
-        """
-        path_obj = make_fullpath(path)
-
-        non_image_files = set()
-        img_types = set()
-        img_sizes = set()
-        img_channels = set()
-        img_counter = 0
-
-        _LOGGER.info(f"Inspecting folder: {path_obj}...")
-        # Use rglob to recursively find all files
-        for filepath in path_obj.rglob('*'):
-            if filepath.is_file():
-                try:
-                    # Using PIL to open is a more reliable check
-                    with Image.open(filepath) as img:
-                        img_types.add(img.format)
-                        img_sizes.add(img.size)
-                        img_channels.update(img.getbands())
-                        img_counter += 1
-                except (IOError, SyntaxError):
-                    non_image_files.add(filepath.name)
-
-        if non_image_files:
-            _LOGGER.warning(f"Non-image or corrupted files found and ignored: {non_image_files}")
-
-        report = (
-            f"\n--- Inspection Report for '{path_obj.name}' ---\n"
-            f"Total images found: {img_counter}\n"
-            f"Image formats: {img_types or 'None'}\n"
-            f"Image sizes (WxH): {img_sizes or 'None'}\n"
-            f"Image channels (bands): {img_channels or 'None'}\n"
-            f"--------------------------------------"
-        )
-        print(report)
-
-    def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
-        """Splits the dataset into training, validation, and optional test sets."""
-        if self._is_split:
-            _LOGGER.warning("Data has already been split.")
-            return self
-
-        if val_size + test_size >= 1.0:
-            _LOGGER.error("The sum of val_size and test_size must be less than 1.")
-            raise ValueError()
-
-        indices = list(range(len(self.full_dataset)))
-        labels_for_split = self.labels if stratify else None
-
-        train_indices, val_test_indices = train_test_split(
-            indices, test_size=(val_size + test_size), random_state=random_state, stratify=labels_for_split
-        )
-
-        if test_size > 0:
-            val_test_labels = [self.labels[i] for i in val_test_indices]
-            stratify_val_test = val_test_labels if stratify else None
-            val_indices, test_indices = train_test_split(
-                val_test_indices, test_size=(test_size / (val_size + test_size)),
-                random_state=random_state, stratify=stratify_val_test
-            )
-            self._test_dataset = Subset(self.full_dataset, test_indices)
-            _LOGGER.info(f"Test set created with {len(self._test_dataset)} images.")
-        else:
-            val_indices = val_test_indices
-
-        self._train_dataset = Subset(self.full_dataset, train_indices)
-        self._val_dataset = Subset(self.full_dataset, val_indices)
-        self._is_split = True
-
-        _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
-        return self
-
-    def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225],
-                             extra_train_transforms: Optional[List] = None) -> 'VisionDatasetMaker':
-        """Configures and applies the image transformations (augmentations)."""
-        if not self._is_split:
-            _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
-            raise RuntimeError()
-
-        base_train_transforms = [transforms.RandomResizedCrop(crop_size), transforms.RandomHorizontalFlip()]
-        if extra_train_transforms:
-            base_train_transforms.extend(extra_train_transforms)
-
-        final_transforms = [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]
-
-        val_transform = transforms.Compose([transforms.Resize(resize_size), transforms.CenterCrop(crop_size), *final_transforms])
-        train_transform = transforms.Compose([*base_train_transforms, *final_transforms])
-
-        self._train_dataset.dataset.transform = train_transform # type: ignore
-        self._val_dataset.dataset.transform = val_transform # type: ignore
-        if self._test_dataset:
-            self._test_dataset.dataset.transform = val_transform # type: ignore
-
-        self._are_transforms_configured = True
-        _LOGGER.info("Image transforms configured and applied.")
-        return self
-
-    def get_datasets(self) -> Tuple[Dataset, ...]:
-        """Returns the final train, validation, and optional test datasets."""
-        if not self._is_split:
-            _LOGGER.error("Data has not been split. Call .split_data() first.")
-            raise RuntimeError()
-        if not self._are_transforms_configured:
-            _LOGGER.warning("Transforms have not been configured. Using default ToTensor only.")
-
-        if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
-
-
 # --- SequenceMaker ---
 class SequenceMaker(_BaseMaker):
     """
@@ -804,40 +656,5 @@ class SequenceMaker(_BaseMaker):
         return self._train_dataset, self._test_dataset
 
 
-# --- Custom Vision Transform Class ---
-class ResizeAspectFill:
-    """
-    Custom transformation to make an image square by padding it to match the
-    longest side, preserving the aspect ratio. The image is finally centered.
-
-    Args:
-        pad_color (Union[str, int]): Color to use for the padding.
-            Defaults to "black".
-    """
-    def __init__(self, pad_color: Union[str, int] = "black") -> None:
-        self.pad_color = pad_color
-
-    def __call__(self, image: Image.Image) -> Image.Image:
-        if not isinstance(image, Image.Image):
-            _LOGGER.error(f"Expected PIL.Image.Image, got {type(image).__name__}")
-            raise TypeError()
-
-        w, h = image.size
-        if w == h:
-            return image
-
-        # Determine padding to center the image
-        if w > h:
-            top_padding = (w - h) // 2
-            bottom_padding = w - h - top_padding
-            padding = (0, top_padding, 0, bottom_padding)
-        else: # h > w
-            left_padding = (h - w) // 2
-            right_padding = h - w - left_padding
-            padding = (left_padding, 0, right_padding, 0)
-
-        return ImageOps.expand(image, padding, fill=self.pad_color)
-
-
 def info():
     _script_info(__all__)
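
The removed ResizeAspectFill pads the short side to square the image while preserving the aspect ratio. Worked through: a 300×200 image has w > h, so top_padding = (300 - 200) // 2 = 50, bottom_padding = 50, and padding = (0, 50, 0, 50) in PIL's (left, top, right, bottom) order, yielding a 300×300 result. A minimal standalone check using only the PIL calls from the deleted code:

from PIL import Image, ImageOps

# Demo of the deleted transform's padding arithmetic (not package code).
img = Image.new("RGB", (300, 200))      # landscape: w=300, h=200
top = (300 - 200) // 2                  # 50
padding = (0, top, 0, 300 - 200 - top)  # (0, 50, 0, 50)
print(ImageOps.expand(img, padding, fill="black").size)  # (300, 300)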
ml_tools/ML_evaluation.py CHANGED
@@ -24,7 +24,7 @@ import warnings
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .keys import SHAPKeys
+from .keys import SHAPKeys, PyTorchLogKeys
 
 
 __all__ = [
@@ -44,8 +44,8 @@ def plot_losses(history: dict, save_dir: Union[str, Path]):
         history (dict): A dictionary containing 'train_loss' and 'val_loss'.
         save_dir (str | Path): Directory to save the plot image.
     """
-    train_loss = history.get('train_loss', [])
-    val_loss = history.get('val_loss', [])
+    train_loss = history.get(PyTorchLogKeys.TRAIN_LOSS, [])
+    val_loss = history.get(PyTorchLogKeys.VAL_LOSS, [])
 
     if not train_loss and not val_loss:
         print("Warning: Loss history is empty or incomplete. Cannot plot.")
ml_tools/ML_inference.py CHANGED
@@ -82,7 +82,6 @@ class _BaseInferenceHandler(ABC):
             _LOGGER.warning("CUDA not available, switching to CPU.")
             device_lower = "cpu"
         elif device_lower == "mps" and not torch.backends.mps.is_available():
-            # Your M-series Mac will appreciate this check!
             _LOGGER.warning("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
             device_lower = "cpu"
         return torch.device(device_lower)
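
The only change here deletes a stray joke comment; the device fallback itself is untouched. For clarity, the same logic rewritten as a standalone sketch (a plain function mirroring the hunk, not the package's _BaseInferenceHandler method):

import torch

def resolve_device(name: str) -> torch.device:
    # Mirrors the fallback chain shown above: unavailable backends drop to CPU.
    name = name.lower()
    if name == "cuda" and not torch.cuda.is_available():
        name = "cpu"
    elif name == "mps" and not torch.backends.mps.is_available():
        name = "cpu"
    return torch.device(name)

print(resolve_device("mps"))  # falls back to cpu without Apple Metal support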
ml_tools/ML_models.py CHANGED
@@ -1,6 +1,6 @@
 import torch
 from torch import nn
-from typing import List, Union, Tuple, Dict, Any
+from typing import List, Union, Tuple, Dict, Any, Literal, Optional
 from pathlib import Path
 import json
 
@@ -748,5 +748,7 @@ class SequencePredictorLSTM(nn.Module, _ArchitectureHandlerMixin):
         )
 
 
+# ---- PyTorch models ---
+
 def info():
     _script_info(__all__)