dragon-ml-toolbox 13.7.0 → 14.0.0 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of dragon-ml-toolbox has been flagged as possibly problematic by the registry.

dragon_ml_toolbox-13.7.0.dist-info/METADATA → dragon_ml_toolbox-14.0.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 13.7.0
+Version: 14.0.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
@@ -34,6 +34,7 @@ Requires-Dist: Pillow; extra == "ml"
 Requires-Dist: evotorch; extra == "ml"
 Requires-Dist: pyarrow; extra == "ml"
 Requires-Dist: colorlog; extra == "ml"
+Requires-Dist: torchmetrics; extra == "ml"
 Provides-Extra: mice
 Requires-Dist: numpy<2.0; extra == "mice"
 Requires-Dist: pandas; extra == "mice"
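
The only dependency change is the new torchmetrics requirement under the "ml" extra. How the package uses it is not visible in this diff; the snippet below is a generic torchmetrics sketch, not code from dragon-ml-toolbox:

import torch
import torchmetrics

# Generic torchmetrics pattern: accumulate a metric over batches, then compute.
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=3, average="micro")
accuracy.update(torch.tensor([0, 2, 1, 2]), torch.tensor([0, 1, 1, 2]))
print(accuracy.compute())  # tensor(0.7500): 3 of 4 predictions correct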
dragon_ml_toolbox-13.7.0.dist-info/RECORD → dragon_ml_toolbox-14.0.0.dist-info/RECORD RENAMED
@@ -1,41 +1,48 @@
-dragon_ml_toolbox-13.7.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-13.7.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+dragon_ml_toolbox-14.0.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-14.0.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
 ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
 ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
 ml_tools/MICE_imputation.py,sha256=KLJXGQLKJ6AuWWttAG-LCCaxpS-ygM4dXPiguHDaL6Y,20815
 ml_tools/ML_callbacks.py,sha256=elD2Yr030sv_6gX_m9GVd6HTyrbmt34nFS8lrgS4HtM,15808
-ml_tools/ML_datasetmaster.py,sha256=6caWbq6eu1RE9V51gmceD71PtMctJRjFuLvkkK5ChiY,36271
-ml_tools/ML_evaluation.py,sha256=li77AuP53pCzgrj6p-jTCNtPFgS9Y9XnMWIZn1ulTBM,18946
+ml_tools/ML_datasetmaster.py,sha256=rsJgZEGBJmfeKF6cR8CQZzfEx4T7Y-p1wUnR15_nNw0,28400
+ml_tools/ML_evaluation.py,sha256=4GU86rUWMIGbkXrvN6PyjfGwKtWvXKE7pMlWpWeBq14,18988
 ml_tools/ML_evaluation_multi.py,sha256=rJKdgtq-9I7oaI7PRzq7aIZ84XdNV0xzlVePZW4nj0k,16095
-ml_tools/ML_inference.py,sha256=yq2gdN6s_OUYC5ZLQrIJC5BA5H33q8UKODXwb-_0M2c,23549
-ml_tools/ML_models.py,sha256=UVWJHPLVIvFno_csCHH1FwBfTwQ5nX0V8F1TbOByZ4I,31388
+ml_tools/ML_inference.py,sha256=YJ953bhNWsdlPRtJQh3h2ACfMIgp8dQ9KtL9Azar-5s,23489
+ml_tools/ML_models.py,sha256=B_6cUMI0-CnVKcd9BoabvMOG-xIZz-eqP9G9sfOceHc,31434
 ml_tools/ML_optimization.py,sha256=P0zkhKAwTpkorIBtR0AOIDcyexo5ngmvFUzo3DfNO-E,22692
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_trainer.py,sha256=ZxeOagXW5adFhYIH-oMTlcrLU6VHe4R1EROI7yypNwQ,29665
-ml_tools/ML_utilities.py,sha256=EnKpPTnJ2qjZmz7kvows4Uu5CfSA7ByRmI1v2-KarKw,9337
+ml_tools/ML_trainer.py,sha256=ZWI4MbUcLeBxyfoUTL96l5tjHHMp9I64h4SdXnjYmBE,49795
+ml_tools/ML_utilities.py,sha256=z6LbpbZwhn8F__fWlKi-g-cAJQXSxwg1NHfC5FBoAyc,21139
+ml_tools/ML_vision_datasetmaster.py,sha256=tOrdatuq_AP8-GDiTrtARvSJdpc8h7dT-OhDJtRQnsk,54433
+ml_tools/ML_vision_evaluation.py,sha256=t12R7i1RkOCt9zu1_lxSBr8OH6A6Get0k8ftDLctn6I,10486
+ml_tools/ML_vision_inference.py,sha256=He3KV3VJAm8PwO-fOq4b9VO8UXFr-GmpuCnoHXf4VZI,20588
+ml_tools/ML_vision_models.py,sha256=G3S4jB9AE9wMpU9ZygOgOx9q1K6t6LAXBYcJ-U2XQ1M,25600
+ml_tools/ML_vision_transformers.py,sha256=95e0aBkHY5VDGE8i5xy57COU7NvSNIgFknnhBubwE40,1832
 ml_tools/PSO_optimization.py,sha256=T-HWHMRJUnPvPwixdU5jif3_rnnI36TzcL8u3oSCwuA,22960
 ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
 ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
 ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
+ml_tools/_ML_pytorch_tabular.py,sha256=G9ZkqtjfYe1KAZmkmlL2bCg5s4EIK_wPBBbDmgeEI8k,21753
+ml_tools/_ML_vision_recipe.py,sha256=zrgxFUvTJqQVuwR7jWlbIC2FD29u6eNFPkTRoJ7yEZI,3178
 ml_tools/__init__.py,sha256=kJiankjz9_qXu7gU92mYqYg_anLvt-B6RtW0mMH8uGo,76
 ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
 ml_tools/_schema.py,sha256=yu6aWmn_2Z4_AxAtJGDDCIa96y6JcUp-vgnCS013Qmw,3908
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
-ml_tools/custom_logger.py,sha256=7tSAgRL7e-Ekm7rS1FLDocaPLCnaoKc7VSrtfwCtCEg,10067
+ml_tools/custom_logger.py,sha256=TGc0Ww2Xlqj2XE3q4bP43hV7T3qnb5ci9f0pYHXF5TY,11226
 ml_tools/data_exploration.py,sha256=-BbWO7BBFapPi_7ZuWo65VqguJXaBfgFSptrXyoWrDk,51902
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
 ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
 ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
-ml_tools/keys.py,sha256=oykUVLB4Wos3AZomowjtI8AFFC5xnMUH-icNHydRpOk,2275
+ml_tools/keys.py,sha256=wZOBuEnnHc54vlOZiimnrxfk-sZh6f6suPppJW8rbPQ,3326
 ml_tools/math_utilities.py,sha256=xeKq1quR_3DYLgowcp4Uam_4s3JltUyOnqMOGuAiYWU,8802
 ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJRs,12763
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=c8uDYjYry_VrLvoG4ixqDj5pij88lVn6Tu4NHcPkwDU,6943
 ml_tools/utilities.py,sha256=aWqvYzmxlD74PD5Yqu1VuTekDJeYLQrmPIU_VeVyRp0,22526
-dragon_ml_toolbox-13.7.0.dist-info/METADATA,sha256=Rk5n5BbicDim5Qg6AzpyG8MwJAqlu5MSclAiLP_V-Vc,6166
-dragon_ml_toolbox-13.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-13.7.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-13.7.0.dist-info/RECORD,,
+dragon_ml_toolbox-14.0.0.dist-info/METADATA,sha256=2efDLOcX7Wx5p_9LfMVX78CK_CP63GT93vH9rX2fa4Y,6209
+dragon_ml_toolbox-14.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-14.0.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-14.0.0.dist-info/RECORD,,
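
The RECORD changes summarize the major-version bump: ml_tools/ML_datasetmaster.py shrinks from 36,271 to 28,400 bytes while five new ml_tools/ML_vision_* modules appear, so the vision tooling removed in the source diff below has evidently been split into dedicated modules rather than dropped. A hedged sketch of the resulting import (the module path is confirmed by the RECORD above; the imported name is only an assumption carried over from the removed API):

# Module path taken from the RECORD; the class name is an assumption
# based on the VisionDatasetMaker removed from ML_datasetmaster.py below.
from ml_tools.ML_vision_datasetmaster import VisionDatasetMaker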
ml_tools/ML_datasetmaster.py CHANGED
@@ -1,13 +1,10 @@
 import torch
-from torch.utils.data import Dataset, Subset
+from torch.utils.data import Dataset
 import pandas
 import numpy
 from sklearn.model_selection import train_test_split
 from typing import Literal, Union, Tuple, List, Optional
 from abc import ABC, abstractmethod
-from PIL import Image, ImageOps
-from torchvision.datasets import ImageFolder
-from torchvision import transforms
 import matplotlib.pyplot as plt
 from pathlib import Path
 
@@ -23,9 +20,7 @@ from ._schema import FeatureSchema
 __all__ = [
     "DatasetMaker",
     "DatasetMakerMulti",
-    "VisionDatasetMaker",
-    "SequenceMaker",
-    "ResizeAspectFill",
+    "SequenceMaker"
 ]
 
 
@@ -473,149 +468,6 @@ class _BaseMaker(ABC):
         pass
 
 
-# --- VisionDatasetMaker ---
-class VisionDatasetMaker(_BaseMaker):
-    """
-    Creates processed PyTorch datasets for computer vision tasks from an
-    image folder directory.
-
-    Uses online augmentations per epoch (image augmentation without creating new files).
-    """
-    def __init__(self, full_dataset: ImageFolder):
-        super().__init__()
-        self.full_dataset = full_dataset
-        self.labels = [s[1] for s in self.full_dataset.samples]
-        self.class_map = full_dataset.class_to_idx
-
-        self._is_split = False
-        self._are_transforms_configured = False
-
-    @classmethod
-    def from_folder(cls, root_dir: str) -> 'VisionDatasetMaker':
-        """Creates a maker instance from a root directory of images."""
-        initial_transform = transforms.Compose([transforms.ToTensor()])
-        full_dataset = ImageFolder(root=root_dir, transform=initial_transform)
-        _LOGGER.info(f"Found {len(full_dataset)} images in {len(full_dataset.classes)} classes.")
-        return cls(full_dataset)
-
-    @staticmethod
-    def inspect_folder(path: Union[str, Path]):
-        """
-        Logs a report of the types, sizes, and channels of image files
-        found in the directory and its subdirectories.
-        """
-        path_obj = make_fullpath(path)
-
-        non_image_files = set()
-        img_types = set()
-        img_sizes = set()
-        img_channels = set()
-        img_counter = 0
-
-        _LOGGER.info(f"Inspecting folder: {path_obj}...")
-        # Use rglob to recursively find all files
-        for filepath in path_obj.rglob('*'):
-            if filepath.is_file():
-                try:
-                    # Using PIL to open is a more reliable check
-                    with Image.open(filepath) as img:
-                        img_types.add(img.format)
-                        img_sizes.add(img.size)
-                        img_channels.update(img.getbands())
-                        img_counter += 1
-                except (IOError, SyntaxError):
-                    non_image_files.add(filepath.name)
-
-        if non_image_files:
-            _LOGGER.warning(f"Non-image or corrupted files found and ignored: {non_image_files}")
-
-        report = (
-            f"\n--- Inspection Report for '{path_obj.name}' ---\n"
-            f"Total images found: {img_counter}\n"
-            f"Image formats: {img_types or 'None'}\n"
-            f"Image sizes (WxH): {img_sizes or 'None'}\n"
-            f"Image channels (bands): {img_channels or 'None'}\n"
-            f"--------------------------------------"
-        )
-        print(report)
-
-    def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
-        """Splits the dataset into training, validation, and optional test sets."""
-        if self._is_split:
-            _LOGGER.warning("Data has already been split.")
-            return self
-
-        if val_size + test_size >= 1.0:
-            _LOGGER.error("The sum of val_size and test_size must be less than 1.")
-            raise ValueError()
-
-        indices = list(range(len(self.full_dataset)))
-        labels_for_split = self.labels if stratify else None
-
-        train_indices, val_test_indices = train_test_split(
-            indices, test_size=(val_size + test_size), random_state=random_state, stratify=labels_for_split
-        )
-
-        if test_size > 0:
-            val_test_labels = [self.labels[i] for i in val_test_indices]
-            stratify_val_test = val_test_labels if stratify else None
-            val_indices, test_indices = train_test_split(
-                val_test_indices, test_size=(test_size / (val_size + test_size)),
-                random_state=random_state, stratify=stratify_val_test
-            )
-            self._test_dataset = Subset(self.full_dataset, test_indices)
-            _LOGGER.info(f"Test set created with {len(self._test_dataset)} images.")
-        else:
-            val_indices = val_test_indices
-
-        self._train_dataset = Subset(self.full_dataset, train_indices)
-        self._val_dataset = Subset(self.full_dataset, val_indices)
-        self._is_split = True
-
-        _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
-        return self
-
-    def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225],
-                             extra_train_transforms: Optional[List] = None) -> 'VisionDatasetMaker':
-        """Configures and applies the image transformations (augmentations)."""
-        if not self._is_split:
-            _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
-            raise RuntimeError()
-
-        base_train_transforms = [transforms.RandomResizedCrop(crop_size), transforms.RandomHorizontalFlip()]
-        if extra_train_transforms:
-            base_train_transforms.extend(extra_train_transforms)
-
-        final_transforms = [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]
-
-        val_transform = transforms.Compose([transforms.Resize(resize_size), transforms.CenterCrop(crop_size), *final_transforms])
-        train_transform = transforms.Compose([*base_train_transforms, *final_transforms])
-
-        self._train_dataset.dataset.transform = train_transform # type: ignore
-        self._val_dataset.dataset.transform = val_transform # type: ignore
-        if self._test_dataset:
-            self._test_dataset.dataset.transform = val_transform # type: ignore
-
-        self._are_transforms_configured = True
-        _LOGGER.info("Image transforms configured and applied.")
-        return self
-
-    def get_datasets(self) -> Tuple[Dataset, ...]:
-        """Returns the final train, validation, and optional test datasets."""
-        if not self._is_split:
-            _LOGGER.error("Data has not been split. Call .split_data() first.")
-            raise RuntimeError()
-        if not self._are_transforms_configured:
-            _LOGGER.warning("Transforms have not been configured. Using default ToTensor only.")
-
-        if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
-
-
 # --- SequenceMaker ---
 class SequenceMaker(_BaseMaker):
     """
@@ -804,40 +656,5 @@ class SequenceMaker(_BaseMaker):
         return self._train_dataset, self._test_dataset
 
 
-# --- Custom Vision Transform Class ---
-class ResizeAspectFill:
-    """
-    Custom transformation to make an image square by padding it to match the
-    longest side, preserving the aspect ratio. The image is finally centered.
-
-    Args:
-        pad_color (Union[str, int]): Color to use for the padding.
-            Defaults to "black".
-    """
-    def __init__(self, pad_color: Union[str, int] = "black") -> None:
-        self.pad_color = pad_color
-
-    def __call__(self, image: Image.Image) -> Image.Image:
-        if not isinstance(image, Image.Image):
-            _LOGGER.error(f"Expected PIL.Image.Image, got {type(image).__name__}")
-            raise TypeError()
-
-        w, h = image.size
-        if w == h:
-            return image
-
-        # Determine padding to center the image
-        if w > h:
-            top_padding = (w - h) // 2
-            bottom_padding = w - h - top_padding
-            padding = (0, top_padding, 0, bottom_padding)
-        else: # h > w
-            left_padding = (h - w) // 2
-            right_padding = h - w - left_padding
-            padding = (left_padding, 0, right_padding, 0)
-
-        return ImageOps.expand(image, padding, fill=self.pad_color)
-
-
 def info():
     _script_info(__all__)
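
The removed ResizeAspectFill pads the short side to square the image while preserving the aspect ratio. Worked through: a 300×200 image has w > h, so top_padding = (300 - 200) // 2 = 50, bottom_padding = 50, and padding = (0, 50, 0, 50) in PIL's (left, top, right, bottom) order, yielding a 300×300 result. A minimal standalone check using only the PIL calls from the deleted code:

from PIL import Image, ImageOps

# Demo of the deleted transform's padding arithmetic (not package code).
img = Image.new("RGB", (300, 200))      # landscape: w=300, h=200
top = (300 - 200) // 2                  # 50
padding = (0, top, 0, 300 - 200 - top)  # (0, 50, 0, 50)
print(ImageOps.expand(img, padding, fill="black").size)  # (300, 300)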
ml_tools/ML_evaluation.py CHANGED
@@ -24,7 +24,7 @@ import warnings
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .keys import SHAPKeys
+from .keys import SHAPKeys, PyTorchLogKeys
 
 
 __all__ = [
@@ -44,8 +44,8 @@ def plot_losses(history: dict, save_dir: Union[str, Path]):
         history (dict): A dictionary containing 'train_loss' and 'val_loss'.
         save_dir (str | Path): Directory to save the plot image.
     """
-    train_loss = history.get('train_loss', [])
-    val_loss = history.get('val_loss', [])
+    train_loss = history.get(PyTorchLogKeys.TRAIN_LOSS, [])
+    val_loss = history.get(PyTorchLogKeys.VAL_LOSS, [])
 
     if not train_loss and not val_loss:
         print("Warning: Loss history is empty or incomplete. Cannot plot.")
ml_tools/ML_inference.py CHANGED
@@ -82,7 +82,6 @@ class _BaseInferenceHandler(ABC):
             _LOGGER.warning("CUDA not available, switching to CPU.")
             device_lower = "cpu"
         elif device_lower == "mps" and not torch.backends.mps.is_available():
-            # Your M-series Mac will appreciate this check!
             _LOGGER.warning("Apple Metal Performance Shaders (MPS) not available, switching to CPU.")
             device_lower = "cpu"
         return torch.device(device_lower)
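
The only change here deletes a stray joke comment; the device fallback itself is untouched. For clarity, the same logic rewritten as a standalone sketch (a plain function mirroring the hunk, not the package's _BaseInferenceHandler method):

import torch

def resolve_device(name: str) -> torch.device:
    # Mirrors the fallback chain shown above: unavailable backends drop to CPU.
    name = name.lower()
    if name == "cuda" and not torch.cuda.is_available():
        name = "cpu"
    elif name == "mps" and not torch.backends.mps.is_available():
        name = "cpu"
    return torch.device(name)

print(resolve_device("mps"))  # falls back to cpu without Apple Metal support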
ml_tools/ML_models.py CHANGED
@@ -1,6 +1,6 @@
 import torch
 from torch import nn
-from typing import List, Union, Tuple, Dict, Any
+from typing import List, Union, Tuple, Dict, Any, Literal, Optional
 from pathlib import Path
 import json
 
@@ -748,5 +748,7 @@ class SequencePredictorLSTM(nn.Module, _ArchitectureHandlerMixin):
         )
 
 
+# ---- PyTorch models ---
+
 def info():
     _script_info(__all__)