dragon-ml-toolbox 14.3.0__py3-none-any.whl → 14.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dragon-ml-toolbox has been flagged as potentially problematic (see the registry page for details).
- {dragon_ml_toolbox-14.3.0.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/METADATA +2 -1
- {dragon_ml_toolbox-14.3.0.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/RECORD +17 -16
- ml_tools/ML_configuration.py +116 -0
- ml_tools/ML_datasetmaster.py +42 -0
- ml_tools/ML_evaluation.py +208 -63
- ml_tools/ML_evaluation_multi.py +40 -10
- ml_tools/ML_trainer.py +38 -12
- ml_tools/ML_utilities.py +50 -1
- ml_tools/ML_vision_datasetmaster.py +247 -66
- ml_tools/ML_vision_models.py +15 -1
- ml_tools/ML_vision_transformers.py +151 -6
- ml_tools/ensemble_evaluation.py +53 -10
- ml_tools/keys.py +2 -1
- {dragon_ml_toolbox-14.3.0.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-14.3.0.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-14.3.0.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-14.3.0.dist-info → dragon_ml_toolbox-14.8.0.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,7 @@ import torchvision.transforms.functional as TF
 from pathlib import Path
 import random
 import json
+import inspect
 
 from .ML_datasetmaster import _BaseMaker
 from .path_manager import make_fullpath
@@ -272,8 +273,8 @@ class VisionDatasetMaker(_BaseMaker):
                 for validation/testing.
             crop_size (int): The target size (square) for the final
                 cropped image.
-            mean (List[float]): The mean values for normalization (e.g., ImageNet mean).
-            std (List[float]): The standard deviation values for normalization (e.g., ImageNet std).
+            mean (List[float] | None): The mean values for normalization (e.g., ImageNet mean).
+            std (List[float] | None): The standard deviation values for normalization (e.g., ImageNet std).
             extra_train_transforms (List[Callable] | None): A list of additional torchvision transforms to add to the end of the training transformations.
             pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.
 
@@ -411,16 +412,58 @@ class VisionDatasetMaker(_BaseMaker):
         # validate path
         file_path = make_fullpath(filepath, make=True, enforce="file")
 
-        #
+        # Handle pre_transforms
         for t in components[VisionTransformRecipeKeys.PRE_TRANSFORMS]:
             t_name = t.__class__.__name__
+            t_class = t.__class__
+            kwargs = {}
+
+            # 1. Check custom registry first
             if t_name in TRANSFORM_REGISTRY:
-
-
-
-
+                _LOGGER.debug(f"Found '{t_name}' in TRANSFORM_REGISTRY.")
+                kwargs = getattr(t, VisionTransformRecipeKeys.KWARGS, {})
+
+            # 2. Else, try to introspect for standard torchvision transforms
             else:
-                _LOGGER.
+                _LOGGER.debug(f"'{t_name}' not in registry. Attempting introspection...")
+                try:
+                    # Get the __init__ signature of the transform's class
+                    sig = inspect.signature(t_class.__init__)
+
+                    # Iterate over its __init__ parameters (e.g., 'num_output_channels')
+                    for param in sig.parameters.values():
+                        if param.name == 'self':
+                            continue
+
+                        # Check if the *instance* 't' has that parameter as an attribute
+                        attr_name_public = param.name
+                        attr_name_private = '_' + param.name
+
+                        attr_to_get = ""
+
+                        if hasattr(t, attr_name_public):
+                            attr_to_get = attr_name_public
+                        elif hasattr(t, attr_name_private):
+                            attr_to_get = attr_name_private
+                        else:
+                            # Parameter in __init__ has no matching attribute
+                            continue
+
+                        # Store the value under the __init__ parameter's name
+                        kwargs[param.name] = getattr(t, attr_to_get)
+
+                    _LOGGER.debug(f"Introspection for '{t_name}' found kwargs: {kwargs}")
+
+                except (ValueError, TypeError):
+                    # Fails on some built-ins or C-implemented __init__
+                    _LOGGER.warning(f"Could not introspect parameters for '{t_name}'. If this transform has parameters, they will not be saved.")
+                    kwargs = {}
+
+            # 3. Add to pipeline
+            recipe[VisionTransformRecipeKeys.PIPELINE].append({
+                VisionTransformRecipeKeys.NAME: t_name,
+                VisionTransformRecipeKeys.KWARGS: kwargs
+            })
 
         # 2. Add standard transforms
         recipe[VisionTransformRecipeKeys.PIPELINE].extend([
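The introspection fallback above rebuilds a transform's constructor kwargs from its instance attributes. A minimal standalone sketch of the same idea, using only the standard library and torchvision (illustrative only; it is not part of dragon-ml-toolbox and assumes the transform stores each __init__ parameter as a public or underscore-prefixed attribute):

    import inspect
    from torchvision import transforms

    t = transforms.Grayscale(num_output_channels=3)
    kwargs = {}
    for param in inspect.signature(t.__class__.__init__).parameters.values():
        if param.name == "self":
            continue
        # Accept either a public attribute or a "_"-prefixed one
        for attr in (param.name, "_" + param.name):
            if hasattr(t, attr):
                kwargs[param.name] = getattr(t, attr)
                break

    print({"name": t.__class__.__name__, "kwargs": kwargs})
    # Expected: {'name': 'Grayscale', 'kwargs': {'num_output_channels': 3}}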
@@ -456,6 +499,39 @@ class VisionDatasetMaker(_BaseMaker):
 
         return self.class_map
 
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images\n"
+        elif self._full_dataset:
+            return f"Full Dataset: {len(self._full_dataset)} images\n"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found\n"
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f" Split: {self._is_split}\n"
+        s += f" Transforms Configured: {self._are_transforms_configured}\n"
+
+        if self.class_map:
+            s += f" Classes: {len(self.class_map)}\n"
+
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+        elif self._full_dataset:
+            s += f" Full Dataset Size: {len(self._full_dataset)} images\n"
+
+        return s
+
 
 class _DatasetTransformer(Dataset):
     """
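The new images_per_dataset() and __repr__ helpers are purely informational. A hedged usage sketch (assumes a VisionDatasetMaker has already been constructed and split; construction details omitted):

    # maker: a VisionDatasetMaker that has been created and had its data split
    print(maker.images_per_dataset())   # e.g. "Train | Validation | Test: 800 | 100 | 100 images"
    print(repr(maker))                  # split state, transform state, class count, per-split sizes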
@@ -643,6 +719,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         self._are_transforms_configured = False
         self.train_transform: Optional[Callable] = None
         self.val_transform: Optional[Callable] = None
+        self._has_mean_std: bool = False
 
     @classmethod
     def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
@@ -806,8 +883,8 @@ class SegmentationDatasetMaker(_BaseMaker):
     def configure_transforms(self,
                              resize_size: int = 256,
                              crop_size: int = 224,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+                             mean: Optional[List[float]] = [0.485, 0.456, 0.406],
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
         """
         Configures and applies the image and mask transformations.
 
@@ -818,8 +895,8 @@ class SegmentationDatasetMaker(_BaseMaker):
                 for validation/testing.
             crop_size (int): The target size (square) for the final
                 cropped image.
-            mean (List[float]): The mean values for image normalization.
-            std (List[float]): The std dev values for image normalization.
+            mean (List[float] | None): The mean values for image normalization.
+            std (List[float] | None): The std dev values for image normalization.
 
         Returns:
             SegmentationDatasetMaker: The same instance, with transforms applied.
@@ -828,29 +905,50 @@ class SegmentationDatasetMaker(_BaseMaker):
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
             raise RuntimeError()
 
+        if (mean is None and std is not None) or (mean is not None and std is None):
+            _LOGGER.error(f"'mean' and 'std' must be both None or both defined, but only one was provided.")
+            raise ValueError()
+
         # --- Store components for validation recipe ---
-        self.val_recipe_components = {
+        self.val_recipe_components: dict[str,Any] = {
             VisionTransformRecipeKeys.RESIZE_SIZE: resize_size,
             VisionTransformRecipeKeys.CROP_SIZE: crop_size,
-            VisionTransformRecipeKeys.MEAN: mean,
-            VisionTransformRecipeKeys.STD: std
         }
+
+        if mean is not None and std is not None:
+            self.val_recipe_components.update({
+                VisionTransformRecipeKeys.MEAN: mean,
+                VisionTransformRecipeKeys.STD: std
+            })
+            self._has_mean_std = True
 
         # --- Validation/Test Pipeline (Deterministic) ---
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if self._has_mean_std:
+            self.val_transform = _PairedCompose([
+                _PairedResize(resize_size),
+                _PairedCenterCrop(crop_size),
+                _PairedToTensor(),
+                _PairedNormalize(mean, std) # type: ignore
+            ])
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _PairedCompose([
+                _PairedRandomResizedCrop(crop_size),
+                _PairedRandomHorizontalFlip(p=0.5),
+                _PairedToTensor(),
+                _PairedNormalize(mean, std) # type: ignore
+            ])
+        else:
+            self.val_transform = _PairedCompose([
+                _PairedResize(resize_size),
+                _PairedCenterCrop(crop_size),
+                _PairedToTensor()
+            ])
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _PairedCompose([
+                _PairedRandomResizedCrop(crop_size),
+                _PairedRandomHorizontalFlip(p=0.5),
+                _PairedToTensor()
+            ])
 
         # --- Apply Transforms to the Datasets ---
         self._train_dataset.transform = self.train_transform # type: ignore
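As the hunk shows, normalization is now optional for SegmentationDatasetMaker: passing both mean and std keeps the previous behaviour, passing None for both omits _PairedNormalize from the training and validation pipelines, and passing only one of the two raises ValueError. A hedged usage sketch (assumes a maker that has already been split):

    # ImageNet statistics, as before
    maker.configure_transforms(resize_size=256, crop_size=224,
                               mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Skip normalization entirely (e.g. when the model applies its own preprocessing)
    maker.configure_transforms(resize_size=256, crop_size=224, mean=None, std=None)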
@@ -903,23 +1001,57 @@ class SegmentationDatasetMaker(_BaseMaker):
 
         # validate path
         file_path = make_fullpath(filepath, make=True, enforce="file")
-
+
         # Add standard transforms
         recipe: Dict[str, Any] = {
             VisionTransformRecipeKeys.TASK: "segmentation",
             VisionTransformRecipeKeys.PIPELINE: [
-                {VisionTransformRecipeKeys.NAME: "Resize", "kwargs": {"size": components[
-                {VisionTransformRecipeKeys.NAME: "CenterCrop", "kwargs": {"size": components[
-                {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}}
-                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
-                    "mean": components["mean"],
-                    "std": components["std"]
-                }}
+                {VisionTransformRecipeKeys.NAME: "Resize", "kwargs": {"size": components[VisionTransformRecipeKeys.RESIZE_SIZE]}},
+                {VisionTransformRecipeKeys.NAME: "CenterCrop", "kwargs": {"size": components[VisionTransformRecipeKeys.CROP_SIZE]}},
+                {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}}
             ]
         }
 
+        if self._has_mean_std:
+            recipe[VisionTransformRecipeKeys.PIPELINE].append(
+                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
+                    "mean": components[VisionTransformRecipeKeys.MEAN],
+                    "std": components[VisionTransformRecipeKeys.STD]
+                }}
+            )
+
         # Save the file
         save_recipe(recipe, file_path)
+
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images\n"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found\n"
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f" Total Image-Mask Pairs: {len(self.image_paths)}\n"
+        s += f" Split: {self._is_split}\n"
+        s += f" Transforms Configured: {self._are_transforms_configured}\n"
+
+        if self.class_map:
+            s += f" Classes: {list(self.class_map.keys())}\n"
+
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+
+        return s
 
 
 # Object detection
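For reference, the recipe assembled above serializes to roughly the following structure (key names are shown as plain strings for illustration; the real keys come from VisionTransformRecipeKeys, and the Normalize entry is appended only when mean/std were configured):

    {
        "task": "segmentation",
        "pipeline": [
            {"name": "Resize",     "kwargs": {"size": 256}},
            {"name": "CenterCrop", "kwargs": {"size": 224}},
            {"name": "ToTensor",   "kwargs": {}},
            {"name": "Normalize",  "kwargs": {"mean": [0.485, 0.456, 0.406],
                                              "std": [0.229, 0.224, 0.225]}}
        ]
    }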
@@ -1071,6 +1203,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         self.train_transform: Optional[Callable] = None
         self.val_transform: Optional[Callable] = None
         self._val_recipe_components: Optional[Dict[str, Any]] = None
+        self._has_mean_std: bool = False
 
     @classmethod
     def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
@@ -1230,8 +1363,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         return self
 
     def configure_transforms(self,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+                             mean: Optional[List[float]] = [0.485, 0.456, 0.406],
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
         """
         Configures and applies the image and target transformations.
 
@@ -1242,8 +1375,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         Transforms are limited to augmentation (flip), ToTensor, and Normalize.
 
         Args:
-            mean (List[float]): The mean values for image normalization.
-            std (List[float]): The std dev values for image normalization.
+            mean (List[float] | None): The mean values for image normalization.
+            std (List[float] | None): The std dev values for image normalization.
 
         Returns:
             ObjectDetectionDatasetMaker: The same instance, with transforms applied.
@@ -1252,24 +1385,42 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
             raise RuntimeError()
 
-
-
-
-            VisionTransformRecipeKeys.STD: std
-        }
-
-        # --- Validation/Test Pipeline (Deterministic) ---
-        self.val_transform = _OD_PairedCompose([
-            _OD_PairedToTensor(),
-            _OD_PairedNormalize(mean, std)
-        ])
+        if (mean is None and std is not None) or (mean is not None and std is None):
+            _LOGGER.error(f"'mean' and 'std' must be both None or both defined, but only one was provided.")
+            raise ValueError()
 
-
-
-
-
-
-
+        if mean is not None and std is not None:
+            # --- Store components for validation recipe ---
+            self._val_recipe_components = {
+                VisionTransformRecipeKeys.MEAN: mean,
+                VisionTransformRecipeKeys.STD: std
+            }
+            self._has_mean_std = True
+
+        if self._has_mean_std:
+            # --- Validation/Test Pipeline (Deterministic) ---
+            self.val_transform = _OD_PairedCompose([
+                _OD_PairedToTensor(),
+                _OD_PairedNormalize(mean, std) # type: ignore
+            ])
+
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _OD_PairedCompose([
+                _OD_PairedRandomHorizontalFlip(p=0.5),
+                _OD_PairedToTensor(),
+                _OD_PairedNormalize(mean, std) # type: ignore
+            ])
+        else:
+            # --- Validation/Test Pipeline (Deterministic) ---
+            self.val_transform = _OD_PairedCompose([
+                _OD_PairedToTensor()
+            ])
+
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _OD_PairedCompose([
+                _OD_PairedRandomHorizontalFlip(p=0.5),
+                _OD_PairedToTensor()
+            ])
 
         # --- Apply Transforms to the Datasets ---
         self._train_dataset.transform = self.train_transform # type: ignore
@@ -1325,10 +1476,6 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
 
         components = self._val_recipe_components
 
-        if not components:
-            _LOGGER.error(f"Error getting the transformers recipe for validation set.")
-            raise ValueError()
-
         # validate path
         file_path = make_fullpath(filepath, make=True, enforce="file")
 
@@ -1337,15 +1484,49 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             VisionTransformRecipeKeys.TASK: "object_detection",
             VisionTransformRecipeKeys.PIPELINE: [
                 {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}},
-                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
-                    "mean": components["mean"],
-                    "std": components["std"]
-                }}
             ]
         }
 
+        if self._has_mean_std and components:
+            recipe[VisionTransformRecipeKeys.PIPELINE].append(
+                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
+                    "mean": components[VisionTransformRecipeKeys.MEAN],
+                    "std": components[VisionTransformRecipeKeys.STD]
+                }}
+            )
+
         # Save the file
         save_recipe(recipe, file_path)
+
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images\n"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found\n"
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f" Total Image-Annotation Pairs: {len(self.image_paths)}\n"
+        s += f" Split: {self._is_split}\n"
+        s += f" Transforms Configured: {self._are_transforms_configured}\n"
+
+        if self.class_map:
+            s += f" Classes ({len(self.class_map)}): {list(self.class_map.keys())}\n"
+
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+
+        return s
 
 
 def info():
ml_tools/ML_vision_models.py CHANGED
@@ -47,12 +47,17 @@ class _BaseVisionWrapper(nn.Module, _ArchitectureHandlerMixin, ABC):
         self.num_classes = num_classes
         self.in_channels = in_channels
         self.model_name = model_name
+        self._pretrained_default_transforms = None
 
         # --- 2. Instantiate the base model ---
         if init_with_pretrained:
             weights_enum = getattr(vision_models, weights_enum_name, None) if weights_enum_name else None
             weights = weights_enum.IMAGENET1K_V1 if weights_enum else None
 
+            # Save transformations for pretrained models
+            if weights:
+                self._pretrained_default_transforms = weights.transforms()
+
             if weights is None and init_with_pretrained:
                 _LOGGER.warning(f"Could not find modern weights for {model_name}. Using 'pretrained=True' legacy fallback.")
                 self.model = getattr(vision_models, model_name)(pretrained=True)
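The stored _pretrained_default_transforms captures the preprocessing pipeline published alongside the pretrained weights, via torchvision's weights API. A standalone sketch of what is being captured (torchvision only, unrelated to this package's classes):

    from torchvision.models import ResNet18_Weights

    weights = ResNet18_Weights.IMAGENET1K_V1
    preprocess = weights.transforms()  # the inference preprocessing the weights were trained with
    # e.g. ImageClassification(crop_size=[224], resize_size=[256], mean=..., std=...)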
@@ -331,6 +336,7 @@ class _BaseSegmentationWrapper(nn.Module, _ArchitectureHandlerMixin, ABC):
         self.num_classes = num_classes
         self.in_channels = in_channels
         self.model_name = model_name
+        self._pretrained_default_transforms = None
 
         # --- 2. Instantiate the base model ---
         model_kwargs = {
@@ -343,6 +349,10 @@ class _BaseSegmentationWrapper(nn.Module, _ArchitectureHandlerMixin, ABC):
             weights_enum = getattr(vision_models.segmentation, weights_enum_name, None) if weights_enum_name else None
             weights = weights_enum.DEFAULT if weights_enum else None
 
+            # save pretrained model transformations
+            if weights:
+                self._pretrained_default_transforms = weights.transforms()
+
             if weights is None:
                 _LOGGER.warning(f"Could not find modern weights for {model_name}. Using 'pretrained=True' legacy fallback.")
                 # Legacy models used 'pretrained=True' and num_classes was separate
@@ -520,7 +530,7 @@ class DragonFastRCNN(nn.Module, _ArchitectureHandlerMixin):
     This wrapper allows for customizing the model backbone, input channels,
     and the number of output classes for transfer learning.
 
-    NOTE: This model is NOT compatible with the MLTrainer class.
+    NOTE: This model is NOT compatible with the MLTrainer class. Use the ObjectDetectionTrainer instead.
     """
     def __init__(self,
                  num_classes: int,
@@ -550,6 +560,7 @@ class DragonFastRCNN(nn.Module, _ArchitectureHandlerMixin):
         self.num_classes = num_classes
         self.in_channels = in_channels
         self.model_name = model_name
+        self._pretrained_default_transforms = None
 
         # --- 2. Instantiate the base model ---
         model_constructor = getattr(detection_models, model_name)
@@ -560,6 +571,9 @@ class DragonFastRCNN(nn.Module, _ArchitectureHandlerMixin):
 
         weights_enum = getattr(detection_models, weights_enum_name, None) if weights_enum_name else None
         weights = weights_enum.DEFAULT if weights_enum and init_with_pretrained else None
+
+        if weights:
+            self._pretrained_default_transforms = weights.transforms()
 
         self.model = model_constructor(weights=weights, weights_backbone=weights)
 