dragon-ml-toolbox 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of dragon-ml-toolbox as possibly problematic.
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/METADATA +10 -5
- dragon_ml_toolbox-16.0.0.dist-info/RECORD +51 -0
- ml_tools/ETL_cleaning.py +20 -20
- ml_tools/ETL_engineering.py +23 -25
- ml_tools/GUI_tools.py +20 -20
- ml_tools/MICE_imputation.py +3 -3
- ml_tools/ML_callbacks.py +43 -26
- ml_tools/ML_configuration.py +309 -0
- ml_tools/ML_datasetmaster.py +220 -260
- ml_tools/ML_evaluation.py +317 -81
- ml_tools/ML_evaluation_multi.py +127 -36
- ml_tools/ML_inference.py +249 -207
- ml_tools/ML_models.py +13 -102
- ml_tools/ML_models_advanced.py +1 -1
- ml_tools/ML_optimization.py +12 -12
- ml_tools/ML_scaler.py +11 -11
- ml_tools/ML_sequence_datasetmaster.py +341 -0
- ml_tools/ML_sequence_evaluation.py +215 -0
- ml_tools/ML_sequence_inference.py +391 -0
- ml_tools/ML_sequence_models.py +139 -0
- ml_tools/ML_trainer.py +1247 -338
- ml_tools/ML_utilities.py +51 -2
- ml_tools/ML_vision_datasetmaster.py +262 -118
- ml_tools/ML_vision_evaluation.py +26 -6
- ml_tools/ML_vision_inference.py +117 -140
- ml_tools/ML_vision_models.py +15 -1
- ml_tools/ML_vision_transformers.py +233 -7
- ml_tools/PSO_optimization.py +6 -6
- ml_tools/SQL.py +4 -4
- ml_tools/{keys.py → _keys.py} +45 -1
- ml_tools/_schema.py +1 -1
- ml_tools/ensemble_evaluation.py +54 -11
- ml_tools/ensemble_inference.py +7 -33
- ml_tools/ensemble_learning.py +1 -1
- ml_tools/optimization_tools.py +2 -2
- ml_tools/path_manager.py +5 -5
- ml_tools/utilities.py +1 -2
- dragon_ml_toolbox-14.3.1.dist-info/RECORD +0 -48
- ml_tools/RNN_forecast.py +0 -56
- ml_tools/_ML_vision_recipe.py +0 -88
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_vision_datasetmaster.py:

```diff
@@ -12,25 +12,23 @@ import random
 import json
 import inspect
 
-from .ML_datasetmaster import _BaseMaker
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .
-from .
-from .ML_vision_transformers import TRANSFORM_REGISTRY
+from ._keys import VisionTransformRecipeKeys, ObjectDetectionKeys
+from .ML_vision_transformers import TRANSFORM_REGISTRY, _save_recipe
 from .custom_logger import custom_logger
 
 
 __all__ = [
-    "
-    "
-    "
+    "DragonDatasetVision",
+    "DragonDatasetSegmentation",
+    "DragonDatasetObjectDetection"
 ]
 
 
-# ---
-class VisionDatasetMaker(_BaseMaker):
+# --- Vision Maker ---
+class DragonDatasetVision:
     """
     Creates processed PyTorch datasets for computer vision tasks from an
     image folder directory.
```
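The headline change in this hunk is the public rename: the `*DatasetMaker` classes become `Dragon*` classes and no longer inherit from `_BaseMaker`. A minimal migration sketch; the module import path is an assumption based on the file being diffed, not something this page states:

```python
# 14.3.1 (old name):
# from ml_tools.ML_vision_datasetmaster import VisionDatasetMaker

# 16.0.0 (new names, matching the updated __all__):
from ml_tools.ML_vision_datasetmaster import (
    DragonDatasetVision,
    DragonDatasetSegmentation,
    DragonDatasetObjectDetection,
)
```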
```diff
@@ -45,7 +43,9 @@ class VisionDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class methods `from_folder()` or `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self._full_dataset: Optional[ImageFolder] = None
         self.labels: Optional[List[int]] = None
         self.class_map: Optional[dict[str,int]] = None
@@ -56,7 +56,7 @@ class VisionDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folder(cls, root_dir: Union[str,Path]) -> 'VisionDatasetMaker':
+    def from_folder(cls, root_dir: Union[str,Path]) -> 'DragonDatasetVision':
         """
         Creates a maker instance from a single root directory of images.
 
@@ -70,7 +70,7 @@ class VisionDatasetMaker(_BaseMaker):
             root_dir (str | Path): The path to the root directory containing class subfolders.
 
         Returns:
-
+            Instance: A new instance with the full dataset loaded.
         """
         root_path = make_fullpath(root_dir, enforce="directory")
         # Load with NO transform. We get PIL Images.
@@ -87,7 +87,7 @@ class VisionDatasetMaker(_BaseMaker):
     def from_folders(cls,
                      train_dir: Union[str,Path],
                      val_dir: Union[str,Path],
-                     test_dir: Optional[Union[str,Path]] = None) -> 'VisionDatasetMaker':
+                     test_dir: Optional[Union[str,Path]] = None) -> 'DragonDatasetVision':
         """
         Creates a maker instance from separate, pre-split directories.
 
@@ -101,7 +101,7 @@ class VisionDatasetMaker(_BaseMaker):
             test_dir (str | Path | None): Path to the test data directory.
 
         Returns:
-
+            Instance: A new, pre-split instance.
 
         Raises:
             ValueError: If the classes found in train, val, or test directories are inconsistent.
@@ -186,7 +186,7 @@ class VisionDatasetMaker(_BaseMaker):
         print(report)
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
+                   stratify: bool = True, random_state: Optional[int] = None) -> 'DragonDatasetVision':
         """
         Splits the dataset into train, validation, and optional test sets.
 
@@ -202,7 +202,7 @@ class VisionDatasetMaker(_BaseMaker):
            random_state (int | None): Seed for the random number generator for reproducible splits.
 
         Returns:
-
+            Self: The same instance, now with datasets split.
 
         Raises:
             ValueError: If `val_size` and `test_size` sum to 1.0 or more.
```
```diff
@@ -249,22 +249,23 @@ class VisionDatasetMaker(_BaseMaker):
         _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
         return self
 
-    def configure_transforms(self,
+    def configure_transforms(self,
+                             resize_size: int = 256,
+                             crop_size: int = 224,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
                              std: Optional[List[float]] = [0.229, 0.224, 0.225],
                              pre_transforms: Optional[List[Callable]] = None,
-                             extra_train_transforms: Optional[List[Callable]] = None) -> '
+                             extra_train_transforms: Optional[List[Callable]] = None) -> 'DragonDatasetVision':
         """
         Configures and applies the image transformations and augmentations.
 
         This method must be called AFTER data is loaded and split.
 
         It sets up two pipelines:
-        1. **Training Pipeline:** Includes random augmentations
-           `RandomResizedCrop` and `
+        1. **Training Pipeline:** Includes random augmentations:
+           `RandomResizedCrop(crop_size)`, `RandomHorizontalFlip(0.5)`, and `RandomRotation(90)` (plus any
            `extra_train_transforms`) for online augmentation.
-        2. **Validation/Test Pipeline:** A deterministic pipeline using
-           `Resize` and `CenterCrop` for consistent evaluation.
+        2. **Validation/Test Pipeline:** A deterministic pipeline using `Resize` and `CenterCrop` for consistent evaluation.
 
         Both pipelines finish with `ToTensor` and `Normalize`.
 
@@ -273,13 +274,13 @@ class VisionDatasetMaker(_BaseMaker):
                 for validation/testing.
             crop_size (int): The target size (square) for the final
                 cropped image.
-            mean (List[float]): The mean values for normalization (e.g., ImageNet mean).
-            std (List[float]): The standard deviation values for normalization (e.g., ImageNet std).
+            mean (List[float] | None): The mean values for normalization (e.g., ImageNet mean).
+            std (List[float] | None): The standard deviation values for normalization (e.g., ImageNet std).
             extra_train_transforms (List[Callable] | None): A list of additional torchvision transforms to add to the end of the training transformations.
             pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.
 
         Returns:
-
+            Self: The same instance, with transforms applied.
 
         Raises:
             RuntimeError: If called before data is split.
@@ -315,8 +316,9 @@ class VisionDatasetMaker(_BaseMaker):
 
         # Base augmentations for training
         base_train_transforms = [
-            transforms.RandomResizedCrop(crop_size),
-            transforms.RandomHorizontalFlip()
+            transforms.RandomResizedCrop(size=crop_size),
+            transforms.RandomHorizontalFlip(p=0.5),
+            transforms.RandomRotation(degrees=90)
         ]
         if extra_train_transforms:
             base_train_transforms.extend(extra_train_transforms)
```
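The default training augmentations gain explicit keyword arguments and a new `RandomRotation(degrees=90)` step. For reference, a standalone torchvision pipeline equivalent to what `configure_transforms()` now builds with default arguments; the trailing `ToTensor`/`Normalize` steps are assumed from the docstring rather than shown in this hunk:

```python
from torchvision import transforms

# Sketch of the 16.0.0 default training pipeline (ImageNet mean/std defaults).
train_pipeline = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),  # new default augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```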
```diff
@@ -381,8 +383,8 @@ class VisionDatasetMaker(_BaseMaker):
             _LOGGER.warning("Transforms have not been configured.")
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
         """
@@ -481,7 +483,7 @@ class VisionDatasetMaker(_BaseMaker):
         )
 
         # 3. Save the file
-
+        _save_recipe(recipe, file_path)
 
     def save_class_map(self, save_dir: Union[str,Path]) -> dict[str,int]:
         """
@@ -499,6 +501,39 @@ class VisionDatasetMaker(_BaseMaker):
 
         return self.class_map
 
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+        elif self._full_dataset:
+            return f"Full Dataset: {len(self._full_dataset)} images"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found"
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f"  Split: {self._is_split}\n"
+        s += f"  Transforms Configured: {self._are_transforms_configured}\n"
+
+        if self.class_map:
+            s += f"  Classes: {len(self.class_map)}\n"
+
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            s += f"  Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+        elif self._full_dataset:
+            s += f"  Full Dataset Size: {len(self._full_dataset)} images\n"
+
+        return s
+
 
 class _DatasetTransformer(Dataset):
     """
```
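With the new helpers in place, a typical end-to-end call chain for the renamed vision maker looks like the sketch below; the directory layout and split sizes are illustrative, and only methods visible in this diff are used:

```python
maker = DragonDatasetVision.from_folder("data/images")  # one subfolder per class
maker.split_data(val_size=0.2, test_size=0.1, stratify=True, random_state=42)
maker.configure_transforms(resize_size=256, crop_size=224)

print(maker.images_per_dataset())  # "Train | Validation | Test: ... images"
print(maker)                       # new __repr__ summary added in 16.0.0
```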
```diff
@@ -655,8 +690,8 @@ class _PairedRandomResizedCrop:
 
         return cropped_image, cropped_mask # type: ignore
 
-# ---
-class SegmentationDatasetMaker(_BaseMaker):
+# --- Segmentation Dataset ---
+class DragonDatasetSegmentation:
     """
     Creates processed PyTorch datasets for segmentation from image and mask folders.
 
@@ -665,7 +700,7 @@ class SegmentationDatasetMaker(_BaseMaker):
     to both the image and its corresponding mask.
 
     Workflow:
-    1. `maker = SegmentationDatasetMaker.from_folders(img_dir, mask_dir)`
+    1. `maker = DragonDatasetSegmentation.from_folders(img_dir, mask_dir)`
     2. `maker.set_class_map({'background': 0, 'road': 1})`
     3. `maker.split_data(val_size=0.2)`
     4. `maker.configure_transforms(crop_size=256)`
@@ -677,7 +712,9 @@ class SegmentationDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class method `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self.image_paths: List[Path] = []
         self.mask_paths: List[Path] = []
         self.class_map: Dict[str, int] = {}
@@ -686,9 +723,10 @@ class SegmentationDatasetMaker(_BaseMaker):
         self._are_transforms_configured = False
         self.train_transform: Optional[Callable] = None
         self.val_transform: Optional[Callable] = None
+        self._has_mean_std: bool = False
 
     @classmethod
-    def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
+    def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'DragonDatasetSegmentation':
         """
         Creates a maker instance by loading all matching image-mask pairs
         from two corresponding directories.
@@ -701,7 +739,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             mask_dir (str | Path): Path to the directory containing segmentation masks.
 
         Returns:
-
+            DragonDatasetSegmentation: A new instance with all pairs loaded.
         """
         maker = cls()
         img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -761,14 +799,14 @@ class SegmentationDatasetMaker(_BaseMaker):
         Logs a report of the types, sizes, and channels of image files
         found in the directory. Useful for checking masks.
         """
-
+        DragonDatasetVision.inspect_folder(path)
 
-    def set_class_map(self, class_map: Dict[str, int]) -> 'SegmentationDatasetMaker':
+    def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetSegmentation':
         """
-        Sets a map of
+        Sets a map of class_name -> pixel value. This is used by the Trainer for clear evaluation reports.
 
         Args:
-            class_map (Dict[
+            class_map (Dict[str, int]): A dictionary mapping the integer pixel
                 value in a mask to its string name.
                 Example: {'background': 0, 'road': 1, 'car': 2}
         """
@@ -784,7 +822,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         return []
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   random_state: Optional[int] = 42) -> 'SegmentationDatasetMaker':
+                   random_state: Optional[int] = 42) -> 'DragonDatasetSegmentation':
         """
         Splits the loaded image-mask pairs into train, validation, and test sets.
 
@@ -794,7 +832,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for reproducible splits.
 
         Returns:
-
+            DragonDatasetSegmentation: The same instance, now with datasets created.
         """
         if self._is_split:
             _LOGGER.warning("Data has already been split.")
@@ -849,8 +887,8 @@ class SegmentationDatasetMaker(_BaseMaker):
     def configure_transforms(self,
                              resize_size: int = 256,
                              crop_size: int = 224,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+                             mean: Optional[List[float]] = [0.485, 0.456, 0.406],
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetSegmentation':
         """
         Configures and applies the image and mask transformations.
 
```
```diff
@@ -861,39 +899,60 @@ class SegmentationDatasetMaker(_BaseMaker):
                 for validation/testing.
             crop_size (int): The target size (square) for the final
                 cropped image.
-            mean (List[float]): The mean values for image normalization.
-            std (List[float]): The std dev values for image normalization.
+            mean (List[float] | None): The mean values for image normalization.
+            std (List[float] | None): The std dev values for image normalization.
 
         Returns:
-
+            DragonDatasetSegmentation: The same instance, with transforms applied.
         """
         if not self._is_split:
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
             raise RuntimeError()
 
+        if (mean is None and std is not None) or (mean is not None and std is None):
+            _LOGGER.error(f"'mean' and 'std' must be both None or both defined, but only one was provided.")
+            raise ValueError()
+
         # --- Store components for validation recipe ---
-        self.val_recipe_components = {
+        self.val_recipe_components: dict[str,Any] = {
             VisionTransformRecipeKeys.RESIZE_SIZE: resize_size,
             VisionTransformRecipeKeys.CROP_SIZE: crop_size,
-            VisionTransformRecipeKeys.MEAN: mean,
-            VisionTransformRecipeKeys.STD: std
         }
+
+        if mean is not None and std is not None:
+            self.val_recipe_components.update({
+                VisionTransformRecipeKeys.MEAN: mean,
+                VisionTransformRecipeKeys.STD: std
+            })
+            self._has_mean_std = True
 
         # --- Validation/Test Pipeline (Deterministic) ---
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
+        if self._has_mean_std:
+            self.val_transform = _PairedCompose([
+                _PairedResize(resize_size),
+                _PairedCenterCrop(crop_size),
+                _PairedToTensor(),
+                _PairedNormalize(mean, std) # type: ignore
+            ])
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _PairedCompose([
+                _PairedRandomResizedCrop(crop_size),
+                _PairedRandomHorizontalFlip(p=0.5),
+                _PairedToTensor(),
+                _PairedNormalize(mean, std) # type: ignore
+            ])
+        else:
+            self.val_transform = _PairedCompose([
+                _PairedResize(resize_size),
+                _PairedCenterCrop(crop_size),
+                _PairedToTensor()
+            ])
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _PairedCompose([
+                _PairedRandomResizedCrop(crop_size),
+                _PairedRandomHorizontalFlip(p=0.5),
+                _PairedToTensor()
+            ])
 
         # --- Apply Transforms to the Datasets ---
         self._train_dataset.transform = self.train_transform # type: ignore
```
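Normalization is now optional across the paired segmentation pipelines: passing `mean=None, std=None` drops the `_PairedNormalize` step, while passing only one of the two raises `ValueError`. A minimal sketch of the two valid call patterns (paths and class map are illustrative):

```python
maker = DragonDatasetSegmentation.from_folders("data/images", "data/masks")
maker.set_class_map({'background': 0, 'road': 1})
maker.split_data(val_size=0.2)

# Default: normalize with ImageNet statistics.
maker.configure_transforms(crop_size=256)

# Alternative: skip normalization; mean and std must BOTH be None,
# otherwise configure_transforms() logs an error and raises ValueError.
# maker.configure_transforms(crop_size=256, mean=None, std=None)
```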
```diff
@@ -921,8 +980,8 @@ class SegmentationDatasetMaker(_BaseMaker):
             raise RuntimeError()
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
         """
@@ -946,23 +1005,57 @@ class SegmentationDatasetMaker(_BaseMaker):
 
         # validate path
         file_path = make_fullpath(filepath, make=True, enforce="file")
-
+
         # Add standard transforms
         recipe: Dict[str, Any] = {
             VisionTransformRecipeKeys.TASK: "segmentation",
             VisionTransformRecipeKeys.PIPELINE: [
-                {VisionTransformRecipeKeys.NAME: "Resize", "kwargs": {"size": components[
-                {VisionTransformRecipeKeys.NAME: "CenterCrop", "kwargs": {"size": components[
-                {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}}
-                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
-                    "mean": components["mean"],
-                    "std": components["std"]
-                }}
+                {VisionTransformRecipeKeys.NAME: "Resize", "kwargs": {"size": components[VisionTransformRecipeKeys.RESIZE_SIZE]}},
+                {VisionTransformRecipeKeys.NAME: "CenterCrop", "kwargs": {"size": components[VisionTransformRecipeKeys.CROP_SIZE]}},
+                {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}}
             ]
         }
 
+        if self._has_mean_std:
+            recipe[VisionTransformRecipeKeys.PIPELINE].append(
+                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
+                    "mean": components[VisionTransformRecipeKeys.MEAN],
+                    "std": components[VisionTransformRecipeKeys.STD]
+                }}
+            )
+
         # Save the file
-
+        _save_recipe(recipe, file_path)
+
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found"
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f"  Total Image-Mask Pairs: {len(self.image_paths)}\n"
+        s += f"  Split: {self._is_split}\n"
+        s += f"  Transforms Configured: {self._are_transforms_configured}\n"
+
+        if self.class_map:
+            s += f"  Classes: {list(self.class_map.keys())}\n"
+
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            s += f"  Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+
+        return s
 
 
 # Object detection
```
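For reference, the recipe written by `save_transform_recipe()` is a plain dict with a task tag and an ordered pipeline of transform names plus kwargs, and the `Normalize` entry is now appended only when mean/std were provided. The sketch below shows the likely shape for a segmentation recipe with normalization enabled; the literal key strings behind `VisionTransformRecipeKeys.*` are assumptions, since only `"kwargs"`, `"mean"`, `"std"`, the transform names, and the `"segmentation"` task tag appear verbatim in this diff:

```python
# Hypothetical on-disk shape (the "task"/"pipeline"/"name" key strings are assumed):
recipe = {
    "task": "segmentation",
    "pipeline": [
        {"name": "Resize",     "kwargs": {"size": 256}},
        {"name": "CenterCrop", "kwargs": {"size": 224}},
        {"name": "ToTensor",   "kwargs": {}},
        # Appended only when mean/std were provided (self._has_mean_std):
        {"name": "Normalize",  "kwargs": {"mean": [0.485, 0.456, 0.406],
                                          "std":  [0.229, 0.224, 0.225]}},
    ],
}
```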
```diff
@@ -990,7 +1083,7 @@ class _ObjectDetectionDataset(Dataset):
         self.annotation_paths = annotation_paths
         self.transform = transform
 
-        # --- Propagate 'classes' if they exist
+        # --- Propagate 'classes' if they exist ---
         self.classes: List[str] = []
 
     def __len__(self):
@@ -1077,7 +1170,7 @@ class _OD_PairedRandomHorizontalFlip:
         return image, target
 
 
-class ObjectDetectionDatasetMaker(_BaseMaker):
+class DragonDatasetObjectDetection:
     """
     Creates processed PyTorch datasets for object detection from image
     and JSON annotation folders.
@@ -1090,7 +1183,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
     so this class provides a `collate_fn` to be used with a DataLoader.
 
     Workflow:
-    1. `maker = ObjectDetectionDatasetMaker.from_folders(img_dir, ann_dir)`
+    1. `maker = DragonDatasetObjectDetection.from_folders(img_dir, ann_dir)`
     2. `maker.set_class_map({'background': 0, 'person': 1, 'car': 2})`
     3. `maker.split_data(val_size=0.2)`
     4. `maker.configure_transforms()`
```
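Because detection targets are variable-length, batches cannot simply be stacked into one tensor, which is why the maker exposes a `collate_fn` property (visible later in this diff). A minimal wiring sketch; the paths, class map, batch size, and the way the split datasets are retrieved are illustrative, not confirmed by this page:

```python
from torch.utils.data import DataLoader

maker = DragonDatasetObjectDetection.from_folders("data/images", "data/annotations")
maker.set_class_map({'background': 0, 'person': 1, 'car': 2})
maker.split_data(val_size=0.2)
maker.configure_transforms()

train_ds = ...  # retrieve the train split from the maker (accessor elided here)

# collate_fn keeps each sample's (image, target) pair intact instead of
# trying to stack the variable-length detection targets.
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True,
                          collate_fn=maker.collate_fn)
```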
```diff
@@ -1104,7 +1197,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class method `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self.image_paths: List[Path] = []
         self.annotation_paths: List[Path] = []
         self.class_map: Dict[str, int] = {}
@@ -1114,9 +1209,10 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         self.train_transform: Optional[Callable] = None
         self.val_transform: Optional[Callable] = None
         self._val_recipe_components: Optional[Dict[str, Any]] = None
+        self._has_mean_std: bool = False
 
     @classmethod
-    def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
+    def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'DragonDatasetObjectDetection':
         """
         Creates a maker instance by loading all matching image-annotation pairs
         from two corresponding directories.
@@ -1133,7 +1229,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             annotation files.
 
         Returns:
-
+            DragonDatasetObjectDetection: A new instance with all pairs loaded.
         """
         maker = cls()
         img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -1180,9 +1276,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         Logs a report of the types, sizes, and channels of image files
         found in the directory.
         """
-
+        DragonDatasetVision.inspect_folder(path)
 
-    def set_class_map(self, class_map: Dict[str, int]) -> 'ObjectDetectionDatasetMaker':
+    def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetObjectDetection':
         """
         Sets a map of class_name -> pixel_value. This is used by the
         MLTrainer for clear evaluation reports.
@@ -1210,7 +1306,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         return []
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   random_state: Optional[int] = 42) -> 'ObjectDetectionDatasetMaker':
+                   random_state: Optional[int] = 42) -> 'DragonDatasetObjectDetection':
         """
         Splits the loaded image-annotation pairs into train, validation, and test sets.
 
@@ -1220,7 +1316,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for reproducible splits.
 
         Returns:
-
+            DragonDatasetObjectDetection: The same instance, now with datasets created.
         """
         if self._is_split:
             _LOGGER.warning("Data has already been split.")
@@ -1273,8 +1369,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         return self
 
     def configure_transforms(self,
-                             mean: List[float] = [0.485, 0.456, 0.406],
-                             std: List[float] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+                             mean: Optional[List[float]] = [0.485, 0.456, 0.406],
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetObjectDetection':
         """
         Configures and applies the image and target transformations.
 
@@ -1285,34 +1381,52 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         Transforms are limited to augmentation (flip), ToTensor, and Normalize.
 
         Args:
-            mean (List[float]): The mean values for image normalization.
-            std (List[float]): The std dev values for image normalization.
+            mean (List[float] | None): The mean values for image normalization.
+            std (List[float] | None): The std dev values for image normalization.
 
         Returns:
-
+            DragonDatasetObjectDetection: The same instance, with transforms applied.
         """
         if not self._is_split:
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
             raise RuntimeError()
 
-
-
-
-            VisionTransformRecipeKeys.STD: std
-        }
-
-        # --- Validation/Test Pipeline (Deterministic) ---
-        self.val_transform = _OD_PairedCompose([
-            _OD_PairedToTensor(),
-            _OD_PairedNormalize(mean, std)
-        ])
+        if (mean is None and std is not None) or (mean is not None and std is None):
+            _LOGGER.error(f"'mean' and 'std' must be both None or both defined, but only one was provided.")
+            raise ValueError()
 
-
-
-
-
-
-
+        if mean is not None and std is not None:
+            # --- Store components for validation recipe ---
+            self._val_recipe_components = {
+                VisionTransformRecipeKeys.MEAN: mean,
+                VisionTransformRecipeKeys.STD: std
+            }
+            self._has_mean_std = True
+
+        if self._has_mean_std:
+            # --- Validation/Test Pipeline (Deterministic) ---
+            self.val_transform = _OD_PairedCompose([
+                _OD_PairedToTensor(),
+                _OD_PairedNormalize(mean, std) # type: ignore
+            ])
+
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _OD_PairedCompose([
+                _OD_PairedRandomHorizontalFlip(p=0.5),
+                _OD_PairedToTensor(),
+                _OD_PairedNormalize(mean, std) # type: ignore
+            ])
+        else:
+            # --- Validation/Test Pipeline (Deterministic) ---
+            self.val_transform = _OD_PairedCompose([
+                _OD_PairedToTensor()
+            ])
+
+            # --- Training Pipeline (Augmentation) ---
+            self.train_transform = _OD_PairedCompose([
+                _OD_PairedRandomHorizontalFlip(p=0.5),
+                _OD_PairedToTensor()
+            ])
 
         # --- Apply Transforms to the Datasets ---
         self._train_dataset.transform = self.train_transform # type: ignore
@@ -1340,8 +1454,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             raise RuntimeError()
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     @property
     def collate_fn(self) -> Callable:
@@ -1368,10 +1482,6 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
 
         components = self._val_recipe_components
 
-        if not components:
-            _LOGGER.error(f"Error getting the transformers recipe for validation set.")
-            raise ValueError()
-
         # validate path
         file_path = make_fullpath(filepath, make=True, enforce="file")
 
@@ -1380,15 +1490,49 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             VisionTransformRecipeKeys.TASK: "object_detection",
             VisionTransformRecipeKeys.PIPELINE: [
                 {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}},
-                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
-                    "mean": components["mean"],
-                    "std": components["std"]
-                }}
             ]
         }
 
+        if self._has_mean_std and components:
+            recipe[VisionTransformRecipeKeys.PIPELINE].append(
+                {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
+                    "mean": components[VisionTransformRecipeKeys.MEAN],
+                    "std": components[VisionTransformRecipeKeys.STD]
+                }}
+            )
+
         # Save the file
-
+        _save_recipe(recipe, file_path)
+
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found"
+
+    def __repr__(self) -> str:
+        s = f"<{self.__class__.__name__}>:\n"
+        s += f"  Total Image-Annotation Pairs: {len(self.image_paths)}\n"
+        s += f"  Split: {self._is_split}\n"
+        s += f"  Transforms Configured: {self._are_transforms_configured}\n"
+
+        if self.class_map:
+            s += f"  Classes ({len(self.class_map)}): {list(self.class_map.keys())}\n"
+
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            s += f"  Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+
+        return s
 
 
 def info():
```
|