dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/METADATA +9 -5
  2. dragon_ml_toolbox-16.2.1.dist-info/RECORD +51 -0
  3. ml_tools/ETL_cleaning.py +20 -20
  4. ml_tools/ETL_engineering.py +23 -25
  5. ml_tools/GUI_tools.py +20 -20
  6. ml_tools/MICE_imputation.py +3 -3
  7. ml_tools/ML_callbacks.py +43 -26
  8. ml_tools/ML_configuration.py +726 -32
  9. ml_tools/ML_datasetmaster.py +235 -280
  10. ml_tools/ML_evaluation.py +160 -42
  11. ml_tools/ML_evaluation_multi.py +103 -35
  12. ml_tools/ML_inference.py +290 -208
  13. ml_tools/ML_models.py +13 -102
  14. ml_tools/ML_models_advanced.py +1 -1
  15. ml_tools/ML_optimization.py +12 -12
  16. ml_tools/ML_scaler.py +11 -11
  17. ml_tools/ML_sequence_datasetmaster.py +341 -0
  18. ml_tools/ML_sequence_evaluation.py +219 -0
  19. ml_tools/ML_sequence_inference.py +391 -0
  20. ml_tools/ML_sequence_models.py +139 -0
  21. ml_tools/ML_trainer.py +1342 -386
  22. ml_tools/ML_utilities.py +1 -1
  23. ml_tools/ML_vision_datasetmaster.py +120 -72
  24. ml_tools/ML_vision_evaluation.py +30 -6
  25. ml_tools/ML_vision_inference.py +129 -152
  26. ml_tools/ML_vision_models.py +1 -1
  27. ml_tools/ML_vision_transformers.py +121 -40
  28. ml_tools/PSO_optimization.py +6 -6
  29. ml_tools/SQL.py +4 -4
  30. ml_tools/{keys.py → _keys.py} +45 -0
  31. ml_tools/_schema.py +1 -1
  32. ml_tools/ensemble_evaluation.py +1 -1
  33. ml_tools/ensemble_inference.py +7 -33
  34. ml_tools/ensemble_learning.py +1 -1
  35. ml_tools/optimization_tools.py +2 -2
  36. ml_tools/path_manager.py +5 -5
  37. ml_tools/utilities.py +1 -2
  38. dragon_ml_toolbox-14.7.0.dist-info/RECORD +0 -49
  39. ml_tools/RNN_forecast.py +0 -56
  40. ml_tools/_ML_vision_recipe.py +0 -88
  41. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/WHEEL +0 -0
  42. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/licenses/LICENSE +0 -0
  43. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  44. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/top_level.txt +0 -0
ml_tools/ML_utilities.py CHANGED
@@ -7,7 +7,7 @@ from torch import nn
  from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
  from ._script_info import _script_info
  from ._logger import _LOGGER
- from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
+ from ._keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
  from .utilities import load_dataframe
  from .custom_logger import save_list_strings, custom_logger
  from .serde import serialize_object_filename
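Note: `keys.py` is renamed to `_keys.py` in 16.x (see file 30 in the list above), marking the module as private. A minimal migration sketch for downstream code that imported it directly; the class name below is taken from the hunk above:

```python
# Before (14.x):
# from ml_tools.keys import PyTorchCheckpointKeys

# After (16.x); the leading underscore signals a private module,
# so relying on it from outside the package is discouraged:
from ml_tools._keys import PyTorchCheckpointKeys
```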
ml_tools/ML_vision_datasetmaster.py CHANGED
@@ -12,25 +12,23 @@ import random
  import json
  import inspect

- from .ML_datasetmaster import _BaseMaker
  from .path_manager import make_fullpath
  from ._logger import _LOGGER
  from ._script_info import _script_info
- from .keys import VisionTransformRecipeKeys, ObjectDetectionKeys
- from ._ML_vision_recipe import save_recipe
- from .ML_vision_transformers import TRANSFORM_REGISTRY
+ from ._keys import VisionTransformRecipeKeys, ObjectDetectionKeys
+ from .ML_vision_transformers import TRANSFORM_REGISTRY, _save_recipe
  from .custom_logger import custom_logger


  __all__ = [
- "VisionDatasetMaker",
- "SegmentationDatasetMaker",
- "ObjectDetectionDatasetMaker"
+ "DragonDatasetVision",
+ "DragonDatasetSegmentation",
+ "DragonDatasetObjectDetection"
  ]


- # --- VisionDatasetMaker ---
- class VisionDatasetMaker(_BaseMaker):
+ # --- Vision Maker ---
+ class DragonDatasetVision:
  """
  Creates processed PyTorch datasets for computer vision tasks from an
  image folder directory.
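The public dataset makers are renamed in this release. A hedged migration sketch for downstream imports (module path taken from the files-changed list above):

```python
# 14.x names -> 16.x names, per the __all__ change in this hunk:
#   VisionDatasetMaker          -> DragonDatasetVision
#   SegmentationDatasetMaker    -> DragonDatasetSegmentation
#   ObjectDetectionDatasetMaker -> DragonDatasetObjectDetection
from ml_tools.ML_vision_datasetmaster import (
    DragonDatasetVision,
    DragonDatasetSegmentation,
    DragonDatasetObjectDetection,
)
```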
@@ -45,10 +43,12 @@ class VisionDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class methods `from_folder()` or `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self._full_dataset: Optional[ImageFolder] = None
  self.labels: Optional[List[int]] = None
- self.class_map: Optional[dict[str,int]] = None
+ self.class_map: dict[str,int] = dict()

  self._is_split = False
  self._are_transforms_configured = False
@@ -56,7 +56,7 @@ class VisionDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folder(cls, root_dir: Union[str,Path]) -> 'VisionDatasetMaker':
+ def from_folder(cls, root_dir: Union[str,Path]) -> 'DragonDatasetVision':
  """
  Creates a maker instance from a single root directory of images.

@@ -70,7 +70,7 @@ class VisionDatasetMaker(_BaseMaker):
  root_dir (str | Path): The path to the root directory containing class subfolders.

  Returns:
- VisionDatasetMaker: A new instance with the full dataset loaded.
+ Instance: A new instance with the full dataset loaded.
  """
  root_path = make_fullpath(root_dir, enforce="directory")
  # Load with NO transform. We get PIL Images.
@@ -87,7 +87,7 @@ class VisionDatasetMaker(_BaseMaker):
  def from_folders(cls,
  train_dir: Union[str,Path],
  val_dir: Union[str,Path],
- test_dir: Optional[Union[str,Path]] = None) -> 'VisionDatasetMaker':
+ test_dir: Optional[Union[str,Path]] = None) -> 'DragonDatasetVision':
  """
  Creates a maker instance from separate, pre-split directories.

@@ -101,7 +101,7 @@ class VisionDatasetMaker(_BaseMaker):
  test_dir (str | Path | None): Path to the test data directory.

  Returns:
- VisionDatasetMaker: A new, pre-split instance.
+ Instance: A new, pre-split instance.

  Raises:
  ValueError: If the classes found in train, val, or test directories are inconsistent.
@@ -186,7 +186,7 @@ class VisionDatasetMaker(_BaseMaker):
  print(report)

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
+ stratify: bool = True, random_state: Optional[int] = None) -> 'DragonDatasetVision':
  """
  Splits the dataset into train, validation, and optional test sets.

@@ -202,7 +202,7 @@ class VisionDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for the random number generator for reproducible splits.

  Returns:
- VisionDatasetMaker: The same instance, now with datasets split.
+ Self: The same instance, now with datasets split.

  Raises:
  ValueError: If `val_size` and `test_size` sum to 1.0 or more.
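Taken together, the hunks above describe the renamed classification workflow. A minimal sketch, assuming the signatures shown in this diff (the directory path is a placeholder):

```python
from ml_tools.ML_vision_datasetmaster import DragonDatasetVision

# Load PIL images from class subfolders, then split reproducibly.
maker = DragonDatasetVision.from_folder("data/images")
maker.split_data(val_size=0.2, test_size=0.1, stratify=True, random_state=42)
```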
@@ -249,22 +249,23 @@ class VisionDatasetMaker(_BaseMaker):
  _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
  return self

- def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
+ def configure_transforms(self,
+ resize_size: int = 256,
+ crop_size: int = 224,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
  std: Optional[List[float]] = [0.229, 0.224, 0.225],
  pre_transforms: Optional[List[Callable]] = None,
- extra_train_transforms: Optional[List[Callable]] = None) -> 'VisionDatasetMaker':
+ extra_train_transforms: Optional[List[Callable]] = None) -> 'DragonDatasetVision':
  """
  Configures and applies the image transformations and augmentations.

  This method must be called AFTER data is loaded and split.

  It sets up two pipelines:
- 1. **Training Pipeline:** Includes random augmentations like
- `RandomResizedCrop` and `RandomHorizontalFlip` (plus any
+ 1. **Training Pipeline:** Includes random augmentations:
+ `RandomResizedCrop(crop_size)`, `RandomHorizontalFlip(0.5)`, and `RandomRotation(90)` (plus any
  `extra_train_transforms`) for online augmentation.
- 2. **Validation/Test Pipeline:** A deterministic pipeline using
- `Resize` and `CenterCrop` for consistent evaluation.
+ 2. **Validation/Test Pipeline:** A deterministic pipeline using `Resize` and `CenterCrop` for consistent evaluation.

  Both pipelines finish with `ToTensor` and `Normalize`.

@@ -279,7 +280,7 @@ class VisionDatasetMaker(_BaseMaker):
  pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.

  Returns:
- VisionDatasetMaker: The same instance, with transforms applied.
+ Self: The same instance, with transforms applied.

  Raises:
  RuntimeError: If called before data is split.
@@ -315,8 +316,9 @@ class VisionDatasetMaker(_BaseMaker):

  # Base augmentations for training
  base_train_transforms = [
- transforms.RandomResizedCrop(crop_size),
- transforms.RandomHorizontalFlip()
+ transforms.RandomResizedCrop(size=crop_size),
+ transforms.RandomHorizontalFlip(p=0.5),
+ transforms.RandomRotation(degrees=90)
  ]
  if extra_train_transforms:
  base_train_transforms.extend(extra_train_transforms)
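For reference, the new default training augmentations are equivalent to the following torchvision pipeline (`ToTensor`/`Normalize` are appended later in the method, per the docstring; the mean/std values are the ImageNet defaults from the signature):

```python
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),   # crop_size default
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),    # new in 16.x
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```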
@@ -350,10 +352,10 @@ class VisionDatasetMaker(_BaseMaker):
  # --- Apply Transforms using the Wrapper ---
  # This correctly assigns the transform regardless of whether the dataset is a Subset (from_folder) or an ImageFolder (from_folders).

- self._train_dataset = _DatasetTransformer(self._train_dataset, train_transform) # type: ignore
- self._val_dataset = _DatasetTransformer(self._val_dataset, val_transform) # type: ignore
+ self._train_dataset = _DatasetTransformer(self._train_dataset, train_transform, self.class_map) # type: ignore
+ self._val_dataset = _DatasetTransformer(self._val_dataset, val_transform, self.class_map) # type: ignore
  if self._test_dataset:
- self._test_dataset = _DatasetTransformer(self._test_dataset, val_transform) # type: ignore
+ self._test_dataset = _DatasetTransformer(self._test_dataset, val_transform, self.class_map) # type: ignore

  self._are_transforms_configured = True
  _LOGGER.info("Image transforms configured and applied.")
@@ -381,8 +383,8 @@ class VisionDatasetMaker(_BaseMaker):
  _LOGGER.warning("Transforms have not been configured.")

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
  """
@@ -481,7 +483,7 @@ class VisionDatasetMaker(_BaseMaker):
  )

  # 3. Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)

  def save_class_map(self, save_dir: Union[str,Path]) -> dict[str,int]:
  """
@@ -499,6 +501,21 @@ class VisionDatasetMaker(_BaseMaker):

  return self.class_map

+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ elif self._full_dataset:
+ return f"Full Dataset: {len(self._full_dataset)} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
  def __repr__(self) -> str:
  s = f"<{self.__class__.__name__}>:\n"
  s += f" Split: {self._is_split}\n"
@@ -511,7 +528,7 @@ class VisionDatasetMaker(_BaseMaker):
  train_len = len(self._train_dataset) if self._train_dataset else 0
  val_len = len(self._val_dataset) if self._val_dataset else 0
  test_len = len(self._test_dataset) if self._test_dataset else 0
- s += f" Datasets (Train/Val/Test): {train_len} / {val_len} / {test_len}\n"
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
  elif self._full_dataset:
  s += f" Full Dataset Size: {len(self._full_dataset)} images\n"

@@ -523,9 +540,10 @@ class _DatasetTransformer(Dataset):
  Internal wrapper class to apply a specific transform pipeline to any
  dataset (e.g., a full ImageFolder or a Subset).
  """
- def __init__(self, dataset: Dataset, transform: Optional[transforms.Compose] = None):
+ def __init__(self, dataset: Dataset, transform: Optional[transforms.Compose] = None, class_map: dict[str,int]=dict()):
  self.dataset = dataset
  self.transform = transform
+ self.class_map = class_map

  # --- Propagate attributes for inspection ---
  # For ImageFolder
@@ -565,7 +583,7 @@ class _SegmentationDataset(Dataset):
  self.mask_paths = mask_paths
  self.transform = transform

- # --- Propagate 'classes' if they exist (for MLTrainer) ---
+ # --- Propagate 'classes' if they exist for trainer ---
  self.classes: List[str] = []

  def __len__(self):
@@ -673,8 +691,8 @@ class _PairedRandomResizedCrop:

  return cropped_image, cropped_mask # type: ignore

- # --- SegmentationDatasetMaker ---
- class SegmentationDatasetMaker(_BaseMaker):
+ # --- Segmentation Dataset ---
+ class DragonDatasetSegmentation:
  """
  Creates processed PyTorch datasets for segmentation from image and mask folders.

@@ -683,7 +701,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  to both the image and its corresponding mask.

  Workflow:
- 1. `maker = SegmentationDatasetMaker.from_folders(img_dir, mask_dir)`
+ 1. `maker = DragonDatasetSegmentation.from_folders(img_dir, mask_dir)`
  2. `maker.set_class_map({'background': 0, 'road': 1})`
  3. `maker.split_data(val_size=0.2)`
  4. `maker.configure_transforms(crop_size=256)`
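A runnable sketch of the documented workflow. The final accessor's name is not visible in this diff (only its return statements appear in later hunks), so `get_datasets()` below is a hypothetical stand-in:

```python
from ml_tools.ML_vision_datasetmaster import DragonDatasetSegmentation

maker = DragonDatasetSegmentation.from_folders("data/imgs", "data/masks")
maker.set_class_map({'background': 0, 'road': 1})
maker.split_data(val_size=0.2)
maker.configure_transforms(crop_size=256)
train_ds, val_ds = maker.get_datasets()  # hypothetical accessor name
```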
@@ -695,7 +713,9 @@ class SegmentationDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class method `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self.image_paths: List[Path] = []
  self.mask_paths: List[Path] = []
  self.class_map: Dict[str, int] = {}
@@ -707,7 +727,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
+ def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'DragonDatasetSegmentation':
  """
  Creates a maker instance by loading all matching image-mask pairs
  from two corresponding directories.
@@ -720,7 +740,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  mask_dir (str | Path): Path to the directory containing segmentation masks.

  Returns:
- SegmentationDatasetMaker: A new instance with all pairs loaded.
+ DragonDatasetSegmentation: A new instance with all pairs loaded.
  """
  maker = cls()
  img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -780,14 +800,14 @@ class SegmentationDatasetMaker(_BaseMaker):
  Logs a report of the types, sizes, and channels of image files
  found in the directory. Useful for checking masks.
  """
- VisionDatasetMaker.inspect_folder(path)
+ DragonDatasetVision.inspect_folder(path)

- def set_class_map(self, class_map: Dict[str, int]) -> 'SegmentationDatasetMaker':
+ def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetSegmentation':
  """
- Sets a map of pixel_value -> class_name. This is used by the MLTrainer for clear evaluation reports.
+ Sets a map of class_name -> pixel value. This is used by the Trainer for clear evaluation reports.

  Args:
- class_map (Dict[int, str]): A dictionary mapping the integer pixel
+ class_map (Dict[str, int]): A dictionary mapping the integer pixel
  value in a mask to its string name.
  Example: {'background': 0, 'road': 1, 'car': 2}
  """
@@ -803,7 +823,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  return []

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- random_state: Optional[int] = 42) -> 'SegmentationDatasetMaker':
+ random_state: Optional[int] = 42) -> 'DragonDatasetSegmentation':
  """
  Splits the loaded image-mask pairs into train, validation, and test sets.

@@ -813,7 +833,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for reproducible splits.

  Returns:
- SegmentationDatasetMaker: The same instance, now with datasets created.
+ DragonDatasetSegmentation: The same instance, now with datasets created.
  """
  if self._is_split:
  _LOGGER.warning("Data has already been split.")
@@ -857,7 +877,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  self._train_dataset = _SegmentationDataset(train_imgs, train_masks, transform=None)
  self._val_dataset = _SegmentationDataset(val_imgs, val_masks, transform=None)

- # Propagate class names to datasets for MLTrainer
+ # Propagate class names to datasets for trainer
  self._train_dataset.classes = self.classes # type: ignore
  self._val_dataset.classes = self.classes # type: ignore

@@ -869,7 +889,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  resize_size: int = 256,
  crop_size: int = 224,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
- std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+ std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetSegmentation':
  """
  Configures and applies the image and mask transformations.

@@ -884,7 +904,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  std (List[float] | None): The std dev values for image normalization.

  Returns:
- SegmentationDatasetMaker: The same instance, with transforms applied.
+ DragonDatasetSegmentation: The same instance, with transforms applied.
  """
  if not self._is_split:
  _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
@@ -961,8 +981,8 @@ class SegmentationDatasetMaker(_BaseMaker):
  raise RuntimeError()

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
  """
@@ -1006,7 +1026,20 @@ class SegmentationDatasetMaker(_BaseMaker):
  )

  # Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)
+
+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"

  def __repr__(self) -> str:
  s = f"<{self.__class__.__name__}>:\n"
@@ -1021,7 +1054,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  train_len = len(self._train_dataset) if self._train_dataset else 0
  val_len = len(self._val_dataset) if self._val_dataset else 0
  test_len = len(self._test_dataset) if self._test_dataset else 0
- s += f" Datasets (Train/Val/Test): {train_len} / {val_len} / {test_len}\n"
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"

  return s

@@ -1051,7 +1084,7 @@ class _ObjectDetectionDataset(Dataset):
  self.annotation_paths = annotation_paths
  self.transform = transform

- # --- Propagate 'classes' if they exist (for MLTrainer) ---
+ # --- Propagate 'classes' if they exist ---
  self.classes: List[str] = []

  def __len__(self):
@@ -1138,7 +1171,7 @@ class _OD_PairedRandomHorizontalFlip:
  return image, target


- class ObjectDetectionDatasetMaker(_BaseMaker):
+ class DragonDatasetObjectDetection:
  """
  Creates processed PyTorch datasets for object detection from image
  and JSON annotation folders.
@@ -1151,7 +1184,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  so this class provides a `collate_fn` to be used with a DataLoader.

  Workflow:
- 1. `maker = ObjectDetectionDatasetMaker.from_folders(img_dir, ann_dir)`
+ 1. `maker = DragonDatasetObjectDetection.from_folders(img_dir, ann_dir)`
  2. `maker.set_class_map({'background': 0, 'person': 1, 'car': 2})`
  3. `maker.split_data(val_size=0.2)`
  4. `maker.configure_transforms()`
@@ -1165,7 +1198,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class method `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self.image_paths: List[Path] = []
  self.annotation_paths: List[Path] = []
  self.class_map: Dict[str, int] = {}
@@ -1178,7 +1213,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
+ def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'DragonDatasetObjectDetection':
  """
  Creates a maker instance by loading all matching image-annotation pairs
  from two corresponding directories.
@@ -1195,7 +1230,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  annotation files.

  Returns:
- ObjectDetectionDatasetMaker: A new instance with all pairs loaded.
+ DragonDatasetObjectDetection: A new instance with all pairs loaded.
  """
  maker = cls()
  img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -1242,12 +1277,12 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  Logs a report of the types, sizes, and channels of image files
  found in the directory.
  """
- VisionDatasetMaker.inspect_folder(path)
+ DragonDatasetVision.inspect_folder(path)

- def set_class_map(self, class_map: Dict[str, int]) -> 'ObjectDetectionDatasetMaker':
+ def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetObjectDetection':
  """
  Sets a map of class_name -> pixel_value. This is used by the
- MLTrainer for clear evaluation reports.
+ trainer for clear evaluation reports.

  **Important:** For object detection models, 'background' MUST
  be included as class 0.
@@ -1272,7 +1307,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  return []

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- random_state: Optional[int] = 42) -> 'ObjectDetectionDatasetMaker':
+ random_state: Optional[int] = 42) -> 'DragonDatasetObjectDetection':
  """
  Splits the loaded image-annotation pairs into train, validation, and test sets.

@@ -1282,7 +1317,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for reproducible splits.

  Returns:
- ObjectDetectionDatasetMaker: The same instance, now with datasets created.
+ DragonDatasetObjectDetection: The same instance, now with datasets created.
  """
  if self._is_split:
  _LOGGER.warning("Data has already been split.")
@@ -1336,7 +1371,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):

  def configure_transforms(self,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
- std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+ std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetObjectDetection':
  """
  Configures and applies the image and target transformations.

@@ -1351,7 +1386,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  std (List[float] | None): The std dev values for image normalization.

  Returns:
- ObjectDetectionDatasetMaker: The same instance, with transforms applied.
+ DragonDatasetObjectDetection: The same instance, with transforms applied.
  """
  if not self._is_split:
  _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
@@ -1420,8 +1455,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  raise RuntimeError()

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  @property
  def collate_fn(self) -> Callable:
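Detection targets vary in size per image, so batches cannot be stacked into fixed-shape tensors; the `collate_fn` property shown above exists for exactly this. A hedged usage sketch, continuing the workflow from the class docstring (`train_ds` and `maker` as obtained there):

```python
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_ds,                     # from the maker, after split/configure
    batch_size=4,
    shuffle=True,
    collate_fn=maker.collate_fn,  # keeps (image, target) pairs as lists
)
```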
@@ -1468,8 +1503,21 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  )

  # Save the file
- save_recipe(recipe, file_path)
-
+ _save_recipe(recipe, file_path)
+
+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
  def __repr__(self) -> str:
  s = f"<{self.__class__.__name__}>:\n"
  s += f" Total Image-Annotation Pairs: {len(self.image_paths)}\n"
@@ -1483,7 +1531,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  train_len = len(self._train_dataset) if self._train_dataset else 0
  val_len = len(self._val_dataset) if self._val_dataset else 0
  test_len = len(self._test_dataset) if self._test_dataset else 0
- s += f" Datasets (Train/Val/Test): {train_len} / {val_len} / {test_len}\n"
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"

  return s

ml_tools/ML_vision_evaluation.py CHANGED
@@ -18,7 +18,10 @@ from torchmetrics.detection import MeanAveragePrecision
  from .path_manager import make_fullpath
  from ._logger import _LOGGER
  from ._script_info import _script_info
- from .keys import VisionKeys
+ from ._keys import VisionKeys
+ from .ML_configuration import (BinarySegmentationMetricsFormat,
+ MultiClassSegmentationMetricsFormat,
+ _BaseSegmentationFormat)


  __all__ = [
@@ -26,12 +29,15 @@ __all__ = [
  "object_detection_metrics"
  ]

+ DPI_value = 250
+

  def segmentation_metrics(
  y_true: np.ndarray,
  y_pred: np.ndarray,
  save_dir: Union[str, Path],
- class_names: Optional[List[str]] = None
+ class_names: Optional[List[str]] = None,
+ config: Optional[Union[BinarySegmentationMetricsFormat, MultiClassSegmentationMetricsFormat]] = None
  ):
  """
  Calculates and saves pixel-level metrics for segmentation tasks.
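A hedged call sketch for the extended signature. The format classes' constructor arguments are not shown in this diff (the hunks below only show that `segmentation_metrics` reads `font_size`, `heatmap_cmap`, and `cm_cmap` from the object), so a default constructor is assumed:

```python
import numpy as np
from ml_tools.ML_vision_evaluation import segmentation_metrics
from ml_tools.ML_configuration import MultiClassSegmentationMetricsFormat

y_true = np.zeros((2, 64, 64), dtype=int)   # placeholder masks [N, H, W]
y_pred = np.zeros((2, 64, 64), dtype=int)

segmentation_metrics(
    y_true, y_pred,
    save_dir="metrics/",
    class_names=["background", "road"],
    config=MultiClassSegmentationMetricsFormat(),  # assumed default-constructible
)
```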
@@ -48,9 +54,20 @@ def segmentation_metrics(
  y_pred (np.ndarray): Predicted masks (e.g., shape [N, H, W]).
  save_dir (str | Path): Directory to save the metrics report and plots.
  class_names (List[str] | None): Names of the classes for the report.
+ config (object): Formatting configuration object.
  """
  save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")

+ # --- Parse Config or use defaults ---
+ if config is None:
+ format_config = _BaseSegmentationFormat()
+ else:
+ format_config = config
+
+ # --- Set Matplotlib font size ---
+ original_rc_params = plt.rcParams.copy()
+ plt.rcParams.update({'font.size': format_config.font_size})
+
  # Get all unique class labels present in either true or pred
  labels = np.unique(np.concatenate((np.unique(y_true), np.unique(y_pred)))).astype(int)

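The rcParams handling above follows a save/update/restore pattern: copy the global style, apply a temporary font size, and restore the copy once plotting is done. In isolation (a slightly hardened variant using try/finally, which the diff itself does not use):

```python
import matplotlib.pyplot as plt

original_rc_params = plt.rcParams.copy()
plt.rcParams.update({'font.size': 12})
try:
    pass  # ... plotting code runs with the temporary font size ...
finally:
    plt.rcParams.update(original_rc_params)  # restore global state
```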
@@ -110,7 +127,7 @@ def segmentation_metrics(
  report_lines.append(per_class_df.to_string(index=False, float_format="%.4f"))

  report_string = "\n".join(report_lines)
- print(report_string)
+ # print(report_string) # <-- I removed the print(report_string)

  # Save text report
  save_filename = VisionKeys.SEGMENTATION_REPORT + ".txt"
@@ -120,11 +137,11 @@ def segmentation_metrics(

  # --- 3. Save Per-Class Metrics Heatmap ---
  try:
- plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=100)
+ plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=DPI_value)
  sns.heatmap(
  per_class_df.set_index('Class').T,
  annot=True,
- cmap='viridis',
+ cmap=format_config.heatmap_cmap, # Use config cmap
  fmt='.3f',
  linewidths=0.5
  )
@@ -149,7 +166,11 @@ def segmentation_metrics(
  confusion_matrix=cm,
  display_labels=display_names
  )
- disp.plot(cmap='Blues', ax=ax_cm, xticks_rotation=45)
+ disp.plot(cmap=format_config.cm_cmap, ax=ax_cm, xticks_rotation=45) # Use config cmap
+
+ # Manually update font size of cell texts
+ for text in disp.text_.flatten(): # type: ignore
+ text.set_fontsize(format_config.font_size)

  ax_cm.set_title("Pixel-Level Confusion Matrix")
  plt.tight_layout()
@@ -160,6 +181,9 @@ def segmentation_metrics(
  plt.close(fig_cm)
  except Exception as e:
  _LOGGER.error(f"Could not generate confusion matrix: {e}")
+
+ # --- Restore RC params ---
+ plt.rcParams.update(original_rc_params)


  def object_detection_metrics(