dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/METADATA +9 -5
  2. dragon_ml_toolbox-16.2.1.dist-info/RECORD +51 -0
  3. ml_tools/ETL_cleaning.py +20 -20
  4. ml_tools/ETL_engineering.py +23 -25
  5. ml_tools/GUI_tools.py +20 -20
  6. ml_tools/MICE_imputation.py +3 -3
  7. ml_tools/ML_callbacks.py +43 -26
  8. ml_tools/ML_configuration.py +726 -32
  9. ml_tools/ML_datasetmaster.py +235 -280
  10. ml_tools/ML_evaluation.py +160 -42
  11. ml_tools/ML_evaluation_multi.py +103 -35
  12. ml_tools/ML_inference.py +290 -208
  13. ml_tools/ML_models.py +13 -102
  14. ml_tools/ML_models_advanced.py +1 -1
  15. ml_tools/ML_optimization.py +12 -12
  16. ml_tools/ML_scaler.py +11 -11
  17. ml_tools/ML_sequence_datasetmaster.py +341 -0
  18. ml_tools/ML_sequence_evaluation.py +219 -0
  19. ml_tools/ML_sequence_inference.py +391 -0
  20. ml_tools/ML_sequence_models.py +139 -0
  21. ml_tools/ML_trainer.py +1342 -386
  22. ml_tools/ML_utilities.py +1 -1
  23. ml_tools/ML_vision_datasetmaster.py +120 -72
  24. ml_tools/ML_vision_evaluation.py +30 -6
  25. ml_tools/ML_vision_inference.py +129 -152
  26. ml_tools/ML_vision_models.py +1 -1
  27. ml_tools/ML_vision_transformers.py +121 -40
  28. ml_tools/PSO_optimization.py +6 -6
  29. ml_tools/SQL.py +4 -4
  30. ml_tools/{keys.py → _keys.py} +45 -0
  31. ml_tools/_schema.py +1 -1
  32. ml_tools/ensemble_evaluation.py +1 -1
  33. ml_tools/ensemble_inference.py +7 -33
  34. ml_tools/ensemble_learning.py +1 -1
  35. ml_tools/optimization_tools.py +2 -2
  36. ml_tools/path_manager.py +5 -5
  37. ml_tools/utilities.py +1 -2
  38. dragon_ml_toolbox-14.7.0.dist-info/RECORD +0 -49
  39. ml_tools/RNN_forecast.py +0 -56
  40. ml_tools/_ML_vision_recipe.py +0 -88
  41. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/WHEEL +0 -0
  42. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/licenses/LICENSE +0 -0
  43. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  44. {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.1.dist-info}/top_level.txt +0 -0
ml_tools/ML_utilities.py CHANGED
@@ -7,7 +7,7 @@ from torch import nn
  from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
  from ._script_info import _script_info
  from ._logger import _LOGGER
- from .keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
+ from ._keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
  from .utilities import load_dataframe
  from .custom_logger import save_list_strings, custom_logger
  from .serde import serialize_object_filename
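Note: `keys.py` is renamed to `_keys.py` in 16.x (see file 30 in the list above), marking the module as private. A minimal migration sketch for downstream code that imported it directly; the class name below is taken from the hunk above:

```python
# Before (14.x):
# from ml_tools.keys import PyTorchCheckpointKeys

# After (16.x); the leading underscore signals a private module,
# so relying on it from outside the package is discouraged:
from ml_tools._keys import PyTorchCheckpointKeys
```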
ml_tools/ML_vision_datasetmaster.py CHANGED
@@ -12,25 +12,23 @@ import random
  import json
  import inspect

- from .ML_datasetmaster import _BaseMaker
  from .path_manager import make_fullpath
  from ._logger import _LOGGER
  from ._script_info import _script_info
- from .keys import VisionTransformRecipeKeys, ObjectDetectionKeys
- from ._ML_vision_recipe import save_recipe
- from .ML_vision_transformers import TRANSFORM_REGISTRY
+ from ._keys import VisionTransformRecipeKeys, ObjectDetectionKeys
+ from .ML_vision_transformers import TRANSFORM_REGISTRY, _save_recipe
  from .custom_logger import custom_logger


  __all__ = [
- "VisionDatasetMaker",
- "SegmentationDatasetMaker",
- "ObjectDetectionDatasetMaker"
+ "DragonDatasetVision",
+ "DragonDatasetSegmentation",
+ "DragonDatasetObjectDetection"
  ]


- # --- VisionDatasetMaker ---
- class VisionDatasetMaker(_BaseMaker):
+ # --- Vision Maker ---
+ class DragonDatasetVision:
  """
  Creates processed PyTorch datasets for computer vision tasks from an
  image folder directory.
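The public dataset makers are renamed in this release. A hedged migration sketch for downstream imports (module path taken from the files-changed list above):

```python
# 14.x names -> 16.x names, per the __all__ change in this hunk:
#   VisionDatasetMaker          -> DragonDatasetVision
#   SegmentationDatasetMaker    -> DragonDatasetSegmentation
#   ObjectDetectionDatasetMaker -> DragonDatasetObjectDetection
from ml_tools.ML_vision_datasetmaster import (
    DragonDatasetVision,
    DragonDatasetSegmentation,
    DragonDatasetObjectDetection,
)
```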
@@ -45,10 +43,12 @@ class VisionDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class methods `from_folder()` or `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self._full_dataset: Optional[ImageFolder] = None
  self.labels: Optional[List[int]] = None
- self.class_map: Optional[dict[str,int]] = None
+ self.class_map: dict[str,int] = dict()

  self._is_split = False
  self._are_transforms_configured = False
@@ -56,7 +56,7 @@ class VisionDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folder(cls, root_dir: Union[str,Path]) -> 'VisionDatasetMaker':
+ def from_folder(cls, root_dir: Union[str,Path]) -> 'DragonDatasetVision':
  """
  Creates a maker instance from a single root directory of images.

@@ -70,7 +70,7 @@ class VisionDatasetMaker(_BaseMaker):
  root_dir (str | Path): The path to the root directory containing class subfolders.

  Returns:
- VisionDatasetMaker: A new instance with the full dataset loaded.
+ Instance: A new instance with the full dataset loaded.
  """
  root_path = make_fullpath(root_dir, enforce="directory")
  # Load with NO transform. We get PIL Images.
@@ -87,7 +87,7 @@ class VisionDatasetMaker(_BaseMaker):
  def from_folders(cls,
  train_dir: Union[str,Path],
  val_dir: Union[str,Path],
- test_dir: Optional[Union[str,Path]] = None) -> 'VisionDatasetMaker':
+ test_dir: Optional[Union[str,Path]] = None) -> 'DragonDatasetVision':
  """
  Creates a maker instance from separate, pre-split directories.

@@ -101,7 +101,7 @@ class VisionDatasetMaker(_BaseMaker):
  test_dir (str | Path | None): Path to the test data directory.

  Returns:
- VisionDatasetMaker: A new, pre-split instance.
+ Instance: A new, pre-split instance.

  Raises:
  ValueError: If the classes found in train, val, or test directories are inconsistent.
@@ -186,7 +186,7 @@ class VisionDatasetMaker(_BaseMaker):
  print(report)

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
+ stratify: bool = True, random_state: Optional[int] = None) -> 'DragonDatasetVision':
  """
  Splits the dataset into train, validation, and optional test sets.

@@ -202,7 +202,7 @@ class VisionDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for the random number generator for reproducible splits.

  Returns:
- VisionDatasetMaker: The same instance, now with datasets split.
+ Self: The same instance, now with datasets split.

  Raises:
  ValueError: If `val_size` and `test_size` sum to 1.0 or more.
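Taken together, the hunks above describe the renamed classification workflow. A minimal sketch, assuming the signatures shown in this diff (the directory path is a placeholder):

```python
from ml_tools.ML_vision_datasetmaster import DragonDatasetVision

# Load PIL images from class subfolders, then split reproducibly.
maker = DragonDatasetVision.from_folder("data/images")
maker.split_data(val_size=0.2, test_size=0.1, stratify=True, random_state=42)
```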
@@ -249,22 +249,23 @@ class VisionDatasetMaker(_BaseMaker):
  _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
  return self

- def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
+ def configure_transforms(self,
+ resize_size: int = 256,
+ crop_size: int = 224,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
  std: Optional[List[float]] = [0.229, 0.224, 0.225],
  pre_transforms: Optional[List[Callable]] = None,
- extra_train_transforms: Optional[List[Callable]] = None) -> 'VisionDatasetMaker':
+ extra_train_transforms: Optional[List[Callable]] = None) -> 'DragonDatasetVision':
  """
  Configures and applies the image transformations and augmentations.

  This method must be called AFTER data is loaded and split.

  It sets up two pipelines:
- 1. **Training Pipeline:** Includes random augmentations like
- `RandomResizedCrop` and `RandomHorizontalFlip` (plus any
+ 1. **Training Pipeline:** Includes random augmentations:
+ `RandomResizedCrop(crop_size)`, `RandomHorizontalFlip(0.5)`, and `RandomRotation(90)` (plus any
  `extra_train_transforms`) for online augmentation.
- 2. **Validation/Test Pipeline:** A deterministic pipeline using
- `Resize` and `CenterCrop` for consistent evaluation.
+ 2. **Validation/Test Pipeline:** A deterministic pipeline using `Resize` and `CenterCrop` for consistent evaluation.

  Both pipelines finish with `ToTensor` and `Normalize`.

@@ -279,7 +280,7 @@ class VisionDatasetMaker(_BaseMaker):
  pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.

  Returns:
- VisionDatasetMaker: The same instance, with transforms applied.
+ Self: The same instance, with transforms applied.

  Raises:
  RuntimeError: If called before data is split.
@@ -315,8 +316,9 @@ class VisionDatasetMaker(_BaseMaker):

  # Base augmentations for training
  base_train_transforms = [
- transforms.RandomResizedCrop(crop_size),
- transforms.RandomHorizontalFlip()
+ transforms.RandomResizedCrop(size=crop_size),
+ transforms.RandomHorizontalFlip(p=0.5),
+ transforms.RandomRotation(degrees=90)
  ]
  if extra_train_transforms:
  base_train_transforms.extend(extra_train_transforms)
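For reference, the new default training augmentations are equivalent to the following torchvision pipeline (`ToTensor`/`Normalize` are appended later in the method, per the docstring; the mean/std values are the ImageNet defaults from the signature):

```python
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),   # crop_size default
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),    # new in 16.x
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```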
@@ -350,10 +352,10 @@ class VisionDatasetMaker(_BaseMaker):
  # --- Apply Transforms using the Wrapper ---
  # This correctly assigns the transform regardless of whether the dataset is a Subset (from_folder) or an ImageFolder (from_folders).

- self._train_dataset = _DatasetTransformer(self._train_dataset, train_transform) # type: ignore
- self._val_dataset = _DatasetTransformer(self._val_dataset, val_transform) # type: ignore
+ self._train_dataset = _DatasetTransformer(self._train_dataset, train_transform, self.class_map) # type: ignore
+ self._val_dataset = _DatasetTransformer(self._val_dataset, val_transform, self.class_map) # type: ignore
  if self._test_dataset:
- self._test_dataset = _DatasetTransformer(self._test_dataset, val_transform) # type: ignore
+ self._test_dataset = _DatasetTransformer(self._test_dataset, val_transform, self.class_map) # type: ignore

  self._are_transforms_configured = True
  _LOGGER.info("Image transforms configured and applied.")
@@ -381,8 +383,8 @@ class VisionDatasetMaker(_BaseMaker):
  _LOGGER.warning("Transforms have not been configured.")

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
  """
@@ -481,7 +483,7 @@ class VisionDatasetMaker(_BaseMaker):
  )

  # 3. Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)

  def save_class_map(self, save_dir: Union[str,Path]) -> dict[str,int]:
  """
@@ -499,6 +501,21 @@ class VisionDatasetMaker(_BaseMaker):

  return self.class_map

+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ elif self._full_dataset:
+ return f"Full Dataset: {len(self._full_dataset)} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
  def __repr__(self) -> str:
  s = f"<{self.__class__.__name__}>:\n"
  s += f" Split: {self._is_split}\n"
@@ -511,7 +528,7 @@ class VisionDatasetMaker(_BaseMaker):
  train_len = len(self._train_dataset) if self._train_dataset else 0
  val_len = len(self._val_dataset) if self._val_dataset else 0
  test_len = len(self._test_dataset) if self._test_dataset else 0
- s += f" Datasets (Train/Val/Test): {train_len} / {val_len} / {test_len}\n"
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
  elif self._full_dataset:
  s += f" Full Dataset Size: {len(self._full_dataset)} images\n"

@@ -523,9 +540,10 @@ class _DatasetTransformer(Dataset):
  Internal wrapper class to apply a specific transform pipeline to any
  dataset (e.g., a full ImageFolder or a Subset).
  """
- def __init__(self, dataset: Dataset, transform: Optional[transforms.Compose] = None):
+ def __init__(self, dataset: Dataset, transform: Optional[transforms.Compose] = None, class_map: dict[str,int]=dict()):
  self.dataset = dataset
  self.transform = transform
+ self.class_map = class_map

  # --- Propagate attributes for inspection ---
  # For ImageFolder
@@ -565,7 +583,7 @@ class _SegmentationDataset(Dataset):
  self.mask_paths = mask_paths
  self.transform = transform

- # --- Propagate 'classes' if they exist (for MLTrainer) ---
+ # --- Propagate 'classes' if they exist for trainer ---
  self.classes: List[str] = []

  def __len__(self):
@@ -673,8 +691,8 @@ class _PairedRandomResizedCrop:

  return cropped_image, cropped_mask # type: ignore

- # --- SegmentationDatasetMaker ---
- class SegmentationDatasetMaker(_BaseMaker):
+ # --- Segmentation Dataset ---
+ class DragonDatasetSegmentation:
  """
  Creates processed PyTorch datasets for segmentation from image and mask folders.

@@ -683,7 +701,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  to both the image and its corresponding mask.

  Workflow:
- 1. `maker = SegmentationDatasetMaker.from_folders(img_dir, mask_dir)`
+ 1. `maker = DragonDatasetSegmentation.from_folders(img_dir, mask_dir)`
  2. `maker.set_class_map({'background': 0, 'road': 1})`
  3. `maker.split_data(val_size=0.2)`
  4. `maker.configure_transforms(crop_size=256)`
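A runnable sketch of the documented workflow. The final accessor's name is not visible in this diff (only its return statements appear in later hunks), so `get_datasets()` below is a hypothetical stand-in:

```python
from ml_tools.ML_vision_datasetmaster import DragonDatasetSegmentation

maker = DragonDatasetSegmentation.from_folders("data/imgs", "data/masks")
maker.set_class_map({'background': 0, 'road': 1})
maker.split_data(val_size=0.2)
maker.configure_transforms(crop_size=256)
train_ds, val_ds = maker.get_datasets()  # hypothetical accessor name
```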
@@ -695,7 +713,9 @@ class SegmentationDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class method `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self.image_paths: List[Path] = []
  self.mask_paths: List[Path] = []
  self.class_map: Dict[str, int] = {}
@@ -707,7 +727,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
+ def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'DragonDatasetSegmentation':
  """
  Creates a maker instance by loading all matching image-mask pairs
  from two corresponding directories.
@@ -720,7 +740,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  mask_dir (str | Path): Path to the directory containing segmentation masks.

  Returns:
- SegmentationDatasetMaker: A new instance with all pairs loaded.
+ DragonDatasetSegmentation: A new instance with all pairs loaded.
  """
  maker = cls()
  img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -780,14 +800,14 @@ class SegmentationDatasetMaker(_BaseMaker):
  Logs a report of the types, sizes, and channels of image files
  found in the directory. Useful for checking masks.
  """
- VisionDatasetMaker.inspect_folder(path)
+ DragonDatasetVision.inspect_folder(path)

- def set_class_map(self, class_map: Dict[str, int]) -> 'SegmentationDatasetMaker':
+ def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetSegmentation':
  """
- Sets a map of pixel_value -> class_name. This is used by the MLTrainer for clear evaluation reports.
+ Sets a map of class_name -> pixel value. This is used by the Trainer for clear evaluation reports.

  Args:
- class_map (Dict[int, str]): A dictionary mapping the integer pixel
+ class_map (Dict[str, int]): A dictionary mapping the integer pixel
  value in a mask to its string name.
  Example: {'background': 0, 'road': 1, 'car': 2}
  """
@@ -803,7 +823,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  return []

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- random_state: Optional[int] = 42) -> 'SegmentationDatasetMaker':
+ random_state: Optional[int] = 42) -> 'DragonDatasetSegmentation':
  """
  Splits the loaded image-mask pairs into train, validation, and test sets.

@@ -813,7 +833,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for reproducible splits.

  Returns:
- SegmentationDatasetMaker: The same instance, now with datasets created.
+ DragonDatasetSegmentation: The same instance, now with datasets created.
  """
  if self._is_split:
  _LOGGER.warning("Data has already been split.")
@@ -857,7 +877,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  self._train_dataset = _SegmentationDataset(train_imgs, train_masks, transform=None)
  self._val_dataset = _SegmentationDataset(val_imgs, val_masks, transform=None)

- # Propagate class names to datasets for MLTrainer
+ # Propagate class names to datasets for trainer
  self._train_dataset.classes = self.classes # type: ignore
  self._val_dataset.classes = self.classes # type: ignore

@@ -869,7 +889,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  resize_size: int = 256,
  crop_size: int = 224,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
- std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+ std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetSegmentation':
  """
  Configures and applies the image and mask transformations.

@@ -884,7 +904,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  std (List[float] | None): The std dev values for image normalization.

  Returns:
- SegmentationDatasetMaker: The same instance, with transforms applied.
+ DragonDatasetSegmentation: The same instance, with transforms applied.
  """
  if not self._is_split:
  _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
@@ -961,8 +981,8 @@ class SegmentationDatasetMaker(_BaseMaker):
  raise RuntimeError()

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
  """
@@ -1006,7 +1026,20 @@ class SegmentationDatasetMaker(_BaseMaker):
  )

  # Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)
+
+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"

  def __repr__(self) -> str:
  s = f"<{self.__class__.__name__}>:\n"
@@ -1021,7 +1054,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  train_len = len(self._train_dataset) if self._train_dataset else 0
  val_len = len(self._val_dataset) if self._val_dataset else 0
  test_len = len(self._test_dataset) if self._test_dataset else 0
- s += f" Datasets (Train/Val/Test): {train_len} / {val_len} / {test_len}\n"
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"

  return s

@@ -1051,7 +1084,7 @@ class _ObjectDetectionDataset(Dataset):
  self.annotation_paths = annotation_paths
  self.transform = transform

- # --- Propagate 'classes' if they exist (for MLTrainer) ---
+ # --- Propagate 'classes' if they exist ---
  self.classes: List[str] = []

  def __len__(self):
@@ -1138,7 +1171,7 @@ class _OD_PairedRandomHorizontalFlip:
  return image, target


- class ObjectDetectionDatasetMaker(_BaseMaker):
+ class DragonDatasetObjectDetection:
  """
  Creates processed PyTorch datasets for object detection from image
  and JSON annotation folders.
@@ -1151,7 +1184,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  so this class provides a `collate_fn` to be used with a DataLoader.

  Workflow:
- 1. `maker = ObjectDetectionDatasetMaker.from_folders(img_dir, ann_dir)`
+ 1. `maker = DragonDatasetObjectDetection.from_folders(img_dir, ann_dir)`
  2. `maker.set_class_map({'background': 0, 'person': 1, 'car': 2})`
  3. `maker.split_data(val_size=0.2)`
  4. `maker.configure_transforms()`
@@ -1165,7 +1198,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class method `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self.image_paths: List[Path] = []
  self.annotation_paths: List[Path] = []
  self.class_map: Dict[str, int] = {}
@@ -1178,7 +1213,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
+ def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'DragonDatasetObjectDetection':
  """
  Creates a maker instance by loading all matching image-annotation pairs
  from two corresponding directories.
@@ -1195,7 +1230,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  annotation files.

  Returns:
- ObjectDetectionDatasetMaker: A new instance with all pairs loaded.
+ DragonDatasetObjectDetection: A new instance with all pairs loaded.
  """
  maker = cls()
  img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -1242,12 +1277,12 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  Logs a report of the types, sizes, and channels of image files
  found in the directory.
  """
- VisionDatasetMaker.inspect_folder(path)
+ DragonDatasetVision.inspect_folder(path)

- def set_class_map(self, class_map: Dict[str, int]) -> 'ObjectDetectionDatasetMaker':
+ def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetObjectDetection':
  """
  Sets a map of class_name -> pixel_value. This is used by the
- MLTrainer for clear evaluation reports.
+ trainer for clear evaluation reports.

  **Important:** For object detection models, 'background' MUST
  be included as class 0.
@@ -1272,7 +1307,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  return []

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- random_state: Optional[int] = 42) -> 'ObjectDetectionDatasetMaker':
+ random_state: Optional[int] = 42) -> 'DragonDatasetObjectDetection':
  """
  Splits the loaded image-annotation pairs into train, validation, and test sets.

@@ -1282,7 +1317,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for reproducible splits.

  Returns:
- ObjectDetectionDatasetMaker: The same instance, now with datasets created.
+ DragonDatasetObjectDetection: The same instance, now with datasets created.
  """
  if self._is_split:
  _LOGGER.warning("Data has already been split.")
@@ -1336,7 +1371,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):

  def configure_transforms(self,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
- std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+ std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetObjectDetection':
  """
  Configures and applies the image and target transformations.

@@ -1351,7 +1386,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  std (List[float] | None): The std dev values for image normalization.

  Returns:
- ObjectDetectionDatasetMaker: The same instance, with transforms applied.
+ DragonDatasetObjectDetection: The same instance, with transforms applied.
  """
  if not self._is_split:
  _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
@@ -1420,8 +1455,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  raise RuntimeError()

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  @property
  def collate_fn(self) -> Callable:
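Detection targets vary in size per image, so batches cannot be stacked into fixed-shape tensors; the `collate_fn` property shown above exists for exactly this. A hedged usage sketch, continuing the workflow from the class docstring (`train_ds` and `maker` as obtained there):

```python
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_ds,                     # from the maker, after split/configure
    batch_size=4,
    shuffle=True,
    collate_fn=maker.collate_fn,  # keeps (image, target) pairs as lists
)
```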
@@ -1468,8 +1503,21 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  )

  # Save the file
- save_recipe(recipe, file_path)
-
+ _save_recipe(recipe, file_path)
+
+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
  def __repr__(self) -> str:
  s = f"<{self.__class__.__name__}>:\n"
  s += f" Total Image-Annotation Pairs: {len(self.image_paths)}\n"
@@ -1483,7 +1531,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  train_len = len(self._train_dataset) if self._train_dataset else 0
  val_len = len(self._val_dataset) if self._val_dataset else 0
  test_len = len(self._test_dataset) if self._test_dataset else 0
- s += f" Datasets (Train/Val/Test): {train_len} / {val_len} / {test_len}\n"
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"

  return s

ml_tools/ML_vision_evaluation.py CHANGED
@@ -18,7 +18,10 @@ from torchmetrics.detection import MeanAveragePrecision
  from .path_manager import make_fullpath
  from ._logger import _LOGGER
  from ._script_info import _script_info
- from .keys import VisionKeys
+ from ._keys import VisionKeys
+ from .ML_configuration import (BinarySegmentationMetricsFormat,
+ MultiClassSegmentationMetricsFormat,
+ _BaseSegmentationFormat)


  __all__ = [
@@ -26,12 +29,15 @@ __all__ = [
  "object_detection_metrics"
  ]

+ DPI_value = 250
+

  def segmentation_metrics(
  y_true: np.ndarray,
  y_pred: np.ndarray,
  save_dir: Union[str, Path],
- class_names: Optional[List[str]] = None
+ class_names: Optional[List[str]] = None,
+ config: Optional[Union[BinarySegmentationMetricsFormat, MultiClassSegmentationMetricsFormat]] = None
  ):
  """
  Calculates and saves pixel-level metrics for segmentation tasks.
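A hedged call sketch for the extended signature. The format classes' constructor arguments are not shown in this diff (the hunks below only show that `segmentation_metrics` reads `font_size`, `heatmap_cmap`, and `cm_cmap` from the object), so a default constructor is assumed:

```python
import numpy as np
from ml_tools.ML_vision_evaluation import segmentation_metrics
from ml_tools.ML_configuration import MultiClassSegmentationMetricsFormat

y_true = np.zeros((2, 64, 64), dtype=int)   # placeholder masks [N, H, W]
y_pred = np.zeros((2, 64, 64), dtype=int)

segmentation_metrics(
    y_true, y_pred,
    save_dir="metrics/",
    class_names=["background", "road"],
    config=MultiClassSegmentationMetricsFormat(),  # assumed default-constructible
)
```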
@@ -48,9 +54,20 @@ def segmentation_metrics(
  y_pred (np.ndarray): Predicted masks (e.g., shape [N, H, W]).
  save_dir (str | Path): Directory to save the metrics report and plots.
  class_names (List[str] | None): Names of the classes for the report.
+ config (object): Formatting configuration object.
  """
  save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")

+ # --- Parse Config or use defaults ---
+ if config is None:
+ format_config = _BaseSegmentationFormat()
+ else:
+ format_config = config
+
+ # --- Set Matplotlib font size ---
+ original_rc_params = plt.rcParams.copy()
+ plt.rcParams.update({'font.size': format_config.font_size})
+
  # Get all unique class labels present in either true or pred
  labels = np.unique(np.concatenate((np.unique(y_true), np.unique(y_pred)))).astype(int)

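The rcParams handling above follows a save/update/restore pattern: copy the global style, apply a temporary font size, and restore the copy once plotting is done. In isolation (a slightly hardened variant using try/finally, which the diff itself does not use):

```python
import matplotlib.pyplot as plt

original_rc_params = plt.rcParams.copy()
plt.rcParams.update({'font.size': 12})
try:
    pass  # ... plotting code runs with the temporary font size ...
finally:
    plt.rcParams.update(original_rc_params)  # restore global state
```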
@@ -110,7 +127,7 @@ def segmentation_metrics(
  report_lines.append(per_class_df.to_string(index=False, float_format="%.4f"))

  report_string = "\n".join(report_lines)
- print(report_string)
+ # print(report_string) # <-- I removed the print(report_string)

  # Save text report
  save_filename = VisionKeys.SEGMENTATION_REPORT + ".txt"
@@ -120,11 +137,11 @@ def segmentation_metrics(

  # --- 3. Save Per-Class Metrics Heatmap ---
  try:
- plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=100)
+ plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=DPI_value)
  sns.heatmap(
  per_class_df.set_index('Class').T,
  annot=True,
- cmap='viridis',
+ cmap=format_config.heatmap_cmap, # Use config cmap
  fmt='.3f',
  linewidths=0.5
  )
@@ -149,7 +166,11 @@ def segmentation_metrics(
  confusion_matrix=cm,
  display_labels=display_names
  )
- disp.plot(cmap='Blues', ax=ax_cm, xticks_rotation=45)
+ disp.plot(cmap=format_config.cm_cmap, ax=ax_cm, xticks_rotation=45) # Use config cmap
+
+ # Manually update font size of cell texts
+ for text in disp.text_.flatten(): # type: ignore
+ text.set_fontsize(format_config.font_size)

  ax_cm.set_title("Pixel-Level Confusion Matrix")
  plt.tight_layout()
@@ -160,6 +181,9 @@ def segmentation_metrics(
  plt.close(fig_cm)
  except Exception as e:
  _LOGGER.error(f"Could not generate confusion matrix: {e}")
+
+ # --- Restore RC params ---
+ plt.rcParams.update(original_rc_params)


  def object_detection_metrics(