dragon-ml-toolbox 14.3.1__py3-none-any.whl → 16.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/METADATA +10 -5
  2. dragon_ml_toolbox-16.0.0.dist-info/RECORD +51 -0
  3. ml_tools/ETL_cleaning.py +20 -20
  4. ml_tools/ETL_engineering.py +23 -25
  5. ml_tools/GUI_tools.py +20 -20
  6. ml_tools/MICE_imputation.py +3 -3
  7. ml_tools/ML_callbacks.py +43 -26
  8. ml_tools/ML_configuration.py +309 -0
  9. ml_tools/ML_datasetmaster.py +220 -260
  10. ml_tools/ML_evaluation.py +317 -81
  11. ml_tools/ML_evaluation_multi.py +127 -36
  12. ml_tools/ML_inference.py +249 -207
  13. ml_tools/ML_models.py +13 -102
  14. ml_tools/ML_models_advanced.py +1 -1
  15. ml_tools/ML_optimization.py +12 -12
  16. ml_tools/ML_scaler.py +11 -11
  17. ml_tools/ML_sequence_datasetmaster.py +341 -0
  18. ml_tools/ML_sequence_evaluation.py +215 -0
  19. ml_tools/ML_sequence_inference.py +391 -0
  20. ml_tools/ML_sequence_models.py +139 -0
  21. ml_tools/ML_trainer.py +1247 -338
  22. ml_tools/ML_utilities.py +51 -2
  23. ml_tools/ML_vision_datasetmaster.py +262 -118
  24. ml_tools/ML_vision_evaluation.py +26 -6
  25. ml_tools/ML_vision_inference.py +117 -140
  26. ml_tools/ML_vision_models.py +15 -1
  27. ml_tools/ML_vision_transformers.py +233 -7
  28. ml_tools/PSO_optimization.py +6 -6
  29. ml_tools/SQL.py +4 -4
  30. ml_tools/{keys.py → _keys.py} +45 -1
  31. ml_tools/_schema.py +1 -1
  32. ml_tools/ensemble_evaluation.py +54 -11
  33. ml_tools/ensemble_inference.py +7 -33
  34. ml_tools/ensemble_learning.py +1 -1
  35. ml_tools/optimization_tools.py +2 -2
  36. ml_tools/path_manager.py +5 -5
  37. ml_tools/utilities.py +1 -2
  38. dragon_ml_toolbox-14.3.1.dist-info/RECORD +0 -48
  39. ml_tools/RNN_forecast.py +0 -56
  40. ml_tools/_ML_vision_recipe.py +0 -88
  41. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/WHEEL +0 -0
  42. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE +0 -0
  43. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
  44. {dragon_ml_toolbox-14.3.1.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/top_level.txt +0 -0
@@ -12,25 +12,23 @@ import random
  import json
  import inspect

- from .ML_datasetmaster import _BaseMaker
  from .path_manager import make_fullpath
  from ._logger import _LOGGER
  from ._script_info import _script_info
- from .keys import VisionTransformRecipeKeys, ObjectDetectionKeys
- from ._ML_vision_recipe import save_recipe
- from .ML_vision_transformers import TRANSFORM_REGISTRY
+ from ._keys import VisionTransformRecipeKeys, ObjectDetectionKeys
+ from .ML_vision_transformers import TRANSFORM_REGISTRY, _save_recipe
  from .custom_logger import custom_logger


  __all__ = [
- "VisionDatasetMaker",
- "SegmentationDatasetMaker",
- "ObjectDetectionDatasetMaker"
+ "DragonDatasetVision",
+ "DragonDatasetSegmentation",
+ "DragonDatasetObjectDetection"
  ]


- # --- VisionDatasetMaker ---
- class VisionDatasetMaker(_BaseMaker):
+ # --- Vision Maker ---
+ class DragonDatasetVision:
  """
  Creates processed PyTorch datasets for computer vision tasks from an
  image folder directory.
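
For downstream code, the visible API change in this hunk is the rename: `VisionDatasetMaker`, `SegmentationDatasetMaker`, and `ObjectDetectionDatasetMaker` become `DragonDatasetVision`, `DragonDatasetSegmentation`, and `DragonDatasetObjectDetection`; the `_BaseMaker` parent is dropped; and `keys` becomes the private `_keys` module. A minimal migration sketch, assuming user code imports from `ml_tools.ML_vision_datasetmaster` as the file path above suggests (paths are placeholders):

# dragon-ml-toolbox 14.3.1 (hypothetical user code):
# from ml_tools.ML_vision_datasetmaster import VisionDatasetMaker
# maker = VisionDatasetMaker.from_folder("data/images")

# dragon-ml-toolbox 16.0.0:
from ml_tools.ML_vision_datasetmaster import DragonDatasetVision

maker = DragonDatasetVision.from_folder("data/images")
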
@@ -45,7 +43,9 @@ class VisionDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class methods `from_folder()` or `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self._full_dataset: Optional[ImageFolder] = None
  self.labels: Optional[List[int]] = None
  self.class_map: Optional[dict[str,int]] = None
@@ -56,7 +56,7 @@ class VisionDatasetMaker(_BaseMaker):
  self._has_mean_std: bool = False

  @classmethod
- def from_folder(cls, root_dir: Union[str,Path]) -> 'VisionDatasetMaker':
+ def from_folder(cls, root_dir: Union[str,Path]) -> 'DragonDatasetVision':
  """
  Creates a maker instance from a single root directory of images.

@@ -70,7 +70,7 @@ class VisionDatasetMaker(_BaseMaker):
  root_dir (str | Path): The path to the root directory containing class subfolders.

  Returns:
- VisionDatasetMaker: A new instance with the full dataset loaded.
+ Instance: A new instance with the full dataset loaded.
  """
  root_path = make_fullpath(root_dir, enforce="directory")
  # Load with NO transform. We get PIL Images.
@@ -87,7 +87,7 @@ class VisionDatasetMaker(_BaseMaker):
  def from_folders(cls,
  train_dir: Union[str,Path],
  val_dir: Union[str,Path],
- test_dir: Optional[Union[str,Path]] = None) -> 'VisionDatasetMaker':
+ test_dir: Optional[Union[str,Path]] = None) -> 'DragonDatasetVision':
  """
  Creates a maker instance from separate, pre-split directories.

@@ -101,7 +101,7 @@ class VisionDatasetMaker(_BaseMaker):
  test_dir (str | Path | None): Path to the test data directory.

  Returns:
- VisionDatasetMaker: A new, pre-split instance.
+ Instance: A new, pre-split instance.

  Raises:
  ValueError: If the classes found in train, val, or test directories are inconsistent.
@@ -186,7 +186,7 @@ class VisionDatasetMaker(_BaseMaker):
  print(report)

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
+ stratify: bool = True, random_state: Optional[int] = None) -> 'DragonDatasetVision':
  """
  Splits the dataset into train, validation, and optional test sets.

@@ -202,7 +202,7 @@ class VisionDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for the random number generator for reproducible splits.

  Returns:
- VisionDatasetMaker: The same instance, now with datasets split.
+ Self: The same instance, now with datasets split.

  Raises:
  ValueError: If `val_size` and `test_size` sum to 1.0 or more.
@@ -249,22 +249,23 @@ class VisionDatasetMaker(_BaseMaker):
  _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
  return self

- def configure_transforms(self, resize_size: int = 256, crop_size: int = 224,
+ def configure_transforms(self,
+ resize_size: int = 256,
+ crop_size: int = 224,
  mean: Optional[List[float]] = [0.485, 0.456, 0.406],
  std: Optional[List[float]] = [0.229, 0.224, 0.225],
  pre_transforms: Optional[List[Callable]] = None,
- extra_train_transforms: Optional[List[Callable]] = None) -> 'VisionDatasetMaker':
+ extra_train_transforms: Optional[List[Callable]] = None) -> 'DragonDatasetVision':
  """
  Configures and applies the image transformations and augmentations.

  This method must be called AFTER data is loaded and split.

  It sets up two pipelines:
- 1. **Training Pipeline:** Includes random augmentations like
- `RandomResizedCrop` and `RandomHorizontalFlip` (plus any
+ 1. **Training Pipeline:** Includes random augmentations:
+ `RandomResizedCrop(crop_size)`, `RandomHorizontalFlip(0.5)`, and `RandomRotation(90)` (plus any
  `extra_train_transforms`) for online augmentation.
- 2. **Validation/Test Pipeline:** A deterministic pipeline using
- `Resize` and `CenterCrop` for consistent evaluation.
+ 2. **Validation/Test Pipeline:** A deterministic pipeline using `Resize` and `CenterCrop` for consistent evaluation.

  Both pipelines finish with `ToTensor` and `Normalize`.

@@ -273,13 +274,13 @@ class VisionDatasetMaker(_BaseMaker):
  for validation/testing.
  crop_size (int): The target size (square) for the final
  cropped image.
- mean (List[float]): The mean values for normalization (e.g., ImageNet mean).
- std (List[float]): The standard deviation values for normalization (e.g., ImageNet std).
+ mean (List[float] | None): The mean values for normalization (e.g., ImageNet mean).
+ std (List[float] | None): The standard deviation values for normalization (e.g., ImageNet std).
  extra_train_transforms (List[Callable] | None): A list of additional torchvision transforms to add to the end of the training transformations.
  pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.

  Returns:
- VisionDatasetMaker: The same instance, with transforms applied.
+ Self: The same instance, with transforms applied.

  Raises:
  RuntimeError: If called before data is split.
@@ -315,8 +316,9 @@ class VisionDatasetMaker(_BaseMaker):

  # Base augmentations for training
  base_train_transforms = [
- transforms.RandomResizedCrop(crop_size),
- transforms.RandomHorizontalFlip()
+ transforms.RandomResizedCrop(size=crop_size),
+ transforms.RandomHorizontalFlip(p=0.5),
+ transforms.RandomRotation(degrees=90)
  ]
  if extra_train_transforms:
  base_train_transforms.extend(extra_train_transforms)
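
Version 16.0.0 adds `RandomRotation(degrees=90)` to the default training augmentations and spells out the previously implicit arguments. For reference, a sketch of the standalone torchvision pipeline these defaults correspond to, using the documented `ToTensor`/`Normalize` tail and the default `crop_size=224` (the maker composes this internally; this is only an illustration):

from torchvision import transforms

# Equivalent of the 16.0.0 default training pipeline.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
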
@@ -381,8 +383,8 @@ class VisionDatasetMaker(_BaseMaker):
  _LOGGER.warning("Transforms have not been configured.")

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
  """
@@ -481,7 +483,7 @@ class VisionDatasetMaker(_BaseMaker):
  )

  # 3. Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)

  def save_class_map(self, save_dir: Union[str,Path]) -> dict[str,int]:
  """
@@ -499,6 +501,39 @@ class VisionDatasetMaker(_BaseMaker):

  return self.class_map

+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ elif self._full_dataset:
+ return f"Full Dataset: {len(self._full_dataset)} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
+ def __repr__(self) -> str:
+ s = f"<{self.__class__.__name__}>:\n"
+ s += f" Split: {self._is_split}\n"
+ s += f" Transforms Configured: {self._are_transforms_configured}\n"
+
+ if self.class_map:
+ s += f" Classes: {len(self.class_map)}\n"
+
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+ elif self._full_dataset:
+ s += f" Full Dataset Size: {len(self._full_dataset)} images\n"
+
+ return s
+

  class _DatasetTransformer(Dataset):
  """
@@ -655,8 +690,8 @@ class _PairedRandomResizedCrop:

  return cropped_image, cropped_mask # type: ignore

- # --- SegmentationDatasetMaker ---
- class SegmentationDatasetMaker(_BaseMaker):
+ # --- Segmentation Dataset ---
+ class DragonDatasetSegmentation:
  """
  Creates processed PyTorch datasets for segmentation from image and mask folders.

@@ -665,7 +700,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  to both the image and its corresponding mask.

  Workflow:
- 1. `maker = SegmentationDatasetMaker.from_folders(img_dir, mask_dir)`
+ 1. `maker = DragonDatasetSegmentation.from_folders(img_dir, mask_dir)`
  2. `maker.set_class_map({'background': 0, 'road': 1})`
  3. `maker.split_data(val_size=0.2)`
  4. `maker.configure_transforms(crop_size=256)`
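 
Under the new name, the documented workflow reads as follows (a sketch: the directory paths are placeholders, and the final accessor name is assumed from the `return self._train_dataset, self._val_dataset` pattern shown later in this diff):

from ml_tools.ML_vision_datasetmaster import DragonDatasetSegmentation

maker = DragonDatasetSegmentation.from_folders("data/images", "data/masks")
maker.set_class_map({'background': 0, 'road': 1})
maker.split_data(val_size=0.2)
maker.configure_transforms(crop_size=256)
train_ds, val_ds = maker.get_datasets()  # assumed accessor name
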
@@ -677,7 +712,9 @@ class SegmentationDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class method `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self.image_paths: List[Path] = []
  self.mask_paths: List[Path] = []
  self.class_map: Dict[str, int] = {}
@@ -686,9 +723,10 @@ class SegmentationDatasetMaker(_BaseMaker):
  self._are_transforms_configured = False
  self.train_transform: Optional[Callable] = None
  self.val_transform: Optional[Callable] = None
+ self._has_mean_std: bool = False

  @classmethod
- def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
+ def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'DragonDatasetSegmentation':
  """
  Creates a maker instance by loading all matching image-mask pairs
  from two corresponding directories.
@@ -701,7 +739,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  mask_dir (str | Path): Path to the directory containing segmentation masks.

  Returns:
- SegmentationDatasetMaker: A new instance with all pairs loaded.
+ DragonDatasetSegmentation: A new instance with all pairs loaded.
  """
  maker = cls()
  img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -761,14 +799,14 @@ class SegmentationDatasetMaker(_BaseMaker):
  Logs a report of the types, sizes, and channels of image files
  found in the directory. Useful for checking masks.
  """
- VisionDatasetMaker.inspect_folder(path)
+ DragonDatasetVision.inspect_folder(path)

- def set_class_map(self, class_map: Dict[str, int]) -> 'SegmentationDatasetMaker':
+ def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetSegmentation':
  """
- Sets a map of pixel_value -> class_name. This is used by the MLTrainer for clear evaluation reports.
+ Sets a map of class_name -> pixel value. This is used by the Trainer for clear evaluation reports.

  Args:
- class_map (Dict[int, str]): A dictionary mapping the integer pixel
+ class_map (Dict[str, int]): A dictionary mapping the integer pixel
  value in a mask to its string name.
  Example: {'background': 0, 'road': 1, 'car': 2}
  """
@@ -784,7 +822,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  return []

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- random_state: Optional[int] = 42) -> 'SegmentationDatasetMaker':
+ random_state: Optional[int] = 42) -> 'DragonDatasetSegmentation':
  """
  Splits the loaded image-mask pairs into train, validation, and test sets.

@@ -794,7 +832,7 @@ class SegmentationDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for reproducible splits.

  Returns:
- SegmentationDatasetMaker: The same instance, now with datasets created.
+ DragonDatasetSegmentation: The same instance, now with datasets created.
  """
  if self._is_split:
  _LOGGER.warning("Data has already been split.")
@@ -849,8 +887,8 @@ class SegmentationDatasetMaker(_BaseMaker):
  def configure_transforms(self,
  resize_size: int = 256,
  crop_size: int = 224,
- mean: List[float] = [0.485, 0.456, 0.406],
- std: List[float] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+ mean: Optional[List[float]] = [0.485, 0.456, 0.406],
+ std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetSegmentation':
  """
  Configures and applies the image and mask transformations.

@@ -861,39 +899,60 @@ class SegmentationDatasetMaker(_BaseMaker):
  for validation/testing.
  crop_size (int): The target size (square) for the final
  cropped image.
- mean (List[float]): The mean values for image normalization.
- std (List[float]): The std dev values for image normalization.
+ mean (List[float] | None): The mean values for image normalization.
+ std (List[float] | None): The std dev values for image normalization.

  Returns:
- SegmentationDatasetMaker: The same instance, with transforms applied.
+ DragonDatasetSegmentation: The same instance, with transforms applied.
  """
  if not self._is_split:
  _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
  raise RuntimeError()

+ if (mean is None and std is not None) or (mean is not None and std is None):
+ _LOGGER.error(f"'mean' and 'std' must be both None or both defined, but only one was provided.")
+ raise ValueError()
+
  # --- Store components for validation recipe ---
- self.val_recipe_components = {
+ self.val_recipe_components: dict[str,Any] = {
  VisionTransformRecipeKeys.RESIZE_SIZE: resize_size,
  VisionTransformRecipeKeys.CROP_SIZE: crop_size,
- VisionTransformRecipeKeys.MEAN: mean,
- VisionTransformRecipeKeys.STD: std
  }
+
+ if mean is not None and std is not None:
+ self.val_recipe_components.update({
+ VisionTransformRecipeKeys.MEAN: mean,
+ VisionTransformRecipeKeys.STD: std
+ })
+ self._has_mean_std = True

  # --- Validation/Test Pipeline (Deterministic) ---
- self.val_transform = _PairedCompose([
- _PairedResize(resize_size),
- _PairedCenterCrop(crop_size),
- _PairedToTensor(),
- _PairedNormalize(mean, std)
- ])
-
- # --- Training Pipeline (Augmentation) ---
- self.train_transform = _PairedCompose([
- _PairedRandomResizedCrop(crop_size),
- _PairedRandomHorizontalFlip(p=0.5),
- _PairedToTensor(),
- _PairedNormalize(mean, std)
- ])
+ if self._has_mean_std:
+ self.val_transform = _PairedCompose([
+ _PairedResize(resize_size),
+ _PairedCenterCrop(crop_size),
+ _PairedToTensor(),
+ _PairedNormalize(mean, std) # type: ignore
+ ])
+ # --- Training Pipeline (Augmentation) ---
+ self.train_transform = _PairedCompose([
+ _PairedRandomResizedCrop(crop_size),
+ _PairedRandomHorizontalFlip(p=0.5),
+ _PairedToTensor(),
+ _PairedNormalize(mean, std) # type: ignore
+ ])
+ else:
+ self.val_transform = _PairedCompose([
+ _PairedResize(resize_size),
+ _PairedCenterCrop(crop_size),
+ _PairedToTensor()
+ ])
+ # --- Training Pipeline (Augmentation) ---
+ self.train_transform = _PairedCompose([
+ _PairedRandomResizedCrop(crop_size),
+ _PairedRandomHorizontalFlip(p=0.5),
+ _PairedToTensor()
+ ])

  # --- Apply Transforms to the Datasets ---
  self._train_dataset.transform = self.train_transform # type: ignore
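
The net effect of this hunk: normalization is now optional, and `mean`/`std` must be supplied or omitted together. A sketch of the resulting call patterns (paths are placeholders):

from ml_tools.ML_vision_datasetmaster import DragonDatasetSegmentation

maker = DragonDatasetSegmentation.from_folders("data/images", "data/masks")
maker.split_data(val_size=0.2)

# Default: normalized pipelines using the ImageNet statistics.
maker.configure_transforms(crop_size=256)

# Skip Normalize entirely (e.g. when the model normalizes inputs itself).
maker.configure_transforms(crop_size=256, mean=None, std=None)

# Providing only one of the pair raises ValueError:
# maker.configure_transforms(crop_size=256, mean=None, std=[0.229, 0.224, 0.225])
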
@@ -921,8 +980,8 @@ class SegmentationDatasetMaker(_BaseMaker):
  raise RuntimeError()

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
  """
@@ -946,23 +1005,57 @@ class SegmentationDatasetMaker(_BaseMaker):

  # validate path
  file_path = make_fullpath(filepath, make=True, enforce="file")
-
+
  # Add standard transforms
  recipe: Dict[str, Any] = {
  VisionTransformRecipeKeys.TASK: "segmentation",
  VisionTransformRecipeKeys.PIPELINE: [
- {VisionTransformRecipeKeys.NAME: "Resize", "kwargs": {"size": components["resize_size"]}},
- {VisionTransformRecipeKeys.NAME: "CenterCrop", "kwargs": {"size": components["crop_size"]}},
- {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}},
- {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
- "mean": components["mean"],
- "std": components["std"]
- }}
+ {VisionTransformRecipeKeys.NAME: "Resize", "kwargs": {"size": components[VisionTransformRecipeKeys.RESIZE_SIZE]}},
+ {VisionTransformRecipeKeys.NAME: "CenterCrop", "kwargs": {"size": components[VisionTransformRecipeKeys.CROP_SIZE]}},
+ {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}}
  ]
  }

+ if self._has_mean_std:
+ recipe[VisionTransformRecipeKeys.PIPELINE].append(
+ {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
+ "mean": components[VisionTransformRecipeKeys.MEAN],
+ "std": components[VisionTransformRecipeKeys.STD]
+ }}
+ )
+
  # Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)
+
+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
+ def __repr__(self) -> str:
+ s = f"<{self.__class__.__name__}>:\n"
+ s += f" Total Image-Mask Pairs: {len(self.image_paths)}\n"
+ s += f" Split: {self._is_split}\n"
+ s += f" Transforms Configured: {self._are_transforms_configured}\n"
+
+ if self.class_map:
+ s += f" Classes: {list(self.class_map.keys())}\n"
+
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+
+ return s


  # Object detection
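
For reference, the recipe that `save_transform_recipe()` writes is a plain dictionary serialized by the now-private `_save_recipe`. A hypothetical rendering of its contents, assuming the `VisionTransformRecipeKeys` constants (defined in `_keys.py`, not shown in this diff) resolve to the literal strings "task", "pipeline", and "name":

recipe = {
    "task": "segmentation",
    "pipeline": [
        {"name": "Resize", "kwargs": {"size": 256}},
        {"name": "CenterCrop", "kwargs": {"size": 224}},
        {"name": "ToTensor", "kwargs": {}},
        # Appended only when mean/std were configured (self._has_mean_std):
        {"name": "Normalize", "kwargs": {"mean": [0.485, 0.456, 0.406],
                                         "std": [0.229, 0.224, 0.225]}},
    ],
}
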
@@ -990,7 +1083,7 @@ class _ObjectDetectionDataset(Dataset):
  self.annotation_paths = annotation_paths
  self.transform = transform

- # --- Propagate 'classes' if they exist (for MLTrainer) ---
+ # --- Propagate 'classes' if they exist ---
  self.classes: List[str] = []

  def __len__(self):
@@ -1077,7 +1170,7 @@ class _OD_PairedRandomHorizontalFlip:
  return image, target


- class ObjectDetectionDatasetMaker(_BaseMaker):
+ class DragonDatasetObjectDetection:
  """
  Creates processed PyTorch datasets for object detection from image
  and JSON annotation folders.
@@ -1090,7 +1183,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  so this class provides a `collate_fn` to be used with a DataLoader.

  Workflow:
- 1. `maker = ObjectDetectionDatasetMaker.from_folders(img_dir, ann_dir)`
+ 1. `maker = DragonDatasetObjectDetection.from_folders(img_dir, ann_dir)`
  2. `maker.set_class_map({'background': 0, 'person': 1, 'car': 2})`
  3. `maker.split_data(val_size=0.2)`
  4. `maker.configure_transforms()`
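
Because detection targets are variable-length, the maker exposes `collate_fn` for use with a `DataLoader`. A sketch of the documented workflow end to end (paths are placeholders, and the dataset accessor name is assumed from the `return self._train_dataset, self._val_dataset` pattern shown below):

from torch.utils.data import DataLoader
from ml_tools.ML_vision_datasetmaster import DragonDatasetObjectDetection

maker = DragonDatasetObjectDetection.from_folders("data/images", "data/annotations")
maker.set_class_map({'background': 0, 'person': 1, 'car': 2})
maker.split_data(val_size=0.2)
maker.configure_transforms()

train_ds, val_ds = maker.get_datasets()  # assumed accessor name
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True,
                          collate_fn=maker.collate_fn)
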
@@ -1104,7 +1197,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  """
  Typically not called directly. Use the class method `from_folders()` to create an instance.
  """
- super().__init__()
+ self._train_dataset = None
+ self._test_dataset = None
+ self._val_dataset = None
  self.image_paths: List[Path] = []
  self.annotation_paths: List[Path] = []
  self.class_map: Dict[str, int] = {}
@@ -1114,9 +1209,10 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  self.train_transform: Optional[Callable] = None
  self.val_transform: Optional[Callable] = None
  self._val_recipe_components: Optional[Dict[str, Any]] = None
+ self._has_mean_std: bool = False

  @classmethod
- def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
+ def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'DragonDatasetObjectDetection':
  """
  Creates a maker instance by loading all matching image-annotation pairs
  from two corresponding directories.
@@ -1133,7 +1229,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  annotation files.

  Returns:
- ObjectDetectionDatasetMaker: A new instance with all pairs loaded.
+ DragonDatasetObjectDetection: A new instance with all pairs loaded.
  """
  maker = cls()
  img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -1180,9 +1276,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  Logs a report of the types, sizes, and channels of image files
  found in the directory.
  """
- VisionDatasetMaker.inspect_folder(path)
+ DragonDatasetVision.inspect_folder(path)

- def set_class_map(self, class_map: Dict[str, int]) -> 'ObjectDetectionDatasetMaker':
+ def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetObjectDetection':
  """
  Sets a map of class_name -> pixel_value. This is used by the
  MLTrainer for clear evaluation reports.
@@ -1210,7 +1306,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  return []

  def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
- random_state: Optional[int] = 42) -> 'ObjectDetectionDatasetMaker':
+ random_state: Optional[int] = 42) -> 'DragonDatasetObjectDetection':
  """
  Splits the loaded image-annotation pairs into train, validation, and test sets.

@@ -1220,7 +1316,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  random_state (int | None): Seed for reproducible splits.

  Returns:
- ObjectDetectionDatasetMaker: The same instance, now with datasets created.
+ DragonDatasetObjectDetection: The same instance, now with datasets created.
  """
  if self._is_split:
  _LOGGER.warning("Data has already been split.")
@@ -1273,8 +1369,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  return self

  def configure_transforms(self,
- mean: List[float] = [0.485, 0.456, 0.406],
- std: List[float] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+ mean: Optional[List[float]] = [0.485, 0.456, 0.406],
+ std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetObjectDetection':
  """
  Configures and applies the image and target transformations.

@@ -1285,34 +1381,52 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  Transforms are limited to augmentation (flip), ToTensor, and Normalize.

  Args:
- mean (List[float]): The mean values for image normalization.
- std (List[float]): The std dev values for image normalization.
+ mean (List[float] | None): The mean values for image normalization.
+ std (List[float] | None): The std dev values for image normalization.

  Returns:
- ObjectDetectionDatasetMaker: The same instance, with transforms applied.
+ DragonDatasetObjectDetection: The same instance, with transforms applied.
  """
  if not self._is_split:
  _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
  raise RuntimeError()

- # --- Store components for validation recipe ---
- self._val_recipe_components = {
- VisionTransformRecipeKeys.MEAN: mean,
- VisionTransformRecipeKeys.STD: std
- }
-
- # --- Validation/Test Pipeline (Deterministic) ---
- self.val_transform = _OD_PairedCompose([
- _OD_PairedToTensor(),
- _OD_PairedNormalize(mean, std)
- ])
+ if (mean is None and std is not None) or (mean is not None and std is None):
+ _LOGGER.error(f"'mean' and 'std' must be both None or both defined, but only one was provided.")
+ raise ValueError()

- # --- Training Pipeline (Augmentation) ---
- self.train_transform = _OD_PairedCompose([
- _OD_PairedRandomHorizontalFlip(p=0.5),
- _OD_PairedToTensor(),
- _OD_PairedNormalize(mean, std)
- ])
+ if mean is not None and std is not None:
+ # --- Store components for validation recipe ---
+ self._val_recipe_components = {
+ VisionTransformRecipeKeys.MEAN: mean,
+ VisionTransformRecipeKeys.STD: std
+ }
+ self._has_mean_std = True
+
+ if self._has_mean_std:
+ # --- Validation/Test Pipeline (Deterministic) ---
+ self.val_transform = _OD_PairedCompose([
+ _OD_PairedToTensor(),
+ _OD_PairedNormalize(mean, std) # type: ignore
+ ])
+
+ # --- Training Pipeline (Augmentation) ---
+ self.train_transform = _OD_PairedCompose([
+ _OD_PairedRandomHorizontalFlip(p=0.5),
+ _OD_PairedToTensor(),
+ _OD_PairedNormalize(mean, std) # type: ignore
+ ])
+ else:
+ # --- Validation/Test Pipeline (Deterministic) ---
+ self.val_transform = _OD_PairedCompose([
+ _OD_PairedToTensor()
+ ])
+
+ # --- Training Pipeline (Augmentation) ---
+ self.train_transform = _OD_PairedCompose([
+ _OD_PairedRandomHorizontalFlip(p=0.5),
+ _OD_PairedToTensor()
+ ])

  # --- Apply Transforms to the Datasets ---
  self._train_dataset.transform = self.train_transform # type: ignore
@@ -1340,8 +1454,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  raise RuntimeError()

  if self._test_dataset:
- return self._train_dataset, self._val_dataset, self._test_dataset
- return self._train_dataset, self._val_dataset
+ return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+ return self._train_dataset, self._val_dataset # type: ignore

  @property
  def collate_fn(self) -> Callable:
@@ -1368,10 +1482,6 @@ class ObjectDetectionDatasetMaker(_BaseMaker):

  components = self._val_recipe_components

- if not components:
- _LOGGER.error(f"Error getting the transformers recipe for validation set.")
- raise ValueError()
-
  # validate path
  file_path = make_fullpath(filepath, make=True, enforce="file")

@@ -1380,15 +1490,49 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
  VisionTransformRecipeKeys.TASK: "object_detection",
  VisionTransformRecipeKeys.PIPELINE: [
  {VisionTransformRecipeKeys.NAME: "ToTensor", "kwargs": {}},
- {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
- "mean": components["mean"],
- "std": components["std"]
- }}
  ]
  }

+ if self._has_mean_std and components:
+ recipe[VisionTransformRecipeKeys.PIPELINE].append(
+ {VisionTransformRecipeKeys.NAME: "Normalize", "kwargs": {
+ "mean": components[VisionTransformRecipeKeys.MEAN],
+ "std": components[VisionTransformRecipeKeys.STD]
+ }}
+ )
+
  # Save the file
- save_recipe(recipe, file_path)
+ _save_recipe(recipe, file_path)
+
+ def images_per_dataset(self) -> str:
+ """
+ Get the number of images per dataset as a string.
+ """
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+ else:
+ _LOGGER.warning("No datasets found.")
+ return "No datasets found"
+
+ def __repr__(self) -> str:
+ s = f"<{self.__class__.__name__}>:\n"
+ s += f" Total Image-Annotation Pairs: {len(self.image_paths)}\n"
+ s += f" Split: {self._is_split}\n"
+ s += f" Transforms Configured: {self._are_transforms_configured}\n"
+
+ if self.class_map:
+ s += f" Classes ({len(self.class_map)}): {list(self.class_map.keys())}\n"
+
+ if self._is_split:
+ train_len = len(self._train_dataset) if self._train_dataset else 0
+ val_len = len(self._val_dataset) if self._val_dataset else 0
+ test_len = len(self._test_dataset) if self._test_dataset else 0
+ s += f" Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
+
+ return s


  def info():