dragon-ml-toolbox 14.7.0__py3-none-any.whl → 16.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/METADATA +9 -5
- dragon_ml_toolbox-16.2.0.dist-info/RECORD +51 -0
- ml_tools/ETL_cleaning.py +20 -20
- ml_tools/ETL_engineering.py +23 -25
- ml_tools/GUI_tools.py +20 -20
- ml_tools/MICE_imputation.py +3 -3
- ml_tools/ML_callbacks.py +43 -26
- ml_tools/ML_configuration.py +704 -24
- ml_tools/ML_datasetmaster.py +235 -280
- ml_tools/ML_evaluation.py +144 -39
- ml_tools/ML_evaluation_multi.py +103 -35
- ml_tools/ML_inference.py +290 -208
- ml_tools/ML_models.py +13 -102
- ml_tools/ML_models_advanced.py +1 -1
- ml_tools/ML_optimization.py +12 -12
- ml_tools/ML_scaler.py +11 -11
- ml_tools/ML_sequence_datasetmaster.py +341 -0
- ml_tools/ML_sequence_evaluation.py +219 -0
- ml_tools/ML_sequence_inference.py +391 -0
- ml_tools/ML_sequence_models.py +139 -0
- ml_tools/ML_trainer.py +1342 -386
- ml_tools/ML_utilities.py +1 -1
- ml_tools/ML_vision_datasetmaster.py +120 -72
- ml_tools/ML_vision_evaluation.py +30 -6
- ml_tools/ML_vision_inference.py +129 -152
- ml_tools/ML_vision_models.py +1 -1
- ml_tools/ML_vision_transformers.py +121 -40
- ml_tools/PSO_optimization.py +6 -6
- ml_tools/SQL.py +4 -4
- ml_tools/{keys.py → _keys.py} +45 -0
- ml_tools/_schema.py +1 -1
- ml_tools/ensemble_evaluation.py +1 -1
- ml_tools/ensemble_inference.py +7 -33
- ml_tools/ensemble_learning.py +1 -1
- ml_tools/optimization_tools.py +2 -2
- ml_tools/path_manager.py +5 -5
- ml_tools/utilities.py +1 -2
- dragon_ml_toolbox-14.7.0.dist-info/RECORD +0 -49
- ml_tools/RNN_forecast.py +0 -56
- ml_tools/_ML_vision_recipe.py +0 -88
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-14.7.0.dist-info → dragon_ml_toolbox-16.2.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_utilities.py
CHANGED
@@ -7,7 +7,7 @@ from torch import nn
 from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .
+from ._keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
 from .utilities import load_dataframe
 from .custom_logger import save_list_strings, custom_logger
 from .serde import serialize_object_filename
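The `_keys` import here (and in the hunks below) follows the `ml_tools/{keys.py → _keys.py}` rename listed in the summary above, so callers that imported the old public module must switch to the private one. A minimal sketch of the caller-side change (key-class name taken from this hunk):

# Before (14.7.0): key enums lived in the public module
# from ml_tools.keys import PyTorchCheckpointKeys
# After (16.2.0): the module is private
from ml_tools._keys import PyTorchCheckpointKeys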
ml_tools/ML_vision_datasetmaster.py
CHANGED

@@ -12,25 +12,23 @@ import random
 import json
 import inspect
 
-from .ML_datasetmaster import _BaseMaker
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .
-from .
-from .ML_vision_transformers import TRANSFORM_REGISTRY
+from ._keys import VisionTransformRecipeKeys, ObjectDetectionKeys
+from .ML_vision_transformers import TRANSFORM_REGISTRY, _save_recipe
 from .custom_logger import custom_logger
 
 
 __all__ = [
-    "VisionDatasetMaker",
-    "SegmentationDatasetMaker",
-    "ObjectDetectionDatasetMaker"
+    "DragonDatasetVision",
+    "DragonDatasetSegmentation",
+    "DragonDatasetObjectDetection"
 ]
 
 
-# ---
-class VisionDatasetMaker(_BaseMaker):
+# --- Vision Maker ---
+class DragonDatasetVision:
     """
     Creates processed PyTorch datasets for computer vision tasks from an
     image folder directory.
@@ -45,10 +43,12 @@ class VisionDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class methods `from_folder()` or `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self._full_dataset: Optional[ImageFolder] = None
         self.labels: Optional[List[int]] = None
-        self.class_map:
+        self.class_map: dict[str,int] = dict()
 
         self._is_split = False
         self._are_transforms_configured = False

@@ -56,7 +56,7 @@ class VisionDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folder(cls, root_dir: Union[str,Path]) -> 'VisionDatasetMaker':
+    def from_folder(cls, root_dir: Union[str,Path]) -> 'DragonDatasetVision':
         """
         Creates a maker instance from a single root directory of images.
 

@@ -70,7 +70,7 @@ class VisionDatasetMaker(_BaseMaker):
             root_dir (str | Path): The path to the root directory containing class subfolders.
 
         Returns:
-
+            Instance: A new instance with the full dataset loaded.
         """
         root_path = make_fullpath(root_dir, enforce="directory")
         # Load with NO transform. We get PIL Images.

@@ -87,7 +87,7 @@ class VisionDatasetMaker(_BaseMaker):
     def from_folders(cls,
                      train_dir: Union[str,Path],
                      val_dir: Union[str,Path],
-                     test_dir: Optional[Union[str,Path]] = None) -> 'VisionDatasetMaker':
+                     test_dir: Optional[Union[str,Path]] = None) -> 'DragonDatasetVision':
         """
         Creates a maker instance from separate, pre-split directories.
 

@@ -101,7 +101,7 @@ class VisionDatasetMaker(_BaseMaker):
             test_dir (str | Path | None): Path to the test data directory.
 
         Returns:
-
+            Instance: A new, pre-split instance.
 
         Raises:
             ValueError: If the classes found in train, val, or test directories are inconsistent.

@@ -186,7 +186,7 @@ class VisionDatasetMaker(_BaseMaker):
         print(report)
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
+                   stratify: bool = True, random_state: Optional[int] = None) -> 'DragonDatasetVision':
         """
         Splits the dataset into train, validation, and optional test sets.
 

@@ -202,7 +202,7 @@ class VisionDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for the random number generator for reproducible splits.
 
         Returns:
-
+            Self: The same instance, now with datasets split.
 
         Raises:
             ValueError: If `val_size` and `test_size` sum to 1.0 or more.
@@ -249,22 +249,23 @@ class VisionDatasetMaker(_BaseMaker):
         _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
         return self
 
-    def configure_transforms(self,
+    def configure_transforms(self,
+                             resize_size: int = 256,
+                             crop_size: int = 224,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
                              std: Optional[List[float]] = [0.229, 0.224, 0.225],
                              pre_transforms: Optional[List[Callable]] = None,
-                             extra_train_transforms: Optional[List[Callable]] = None) -> 'VisionDatasetMaker':
+                             extra_train_transforms: Optional[List[Callable]] = None) -> 'DragonDatasetVision':
         """
         Configures and applies the image transformations and augmentations.
 
         This method must be called AFTER data is loaded and split.
 
         It sets up two pipelines:
-        1. **Training Pipeline:** Includes random augmentations
-           `RandomResizedCrop` and `
+        1. **Training Pipeline:** Includes random augmentations:
+           `RandomResizedCrop(crop_size)`, `RandomHorizontalFlip(0.5)`, and `RandomRotation(90)` (plus any
            `extra_train_transforms`) for online augmentation.
-        2. **Validation/Test Pipeline:** A deterministic pipeline using
-           `Resize` and `CenterCrop` for consistent evaluation.
+        2. **Validation/Test Pipeline:** A deterministic pipeline using `Resize` and `CenterCrop` for consistent evaluation.
 
         Both pipelines finish with `ToTensor` and `Normalize`.
 

@@ -279,7 +280,7 @@ class VisionDatasetMaker(_BaseMaker):
             pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.
 
         Returns:
-
+            Self: The same instance, with transforms applied.
 
         Raises:
             RuntimeError: If called before data is split.

@@ -315,8 +316,9 @@ class VisionDatasetMaker(_BaseMaker):
 
         # Base augmentations for training
         base_train_transforms = [
-            transforms.RandomResizedCrop(crop_size),
-            transforms.RandomHorizontalFlip()
+            transforms.RandomResizedCrop(size=crop_size),
+            transforms.RandomHorizontalFlip(p=0.5),
+            transforms.RandomRotation(degrees=90)
         ]
         if extra_train_transforms:
            base_train_transforms.extend(extra_train_transforms)
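The training pipeline now uses explicit keyword arguments and gains a `RandomRotation(degrees=90)` augmentation. For reference, both default pipelines can be reproduced standalone with torchvision; a minimal sketch, assuming the composition order described in the docstring (augmentations, then `ToTensor`, then `Normalize`):

from torchvision import transforms

mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

# Training pipeline: the three base augmentations added in this version
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Validation/test pipeline: deterministic resize + center crop
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])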
@@ -350,10 +352,10 @@ class VisionDatasetMaker(_BaseMaker):
         # --- Apply Transforms using the Wrapper ---
         # This correctly assigns the transform regardless of whether the dataset is a Subset (from_folder) or an ImageFolder (from_folders).
 
-        self._train_dataset = _DatasetTransformer(self._train_dataset, train_transform) # type: ignore
-        self._val_dataset = _DatasetTransformer(self._val_dataset, val_transform) # type: ignore
+        self._train_dataset = _DatasetTransformer(self._train_dataset, train_transform, self.class_map) # type: ignore
+        self._val_dataset = _DatasetTransformer(self._val_dataset, val_transform, self.class_map) # type: ignore
         if self._test_dataset:
-            self._test_dataset = _DatasetTransformer(self._test_dataset, val_transform) # type: ignore
+            self._test_dataset = _DatasetTransformer(self._test_dataset, val_transform, self.class_map) # type: ignore
 
         self._are_transforms_configured = True
         _LOGGER.info("Image transforms configured and applied.")

@@ -381,8 +383,8 @@ class VisionDatasetMaker(_BaseMaker):
             _LOGGER.warning("Transforms have not been configured.")
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
         """

@@ -481,7 +483,7 @@ class VisionDatasetMaker(_BaseMaker):
         )
 
         # 3. Save the file
-
+        _save_recipe(recipe, file_path)
 
     def save_class_map(self, save_dir: Union[str,Path]) -> dict[str,int]:
         """
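Putting the renamed class together, a minimal end-to-end sketch of the API shown in these hunks. The name of the method that returns the split datasets is not visible in this diff, so `get_datasets()` below is an assumption; paths are illustrative:

from ml_tools.ML_vision_datasetmaster import DragonDatasetVision

maker = (
    DragonDatasetVision.from_folder("data/images")  # root dir of class subfolders
    .split_data(val_size=0.2, test_size=0.1, stratify=True, random_state=42)
    .configure_transforms(resize_size=256, crop_size=224)  # new explicit size args
)
train_ds, val_ds, test_ds = maker.get_datasets()  # assumed name, not shown in the diff
maker.save_class_map("artifacts/")
maker.save_transform_recipe("artifacts/transform_recipe.json")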
@@ -499,6 +501,21 @@ class VisionDatasetMaker(_BaseMaker):
 
         return self.class_map
 
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+        elif self._full_dataset:
+            return f"Full Dataset: {len(self._full_dataset)} images"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found"
+
     def __repr__(self) -> str:
         s = f"<{self.__class__.__name__}>:\n"
         s += f"  Split: {self._is_split}\n"
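The new `images_per_dataset()` helper returns the summary string instead of printing it. Continuing the sketch above (counts illustrative):

print(maker.images_per_dataset())
# e.g. "Train | Validation | Test: 700 | 200 | 100 images"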
@@ -511,7 +528,7 @@ class VisionDatasetMaker(_BaseMaker):
             train_len = len(self._train_dataset) if self._train_dataset else 0
             val_len = len(self._val_dataset) if self._val_dataset else 0
             test_len = len(self._test_dataset) if self._test_dataset else 0
-            s += f"  Datasets (Train
+            s += f"  Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
         elif self._full_dataset:
             s += f"  Full Dataset Size: {len(self._full_dataset)} images\n"
 

@@ -523,9 +540,10 @@ class _DatasetTransformer(Dataset):
     Internal wrapper class to apply a specific transform pipeline to any
     dataset (e.g., a full ImageFolder or a Subset).
     """
-    def __init__(self, dataset: Dataset, transform: Optional[transforms.Compose] = None):
+    def __init__(self, dataset: Dataset, transform: Optional[transforms.Compose] = None, class_map: dict[str,int]=dict()):
         self.dataset = dataset
         self.transform = transform
+        self.class_map = class_map
 
         # --- Propagate attributes for inspection ---
         # For ImageFolder
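`_DatasetTransformer` follows the standard transform-on-read wrapper pattern, now also carrying the class map so downstream evaluation can label its reports. A self-contained sketch of the pattern, assuming the usual (image, label) item shape; the real class additionally propagates `ImageFolder` attributes such as `classes`:

from typing import Callable, Optional
from torch.utils.data import Dataset

class TransformWrapper(Dataset):
    """Apply a transform lazily to any underlying dataset (Subset or ImageFolder)."""
    def __init__(self, dataset: Dataset, transform: Optional[Callable] = None,
                 class_map: dict[str, int] = dict()):
        self.dataset = dataset
        self.transform = transform
        self.class_map = class_map  # carried along so reports can use class names

    def __len__(self):
        return len(self.dataset)  # delegate length to the wrapped dataset

    def __getitem__(self, idx):
        image, label = self.dataset[idx]   # raw PIL image from the wrapped dataset
        if self.transform:
            image = self.transform(image)  # transform applied at read time
        return image, label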
@@ -565,7 +583,7 @@ class _SegmentationDataset(Dataset):
         self.mask_paths = mask_paths
         self.transform = transform
 
-        # --- Propagate 'classes' if they exist
+        # --- Propagate 'classes' if they exist for trainer ---
         self.classes: List[str] = []
 
     def __len__(self):

@@ -673,8 +691,8 @@ class _PairedRandomResizedCrop:
 
         return cropped_image, cropped_mask # type: ignore
 
-# ---
-class SegmentationDatasetMaker(_BaseMaker):
+# --- Segmentation Dataset ---
+class DragonDatasetSegmentation:
     """
     Creates processed PyTorch datasets for segmentation from image and mask folders.
 

@@ -683,7 +701,7 @@ class SegmentationDatasetMaker(_BaseMaker):
     to both the image and its corresponding mask.
 
     Workflow:
-    1. `maker =
+    1. `maker = DragonDatasetSegmentation.from_folders(img_dir, mask_dir)`
     2. `maker.set_class_map({'background': 0, 'road': 1})`
     3. `maker.split_data(val_size=0.2)`
     4. `maker.configure_transforms(crop_size=256)`

@@ -695,7 +713,9 @@ class SegmentationDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class method `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self.image_paths: List[Path] = []
         self.mask_paths: List[Path] = []
         self.class_map: Dict[str, int] = {}
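The documented workflow above, as runnable code (paths illustrative):

from ml_tools.ML_vision_datasetmaster import DragonDatasetSegmentation

maker = DragonDatasetSegmentation.from_folders("data/images", "data/masks")
maker.set_class_map({'background': 0, 'road': 1})
maker.split_data(val_size=0.2)
maker.configure_transforms(crop_size=256)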
@@ -707,7 +727,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
+    def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'DragonDatasetSegmentation':
         """
         Creates a maker instance by loading all matching image-mask pairs
         from two corresponding directories.

@@ -720,7 +740,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             mask_dir (str | Path): Path to the directory containing segmentation masks.
 
         Returns:
-
+            DragonDatasetSegmentation: A new instance with all pairs loaded.
         """
         maker = cls()
         img_path_obj = make_fullpath(image_dir, enforce="directory")

@@ -780,14 +800,14 @@ class SegmentationDatasetMaker(_BaseMaker):
         Logs a report of the types, sizes, and channels of image files
         found in the directory. Useful for checking masks.
         """
-
+        DragonDatasetVision.inspect_folder(path)
 
-    def set_class_map(self, class_map: Dict[str, int]) -> 'SegmentationDatasetMaker':
+    def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetSegmentation':
         """
-        Sets a map of
+        Sets a map of class_name -> pixel value. This is used by the Trainer for clear evaluation reports.
 
         Args:
-            class_map (Dict[
+            class_map (Dict[str, int]): A dictionary mapping the integer pixel
                 value in a mask to its string name.
                 Example: {'background': 0, 'road': 1, 'car': 2}
         """

@@ -803,7 +823,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             return []
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   random_state: Optional[int] = 42) -> 'SegmentationDatasetMaker':
+                   random_state: Optional[int] = 42) -> 'DragonDatasetSegmentation':
         """
         Splits the loaded image-mask pairs into train, validation, and test sets.
 

@@ -813,7 +833,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for reproducible splits.
 
         Returns:
-
+            DragonDatasetSegmentation: The same instance, now with datasets created.
         """
         if self._is_split:
             _LOGGER.warning("Data has already been split.")

@@ -857,7 +877,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         self._train_dataset = _SegmentationDataset(train_imgs, train_masks, transform=None)
         self._val_dataset = _SegmentationDataset(val_imgs, val_masks, transform=None)
 
-        # Propagate class names to datasets for
+        # Propagate class names to datasets for trainer
         self._train_dataset.classes = self.classes # type: ignore
         self._val_dataset.classes = self.classes # type: ignore
 

@@ -869,7 +889,7 @@ class SegmentationDatasetMaker(_BaseMaker):
                              resize_size: int = 256,
                              crop_size: int = 224,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
-                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetSegmentation':
         """
         Configures and applies the image and mask transformations.
 

@@ -884,7 +904,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             std (List[float] | None): The std dev values for image normalization.
 
         Returns:
-
+            DragonDatasetSegmentation: The same instance, with transforms applied.
         """
         if not self._is_split:
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")

@@ -961,8 +981,8 @@ class SegmentationDatasetMaker(_BaseMaker):
             raise RuntimeError()
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
         """

@@ -1006,7 +1026,20 @@ class SegmentationDatasetMaker(_BaseMaker):
         )
 
         # Save the file
-
+        _save_recipe(recipe, file_path)
+
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found"
 
     def __repr__(self) -> str:
         s = f"<{self.__class__.__name__}>:\n"
@@ -1021,7 +1054,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         train_len = len(self._train_dataset) if self._train_dataset else 0
         val_len = len(self._val_dataset) if self._val_dataset else 0
         test_len = len(self._test_dataset) if self._test_dataset else 0
-        s += f"  Datasets (Train
+        s += f"  Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
 
         return s
 

@@ -1051,7 +1084,7 @@ class _ObjectDetectionDataset(Dataset):
         self.annotation_paths = annotation_paths
         self.transform = transform
 
-        # --- Propagate 'classes' if they exist
+        # --- Propagate 'classes' if they exist ---
         self.classes: List[str] = []
 
     def __len__(self):

@@ -1138,7 +1171,7 @@ class _OD_PairedRandomHorizontalFlip:
         return image, target
 
 
-class ObjectDetectionDatasetMaker(_BaseMaker):
+class DragonDatasetObjectDetection:
     """
     Creates processed PyTorch datasets for object detection from image
     and JSON annotation folders.

@@ -1151,7 +1184,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
     so this class provides a `collate_fn` to be used with a DataLoader.
 
     Workflow:
-    1. `maker =
+    1. `maker = DragonDatasetObjectDetection.from_folders(img_dir, ann_dir)`
     2. `maker.set_class_map({'background': 0, 'person': 1, 'car': 2})`
     3. `maker.split_data(val_size=0.2)`
     4. `maker.configure_transforms()`

@@ -1165,7 +1198,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class method `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self.image_paths: List[Path] = []
         self.annotation_paths: List[Path] = []
         self.class_map: Dict[str, int] = {}
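Because detection targets are variable-length, batches cannot be stacked by the default collate, which is why the class exposes a `collate_fn` property (shown in a later hunk). A minimal sketch of the documented workflow; `get_datasets()` is again an assumed name not visible in this diff:

from torch.utils.data import DataLoader
from ml_tools.ML_vision_datasetmaster import DragonDatasetObjectDetection

maker = DragonDatasetObjectDetection.from_folders("data/images", "data/annotations")
maker.set_class_map({'background': 0, 'person': 1, 'car': 2})  # background MUST be class 0
maker.split_data(val_size=0.2)
maker.configure_transforms()
train_ds, val_ds = maker.get_datasets()  # assumed name
# custom collate keeps per-image target dicts as a list instead of stacking them
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=maker.collate_fn)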
@@ -1178,7 +1213,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
+    def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'DragonDatasetObjectDetection':
         """
         Creates a maker instance by loading all matching image-annotation pairs
         from two corresponding directories.

@@ -1195,7 +1230,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
                 annotation files.
 
         Returns:
-
+            DragonDatasetObjectDetection: A new instance with all pairs loaded.
         """
         maker = cls()
         img_path_obj = make_fullpath(image_dir, enforce="directory")

@@ -1242,12 +1277,12 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         Logs a report of the types, sizes, and channels of image files
         found in the directory.
         """
-
+        DragonDatasetVision.inspect_folder(path)
 
-    def set_class_map(self, class_map: Dict[str, int]) -> 'ObjectDetectionDatasetMaker':
+    def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetObjectDetection':
         """
         Sets a map of class_name -> pixel_value. This is used by the
-
+        trainer for clear evaluation reports.
 
         **Important:** For object detection models, 'background' MUST
         be included as class 0.

@@ -1272,7 +1307,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             return []
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   random_state: Optional[int] = 42) -> 'ObjectDetectionDatasetMaker':
+                   random_state: Optional[int] = 42) -> 'DragonDatasetObjectDetection':
         """
         Splits the loaded image-annotation pairs into train, validation, and test sets.
 

@@ -1282,7 +1317,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for reproducible splits.
 
         Returns:
-
+            DragonDatasetObjectDetection: The same instance, now with datasets created.
         """
         if self._is_split:
             _LOGGER.warning("Data has already been split.")

@@ -1336,7 +1371,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
 
     def configure_transforms(self,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
-                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetObjectDetection':
         """
         Configures and applies the image and target transformations.
 

@@ -1351,7 +1386,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             std (List[float] | None): The std dev values for image normalization.
 
         Returns:
-
+            DragonDatasetObjectDetection: The same instance, with transforms applied.
         """
         if not self._is_split:
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")

@@ -1420,8 +1455,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             raise RuntimeError()
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     @property
     def collate_fn(self) -> Callable:

@@ -1468,8 +1503,21 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         )
 
         # Save the file
-
-
+        _save_recipe(recipe, file_path)
+
+    def images_per_dataset(self) -> str:
+        """
+        Get the number of images per dataset as a string.
+        """
+        if self._is_split:
+            train_len = len(self._train_dataset) if self._train_dataset else 0
+            val_len = len(self._val_dataset) if self._val_dataset else 0
+            test_len = len(self._test_dataset) if self._test_dataset else 0
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
+        else:
+            _LOGGER.warning("No datasets found.")
+            return "No datasets found"
+
     def __repr__(self) -> str:
         s = f"<{self.__class__.__name__}>:\n"
         s += f"  Total Image-Annotation Pairs: {len(self.image_paths)}\n"

@@ -1483,7 +1531,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         train_len = len(self._train_dataset) if self._train_dataset else 0
         val_len = len(self._val_dataset) if self._val_dataset else 0
         test_len = len(self._test_dataset) if self._test_dataset else 0
-        s += f"  Datasets (Train
+        s += f"  Datasets (Train|Val|Test): {train_len} | {val_len} | {test_len}\n"
 
         return s
 
ml_tools/ML_vision_evaluation.py
CHANGED
@@ -18,7 +18,10 @@ from torchmetrics.detection import MeanAveragePrecision
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .
+from ._keys import VisionKeys
+from .ML_configuration import (BinarySegmentationMetricsFormat,
+                               MultiClassSegmentationMetricsFormat,
+                               _BaseSegmentationFormat)
 
 
 __all__ = [
@@ -26,12 +29,15 @@ __all__ = [
     "object_detection_metrics"
 ]
 
+DPI_value = 250
+
 
 def segmentation_metrics(
     y_true: np.ndarray,
     y_pred: np.ndarray,
     save_dir: Union[str, Path],
-    class_names: Optional[List[str]] = None
+    class_names: Optional[List[str]] = None,
+    config: Optional[Union[BinarySegmentationMetricsFormat, MultiClassSegmentationMetricsFormat]] = None
 ):
     """
     Calculates and saves pixel-level metrics for segmentation tasks.

@@ -48,9 +54,20 @@
         y_pred (np.ndarray): Predicted masks (e.g., shape [N, H, W]).
         save_dir (str | Path): Directory to save the metrics report and plots.
         class_names (List[str] | None): Names of the classes for the report.
+        config (object): Formatting configuration object.
     """
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
 
+    # --- Parse Config or use defaults ---
+    if config is None:
+        format_config = _BaseSegmentationFormat()
+    else:
+        format_config = config
+
+    # --- Set Matplotlib font size ---
+    original_rc_params = plt.rcParams.copy()
+    plt.rcParams.update({'font.size': format_config.font_size})
+
     # Get all unique class labels present in either true or pred
     labels = np.unique(np.concatenate((np.unique(y_true), np.unique(y_pred)))).astype(int)
 
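Snapshotting `plt.rcParams` before the override and restoring it at the end of the function (last hunk of this file) is the standard way to scope a Matplotlib style change. A standalone sketch of the pattern; the `try/finally` here is defensive hardening not present in the diff:

import matplotlib.pyplot as plt

original_rc_params = plt.rcParams.copy()   # snapshot the global style
plt.rcParams.update({'font.size': 14})     # scoped override
try:
    fig, ax = plt.subplots()
    ax.set_title("Rendered at font.size=14")
finally:
    plt.rcParams.update(original_rc_params)  # restore, even if plotting fails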
@@ -110,7 +127,7 @@
     report_lines.append(per_class_df.to_string(index=False, float_format="%.4f"))
 
     report_string = "\n".join(report_lines)
-    print(report_string)
+    # print(report_string) # <-- I removed the print(report_string)
 
     # Save text report
     save_filename = VisionKeys.SEGMENTATION_REPORT + ".txt"
@@ -120,11 +137,11 @@
 
     # --- 3. Save Per-Class Metrics Heatmap ---
     try:
-        plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=
+        plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=DPI_value)
         sns.heatmap(
             per_class_df.set_index('Class').T,
             annot=True,
-            cmap=
+            cmap=format_config.heatmap_cmap, # Use config cmap
             fmt='.3f',
             linewidths=0.5
         )
@@ -149,7 +166,11 @@
         confusion_matrix=cm,
         display_labels=display_names
     )
-    disp.plot(cmap=
+    disp.plot(cmap=format_config.cm_cmap, ax=ax_cm, xticks_rotation=45) # Use config cmap
+
+    # Manually update font size of cell texts
+    for text in disp.text_.flatten(): # type: ignore
+        text.set_fontsize(format_config.font_size)
 
     ax_cm.set_title("Pixel-Level Confusion Matrix")
     plt.tight_layout()
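`ConfusionMatrixDisplay.plot()` takes no font-size argument, which is why the hunk resizes the per-cell `Text` objects stored in `disp.text_` after plotting. A standalone sketch of the same trick with toy data (labels and counts illustrative):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

cm = np.array([[50, 2], [3, 45]])  # toy pixel-level counts
fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["background", "road"])
disp.plot(cmap="Blues", ax=ax, xticks_rotation=45)
# disp.text_ holds the per-cell matplotlib Text objects created by plot()
for text in disp.text_.flatten():
    text.set_fontsize(14)
plt.tight_layout()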
@@ -160,6 +181,9 @@
         plt.close(fig_cm)
     except Exception as e:
         _LOGGER.error(f"Could not generate confusion matrix: {e}")
+
+    # --- Restore RC params ---
+    plt.rcParams.update(original_rc_params)
 
 
 def object_detection_metrics(
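A minimal sketch of calling the updated `segmentation_metrics` with a formatting config. The diff shows only that the config object exposes `font_size`, `heatmap_cmap`, and `cm_cmap` attributes; the no-argument constructor below is an assumption:

import numpy as np
from ml_tools.ML_vision_evaluation import segmentation_metrics
from ml_tools.ML_configuration import MultiClassSegmentationMetricsFormat

y_true = np.random.randint(0, 3, size=(10, 64, 64))  # toy masks, 3 classes
y_pred = np.random.randint(0, 3, size=(10, 64, 64))

# config is optional; when omitted, _BaseSegmentationFormat defaults are used
segmentation_metrics(
    y_true, y_pred,
    save_dir="reports/segmentation",
    class_names=["background", "road", "car"],
    config=MultiClassSegmentationMetricsFormat(),  # assumed constructor signature
)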
|