dragon-ml-toolbox 14.8.0__py3-none-any.whl → 16.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic.
- {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/METADATA +9 -5
- dragon_ml_toolbox-16.0.0.dist-info/RECORD +51 -0
- ml_tools/ETL_cleaning.py +20 -20
- ml_tools/ETL_engineering.py +23 -25
- ml_tools/GUI_tools.py +20 -20
- ml_tools/MICE_imputation.py +3 -3
- ml_tools/ML_callbacks.py +43 -26
- ml_tools/ML_configuration.py +204 -11
- ml_tools/ML_datasetmaster.py +198 -280
- ml_tools/ML_evaluation.py +132 -41
- ml_tools/ML_evaluation_multi.py +96 -35
- ml_tools/ML_inference.py +249 -207
- ml_tools/ML_models.py +13 -102
- ml_tools/ML_models_advanced.py +1 -1
- ml_tools/ML_optimization.py +12 -12
- ml_tools/ML_scaler.py +11 -11
- ml_tools/ML_sequence_datasetmaster.py +341 -0
- ml_tools/ML_sequence_evaluation.py +215 -0
- ml_tools/ML_sequence_inference.py +391 -0
- ml_tools/ML_sequence_models.py +139 -0
- ml_tools/ML_trainer.py +1237 -354
- ml_tools/ML_utilities.py +1 -1
- ml_tools/ML_vision_datasetmaster.py +73 -67
- ml_tools/ML_vision_evaluation.py +26 -6
- ml_tools/ML_vision_inference.py +117 -140
- ml_tools/ML_vision_models.py +1 -1
- ml_tools/ML_vision_transformers.py +121 -40
- ml_tools/PSO_optimization.py +6 -6
- ml_tools/SQL.py +4 -4
- ml_tools/{keys.py → _keys.py} +43 -0
- ml_tools/_schema.py +1 -1
- ml_tools/ensemble_evaluation.py +1 -1
- ml_tools/ensemble_inference.py +7 -33
- ml_tools/ensemble_learning.py +1 -1
- ml_tools/optimization_tools.py +2 -2
- ml_tools/path_manager.py +5 -5
- ml_tools/utilities.py +1 -2
- dragon_ml_toolbox-14.8.0.dist-info/RECORD +0 -49
- ml_tools/RNN_forecast.py +0 -56
- ml_tools/_ML_vision_recipe.py +0 -88
- {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-14.8.0.dist-info → dragon_ml_toolbox-16.0.0.dist-info}/top_level.txt +0 -0
ml_tools/ML_utilities.py
CHANGED

```diff
@@ -7,7 +7,7 @@ from torch import nn
 from .path_manager import make_fullpath, list_subdirectories, list_files_by_extension
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .keys import ...
+from ._keys import DatasetKeys, PytorchModelArchitectureKeys, PytorchArtifactPathKeys, SHAPKeys, UtilityKeys, PyTorchCheckpointKeys
 from .utilities import load_dataframe
 from .custom_logger import save_list_strings, custom_logger
 from .serde import serialize_object_filename
```
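The only change here is the source of the key enums: `keys.py` is now the private module `_keys.py` (see the rename in the file list above). Downstream code importing from the old module needs a one-line update, sketched here under the assumption that the exported names are unchanged:

```python
# dragon-ml-toolbox <= 14.x (old public module; its exact exports are not shown in this diff):
# from ml_tools.keys import DatasetKeys, PyTorchCheckpointKeys

# dragon-ml-toolbox 16.0.0 (names taken from the new import in ML_utilities.py):
from ml_tools._keys import DatasetKeys, PyTorchCheckpointKeys
```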
ml_tools/ML_vision_datasetmaster.py
CHANGED

```diff
@@ -12,25 +12,23 @@ import random
 import json
 import inspect
 
-from .ML_datasetmaster import _BaseMaker
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .
-from .
-from .ML_vision_transformers import TRANSFORM_REGISTRY
+from ._keys import VisionTransformRecipeKeys, ObjectDetectionKeys
+from .ML_vision_transformers import TRANSFORM_REGISTRY, _save_recipe
 from .custom_logger import custom_logger
 
 
 __all__ = [
-    "VisionDatasetMaker",
-    "SegmentationDatasetMaker",
-    "ObjectDetectionDatasetMaker"
+    "DragonDatasetVision",
+    "DragonDatasetSegmentation",
+    "DragonDatasetObjectDetection"
 ]
 
 
-# ---
-class VisionDatasetMaker(_BaseMaker):
+# --- Vision Maker ---
+class DragonDatasetVision:
     """
     Creates processed PyTorch datasets for computer vision tasks from an
     image folder directory.
```
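This hunk renames the module's three public dataset makers and drops the `_BaseMaker` base class. User code written against 14.x would migrate along these lines (a sketch; the old names are taken from the hunk headers, the new ones from the updated `__all__`):

```python
# 14.x names (removed in 16.0.0):
# from ml_tools.ML_vision_datasetmaster import (
#     VisionDatasetMaker, SegmentationDatasetMaker, ObjectDetectionDatasetMaker)

# 16.0.0 names (per the new __all__):
from ml_tools.ML_vision_datasetmaster import (
    DragonDatasetVision,
    DragonDatasetSegmentation,
    DragonDatasetObjectDetection,
)
```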
```diff
@@ -45,7 +43,9 @@ class VisionDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class methods `from_folder()` or `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self._full_dataset: Optional[ImageFolder] = None
         self.labels: Optional[List[int]] = None
         self.class_map: Optional[dict[str,int]] = None
@@ -56,7 +56,7 @@ class VisionDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folder(cls, root_dir: Union[str,Path]) -> 'VisionDatasetMaker':
+    def from_folder(cls, root_dir: Union[str,Path]) -> 'DragonDatasetVision':
         """
         Creates a maker instance from a single root directory of images.
 
@@ -70,7 +70,7 @@ class VisionDatasetMaker(_BaseMaker):
             root_dir (str | Path): The path to the root directory containing class subfolders.
 
         Returns:
-
+            Instance: A new instance with the full dataset loaded.
         """
         root_path = make_fullpath(root_dir, enforce="directory")
         # Load with NO transform. We get PIL Images.
@@ -87,7 +87,7 @@ class VisionDatasetMaker(_BaseMaker):
     def from_folders(cls,
                      train_dir: Union[str,Path],
                      val_dir: Union[str,Path],
-                     test_dir: Optional[Union[str,Path]] = None) -> 'VisionDatasetMaker':
+                     test_dir: Optional[Union[str,Path]] = None) -> 'DragonDatasetVision':
         """
         Creates a maker instance from separate, pre-split directories.
 
@@ -101,7 +101,7 @@ class VisionDatasetMaker(_BaseMaker):
             test_dir (str | Path | None): Path to the test data directory.
 
         Returns:
-
+            Instance: A new, pre-split instance.
 
         Raises:
             ValueError: If the classes found in train, val, or test directories are inconsistent.
@@ -186,7 +186,7 @@ class VisionDatasetMaker(_BaseMaker):
         print(report)
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   stratify: bool = True, random_state: Optional[int] = None) -> 'VisionDatasetMaker':
+                   stratify: bool = True, random_state: Optional[int] = None) -> 'DragonDatasetVision':
         """
         Splits the dataset into train, validation, and optional test sets.
 
@@ -202,7 +202,7 @@ class VisionDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for the random number generator for reproducible splits.
 
         Returns:
-
+            Self: The same instance, now with datasets split.
 
         Raises:
             ValueError: If `val_size` and `test_size` sum to 1.0 or more.
@@ -249,22 +249,23 @@ class VisionDatasetMaker(_BaseMaker):
         _LOGGER.info(f"Data split into: \n- Training: {len(self._train_dataset)} images \n- Validation: {len(self._val_dataset)} images")
         return self
 
-    def configure_transforms(self,
+    def configure_transforms(self,
+                             resize_size: int = 256,
+                             crop_size: int = 224,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
                              std: Optional[List[float]] = [0.229, 0.224, 0.225],
                              pre_transforms: Optional[List[Callable]] = None,
-                             extra_train_transforms: Optional[List[Callable]] = None) -> 'VisionDatasetMaker':
+                             extra_train_transforms: Optional[List[Callable]] = None) -> 'DragonDatasetVision':
         """
         Configures and applies the image transformations and augmentations.
 
         This method must be called AFTER data is loaded and split.
 
         It sets up two pipelines:
-        1. **Training Pipeline:** Includes random augmentations
-           `RandomResizedCrop` and `
+        1. **Training Pipeline:** Includes random augmentations:
+           `RandomResizedCrop(crop_size)`, `RandomHorizontalFlip(0.5)`, and `RandomRotation(90)` (plus any
            `extra_train_transforms`) for online augmentation.
-        2. **Validation/Test Pipeline:** A deterministic pipeline using
-           `Resize` and `CenterCrop` for consistent evaluation.
+        2. **Validation/Test Pipeline:** A deterministic pipeline using `Resize` and `CenterCrop` for consistent evaluation.
 
         Both pipelines finish with `ToTensor` and `Normalize`.
 
@@ -279,7 +280,7 @@ class VisionDatasetMaker(_BaseMaker):
             pre_transforms (List[Callable] | None): An list of transforms to be applied at the very beginning of the transformations for all sets.
 
         Returns:
-
+            Self: The same instance, with transforms applied.
 
         Raises:
             RuntimeError: If called before data is split.
@@ -315,8 +316,9 @@ class VisionDatasetMaker(_BaseMaker):
 
         # Base augmentations for training
         base_train_transforms = [
-            transforms.RandomResizedCrop(crop_size),
-            transforms.RandomHorizontalFlip()
+            transforms.RandomResizedCrop(size=crop_size),
+            transforms.RandomHorizontalFlip(p=0.5),
+            transforms.RandomRotation(degrees=90)
         ]
         if extra_train_transforms:
             base_train_transforms.extend(extra_train_transforms)
```
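For reference, the training and evaluation pipelines described in the docstring above correspond to the torchvision compositions below, a sketch built from the defaults visible in this diff (`resize_size=256`, `crop_size=224`, ImageNet mean/std; the docstring confirms both pipelines end with `ToTensor` and `Normalize`). The relative placement of `pre_transforms` and `extra_train_transforms` is assumed:

```python
from torchvision import transforms

# Training pipeline: random augmentations, then tensor conversion and normalization.
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(size=224),            # crop_size default
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=90),             # new augmentation in 16.0.0
    # extra_train_transforms would be inserted here (placement assumed)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation/test pipeline: deterministic resize + center crop for stable evaluation.
eval_tf = transforms.Compose([
    transforms.Resize(256),                            # resize_size default
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
```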
```diff
@@ -381,8 +383,8 @@ class VisionDatasetMaker(_BaseMaker):
             _LOGGER.warning("Transforms have not been configured.")
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
         """
@@ -481,7 +483,7 @@ class VisionDatasetMaker(_BaseMaker):
         )
 
         # 3. Save the file
-
+        _save_recipe(recipe, file_path)
 
     def save_class_map(self, save_dir: Union[str,Path]) -> dict[str,int]:
         """
@@ -507,12 +509,12 @@ class VisionDatasetMaker(_BaseMaker):
             train_len = len(self._train_dataset) if self._train_dataset else 0
             val_len = len(self._val_dataset) if self._val_dataset else 0
             test_len = len(self._test_dataset) if self._test_dataset else 0
-            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
         elif self._full_dataset:
-            return f"Full Dataset: {len(self._full_dataset)} images
+            return f"Full Dataset: {len(self._full_dataset)} images"
         else:
             _LOGGER.warning("No datasets found.")
-            return "No datasets found
+            return "No datasets found"
 
     def __repr__(self) -> str:
         s = f"<{self.__class__.__name__}>:\n"
```
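Assembled from the docstrings above, a typical 16.0.0 workflow for the renamed vision maker looks like the following sketch. The method that returns the split datasets is written here as `get_datasets()`; its actual name is not visible in this diff, and the paths are placeholders:

```python
from ml_tools.ML_vision_datasetmaster import DragonDatasetVision

maker = (
    DragonDatasetVision.from_folder("data/images")      # root dir with one subfolder per class
    .split_data(val_size=0.2, test_size=0.1, stratify=True, random_state=42)
    .configure_transforms(resize_size=256, crop_size=224)  # must run AFTER split_data()
)

train_ds, val_ds, test_ds = maker.get_datasets()        # accessor name assumed
maker.save_transform_recipe("artifacts/transform_recipe.json")  # path is a placeholder
maker.save_class_map("artifacts")
print(maker.images_per_dataset())
```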
```diff
@@ -688,8 +690,8 @@ class _PairedRandomResizedCrop:
 
         return cropped_image, cropped_mask # type: ignore
 
-# ---
-class SegmentationDatasetMaker(_BaseMaker):
+# --- Segmentation Dataset ---
+class DragonDatasetSegmentation:
     """
     Creates processed PyTorch datasets for segmentation from image and mask folders.
 
@@ -698,7 +700,7 @@ class SegmentationDatasetMaker(_BaseMaker):
     to both the image and its corresponding mask.
 
     Workflow:
-    1. `maker = SegmentationDatasetMaker.from_folders(img_dir, mask_dir)`
+    1. `maker = DragonDatasetSegmentation.from_folders(img_dir, mask_dir)`
     2. `maker.set_class_map({'background': 0, 'road': 1})`
     3. `maker.split_data(val_size=0.2)`
     4. `maker.configure_transforms(crop_size=256)`
@@ -710,7 +712,9 @@ class SegmentationDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class method `from_folders()` to create an instance.
        """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self.image_paths: List[Path] = []
         self.mask_paths: List[Path] = []
         self.class_map: Dict[str, int] = {}
@@ -722,7 +726,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'SegmentationDatasetMaker':
+    def from_folders(cls, image_dir: Union[str, Path], mask_dir: Union[str, Path]) -> 'DragonDatasetSegmentation':
         """
         Creates a maker instance by loading all matching image-mask pairs
         from two corresponding directories.
@@ -735,7 +739,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             mask_dir (str | Path): Path to the directory containing segmentation masks.
 
         Returns:
-
+            DragonDatasetSegmentation: A new instance with all pairs loaded.
         """
         maker = cls()
         img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -795,14 +799,14 @@ class SegmentationDatasetMaker(_BaseMaker):
         Logs a report of the types, sizes, and channels of image files
         found in the directory. Useful for checking masks.
         """
-        VisionDatasetMaker.inspect_folder(path)
+        DragonDatasetVision.inspect_folder(path)
 
-    def set_class_map(self, class_map: Dict[str, int]) -> 'SegmentationDatasetMaker':
+    def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetSegmentation':
         """
-        Sets a map of
+        Sets a map of class_name -> pixel value. This is used by the Trainer for clear evaluation reports.
 
         Args:
-            class_map (Dict[
+            class_map (Dict[str, int]): A dictionary mapping the integer pixel
                 value in a mask to its string name.
                 Example: {'background': 0, 'road': 1, 'car': 2}
         """
@@ -818,7 +822,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             return []
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   random_state: Optional[int] = 42) -> 'SegmentationDatasetMaker':
+                   random_state: Optional[int] = 42) -> 'DragonDatasetSegmentation':
         """
         Splits the loaded image-mask pairs into train, validation, and test sets.
 
@@ -828,7 +832,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for reproducible splits.
 
         Returns:
-
+            DragonDatasetSegmentation: The same instance, now with datasets created.
         """
         if self._is_split:
             _LOGGER.warning("Data has already been split.")
@@ -884,7 +888,7 @@ class SegmentationDatasetMaker(_BaseMaker):
                              resize_size: int = 256,
                              crop_size: int = 224,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
-                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'SegmentationDatasetMaker':
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetSegmentation':
         """
         Configures and applies the image and mask transformations.
 
@@ -899,7 +903,7 @@ class SegmentationDatasetMaker(_BaseMaker):
             std (List[float] | None): The std dev values for image normalization.
 
         Returns:
-
+            DragonDatasetSegmentation: The same instance, with transforms applied.
         """
         if not self._is_split:
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
@@ -976,8 +980,8 @@ class SegmentationDatasetMaker(_BaseMaker):
             raise RuntimeError()
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     def save_transform_recipe(self, filepath: Union[str, Path]) -> None:
         """
@@ -1021,7 +1025,7 @@ class SegmentationDatasetMaker(_BaseMaker):
         )
 
         # Save the file
-
+        _save_recipe(recipe, file_path)
 
     def images_per_dataset(self) -> str:
         """
@@ -1031,10 +1035,10 @@ class SegmentationDatasetMaker(_BaseMaker):
             train_len = len(self._train_dataset) if self._train_dataset else 0
             val_len = len(self._val_dataset) if self._val_dataset else 0
             test_len = len(self._test_dataset) if self._test_dataset else 0
-            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
         else:
             _LOGGER.warning("No datasets found.")
-            return "No datasets found
+            return "No datasets found"
 
     def __repr__(self) -> str:
         s = f"<{self.__class__.__name__}>:\n"
```
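The segmentation maker follows the same pattern; the steps below come directly from the `Workflow` docstring in this diff (directory paths are placeholders, and the dataset accessor name is assumed):

```python
from ml_tools.ML_vision_datasetmaster import DragonDatasetSegmentation

maker = DragonDatasetSegmentation.from_folders("data/images", "data/masks")
maker.set_class_map({'background': 0, 'road': 1})   # readable names for evaluation reports
maker.split_data(val_size=0.2)
maker.configure_transforms(crop_size=256)

train_ds, val_ds = maker.get_datasets()             # accessor name assumed
maker.save_transform_recipe("artifacts/seg_transform_recipe.json")  # path is a placeholder
```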
```diff
@@ -1079,7 +1083,7 @@ class _ObjectDetectionDataset(Dataset):
         self.annotation_paths = annotation_paths
         self.transform = transform
 
-        # --- Propagate 'classes' if they exist
+        # --- Propagate 'classes' if they exist ---
         self.classes: List[str] = []
 
     def __len__(self):
@@ -1166,7 +1170,7 @@ class _OD_PairedRandomHorizontalFlip:
         return image, target
 
 
-class ObjectDetectionDatasetMaker(_BaseMaker):
+class DragonDatasetObjectDetection:
     """
     Creates processed PyTorch datasets for object detection from image
     and JSON annotation folders.
@@ -1179,7 +1183,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
     so this class provides a `collate_fn` to be used with a DataLoader.
 
     Workflow:
-    1. `maker = ObjectDetectionDatasetMaker.from_folders(img_dir, ann_dir)`
+    1. `maker = DragonDatasetObjectDetection.from_folders(img_dir, ann_dir)`
     2. `maker.set_class_map({'background': 0, 'person': 1, 'car': 2})`
     3. `maker.split_data(val_size=0.2)`
     4. `maker.configure_transforms()`
@@ -1193,7 +1197,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         """
         Typically not called directly. Use the class method `from_folders()` to create an instance.
         """
-
+        self._train_dataset = None
+        self._test_dataset = None
+        self._val_dataset = None
         self.image_paths: List[Path] = []
         self.annotation_paths: List[Path] = []
         self.class_map: Dict[str, int] = {}
@@ -1206,7 +1212,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         self._has_mean_std: bool = False
 
     @classmethod
-    def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'ObjectDetectionDatasetMaker':
+    def from_folders(cls, image_dir: Union[str, Path], annotation_dir: Union[str, Path]) -> 'DragonDatasetObjectDetection':
         """
         Creates a maker instance by loading all matching image-annotation pairs
         from two corresponding directories.
@@ -1223,7 +1229,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
                 annotation files.
 
         Returns:
-
+            DragonDatasetObjectDetection: A new instance with all pairs loaded.
         """
         maker = cls()
         img_path_obj = make_fullpath(image_dir, enforce="directory")
@@ -1270,9 +1276,9 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         Logs a report of the types, sizes, and channels of image files
         found in the directory.
         """
-        VisionDatasetMaker.inspect_folder(path)
+        DragonDatasetVision.inspect_folder(path)
 
-    def set_class_map(self, class_map: Dict[str, int]) -> 'ObjectDetectionDatasetMaker':
+    def set_class_map(self, class_map: Dict[str, int]) -> 'DragonDatasetObjectDetection':
         """
         Sets a map of class_name -> pixel_value. This is used by the
         MLTrainer for clear evaluation reports.
@@ -1300,7 +1306,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             return []
 
     def split_data(self, val_size: float = 0.2, test_size: float = 0.0,
-                   random_state: Optional[int] = 42) -> 'ObjectDetectionDatasetMaker':
+                   random_state: Optional[int] = 42) -> 'DragonDatasetObjectDetection':
         """
         Splits the loaded image-annotation pairs into train, validation, and test sets.
 
@@ -1310,7 +1316,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             random_state (int | None): Seed for reproducible splits.
 
         Returns:
-
+            DragonDatasetObjectDetection: The same instance, now with datasets created.
         """
         if self._is_split:
             _LOGGER.warning("Data has already been split.")
@@ -1364,7 +1370,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
 
     def configure_transforms(self,
                              mean: Optional[List[float]] = [0.485, 0.456, 0.406],
-                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'ObjectDetectionDatasetMaker':
+                             std: Optional[List[float]] = [0.229, 0.224, 0.225]) -> 'DragonDatasetObjectDetection':
         """
         Configures and applies the image and target transformations.
 
@@ -1379,7 +1385,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             std (List[float] | None): The std dev values for image normalization.
 
         Returns:
-
+            DragonDatasetObjectDetection: The same instance, with transforms applied.
         """
         if not self._is_split:
             _LOGGER.error("Transforms must be configured AFTER splitting data. Call .split_data() first.")
@@ -1448,8 +1454,8 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             raise RuntimeError()
 
         if self._test_dataset:
-            return self._train_dataset, self._val_dataset, self._test_dataset
-        return self._train_dataset, self._val_dataset
+            return self._train_dataset, self._val_dataset, self._test_dataset # type: ignore
+        return self._train_dataset, self._val_dataset # type: ignore
 
     @property
     def collate_fn(self) -> Callable:
@@ -1496,7 +1502,7 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
         )
 
         # Save the file
-
+        _save_recipe(recipe, file_path)
 
     def images_per_dataset(self) -> str:
         """
@@ -1506,10 +1512,10 @@ class ObjectDetectionDatasetMaker(_BaseMaker):
             train_len = len(self._train_dataset) if self._train_dataset else 0
             val_len = len(self._val_dataset) if self._val_dataset else 0
             test_len = len(self._test_dataset) if self._test_dataset else 0
-            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images
+            return f"Train | Validation | Test: {train_len} | {val_len} | {test_len} images"
         else:
             _LOGGER.warning("No datasets found.")
-            return "No datasets found
+            return "No datasets found"
 
     def __repr__(self) -> str:
         s = f"<{self.__class__.__name__}>:\n"
```
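Detection targets (boxes and labels) are variable-length per image, which is why the docstring above says the datasets must be batched with the maker's `collate_fn`. A sketch of the intended DataLoader wiring, using the workflow steps from the docstring (paths and batch size are placeholders; the dataset accessor name is assumed):

```python
from torch.utils.data import DataLoader
from ml_tools.ML_vision_datasetmaster import DragonDatasetObjectDetection

maker = DragonDatasetObjectDetection.from_folders("data/images", "data/annotations")
maker.set_class_map({'background': 0, 'person': 1, 'car': 2})
maker.split_data(val_size=0.2)
maker.configure_transforms()

train_ds, val_ds = maker.get_datasets()  # accessor name assumed
# The provided collate_fn keeps per-image target dicts grouped instead of stacking tensors.
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=maker.collate_fn)
```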
ml_tools/ML_vision_evaluation.py
CHANGED

```diff
@@ -18,7 +18,8 @@ from torchmetrics.detection import MeanAveragePrecision
 from .path_manager import make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
-from .keys import ...
+from ._keys import VisionKeys
+from .ML_configuration import SegmentationMetricsFormat
 
 
 __all__ = [
@@ -26,12 +27,15 @@ __all__ = [
     "object_detection_metrics"
 ]
 
+DPI_value = 250
+
 
 def segmentation_metrics(
     y_true: np.ndarray,
     y_pred: np.ndarray,
     save_dir: Union[str, Path],
-    class_names: Optional[List[str]] = None
+    class_names: Optional[List[str]] = None,
+    config: Optional[SegmentationMetricsFormat] = None # Add config object
 ):
     """
     Calculates and saves pixel-level metrics for segmentation tasks.
@@ -48,9 +52,18 @@ def segmentation_metrics(
         y_pred (np.ndarray): Predicted masks (e.g., shape [N, H, W]).
         save_dir (str | Path): Directory to save the metrics report and plots.
         class_names (List[str] | None): Names of the classes for the report.
+        config (SegmentationMetricsFormat, optional): Formatting configuration object.
     """
     save_dir_path = make_fullpath(save_dir, make=True, enforce="directory")
 
+    # --- Parse Config or use defaults ---
+    if config is None:
+        config = SegmentationMetricsFormat()
+
+    # --- Set Matplotlib font size ---
+    original_rc_params = plt.rcParams.copy()
+    plt.rcParams.update({'font.size': config.font_size})
+
     # Get all unique class labels present in either true or pred
     labels = np.unique(np.concatenate((np.unique(y_true), np.unique(y_pred)))).astype(int)
 
@@ -110,7 +123,7 @@ def segmentation_metrics(
     report_lines.append(per_class_df.to_string(index=False, float_format="%.4f"))
 
     report_string = "\n".join(report_lines)
-    print(report_string)
+    # print(report_string) # <-- I removed the print(report_string)
 
     # Save text report
     save_filename = VisionKeys.SEGMENTATION_REPORT + ".txt"
@@ -120,11 +133,11 @@ def segmentation_metrics(
 
     # --- 3. Save Per-Class Metrics Heatmap ---
     try:
-        plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=
+        plt.figure(figsize=(max(8, len(labels) * 0.5), 6), dpi=DPI_value)
         sns.heatmap(
             per_class_df.set_index('Class').T,
             annot=True,
-            cmap=
+            cmap=config.heatmap_cmap, # Use config cmap
             fmt='.3f',
             linewidths=0.5
         )
@@ -149,7 +162,11 @@ def segmentation_metrics(
             confusion_matrix=cm,
             display_labels=display_names
         )
-        disp.plot(cmap=
+        disp.plot(cmap=config.cm_cmap, ax=ax_cm, xticks_rotation=45) # Use config cmap
+
+        # Manually update font size of cell texts
+        for text in disp.text_.flatten(): # type: ignore
+            text.set_fontsize(config.font_size)
 
         ax_cm.set_title("Pixel-Level Confusion Matrix")
         plt.tight_layout()
@@ -160,6 +177,9 @@ def segmentation_metrics(
         plt.close(fig_cm)
     except Exception as e:
         _LOGGER.error(f"Could not generate confusion matrix: {e}")
+
+    # --- Restore RC params ---
+    plt.rcParams.update(original_rc_params)
 
 
 def object_detection_metrics(
```
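With the new `config` parameter, plot formatting is no longer hard-coded. A hedged calling sketch: the `SegmentationMetricsFormat` fields used in the diff are `font_size`, `heatmap_cmap`, and `cm_cmap`, but the full constructor signature lives in `ML_configuration.py` and is not shown here, so the keyword arguments below are assumptions:

```python
import numpy as np
from ml_tools.ML_vision_evaluation import segmentation_metrics
from ml_tools.ML_configuration import SegmentationMetricsFormat

# Dummy masks: 10 images, 64x64 pixels, 3 classes.
y_true = np.random.randint(0, 3, size=(10, 64, 64))
y_pred = np.random.randint(0, 3, size=(10, 64, 64))

fmt = SegmentationMetricsFormat(      # field names per the diff; kwargs assumed
    font_size=12,
    heatmap_cmap="viridis",
    cm_cmap="Blues",
)
segmentation_metrics(
    y_true, y_pred,
    save_dir="reports/segmentation",  # placeholder path
    class_names=["background", "road", "car"],
    config=fmt,                       # omit to fall back to default formatting
)
```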
|