dragon-ml-toolbox 13.1.0__py3-none-any.whl → 14.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/METADATA +11 -2
- dragon_ml_toolbox-14.3.1.dist-info/RECORD +48 -0
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +10 -0
- ml_tools/MICE_imputation.py +207 -5
- ml_tools/ML_datasetmaster.py +63 -205
- ml_tools/ML_evaluation.py +23 -15
- ml_tools/ML_evaluation_multi.py +5 -6
- ml_tools/ML_inference.py +0 -1
- ml_tools/ML_models.py +22 -6
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_trainer.py +463 -20
- ml_tools/ML_utilities.py +302 -4
- ml_tools/ML_vision_datasetmaster.py +1395 -0
- ml_tools/ML_vision_evaluation.py +260 -0
- ml_tools/ML_vision_inference.py +428 -0
- ml_tools/ML_vision_models.py +627 -0
- ml_tools/ML_vision_transformers.py +58 -0
- ml_tools/_ML_vision_recipe.py +88 -0
- ml_tools/__init__.py +1 -0
- ml_tools/_schema.py +79 -2
- ml_tools/custom_logger.py +37 -14
- ml_tools/data_exploration.py +502 -93
- ml_tools/keys.py +42 -1
- ml_tools/math_utilities.py +1 -1
- ml_tools/serde.py +77 -15
- ml_tools/utilities.py +192 -3
- dragon_ml_toolbox-13.1.0.dist-info/RECORD +0 -41
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-13.1.0.dist-info → dragon_ml_toolbox-14.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import torch
|
|
3
|
+
from torchvision import transforms
|
|
4
|
+
from typing import Dict, Any, List, Callable, Union
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .ML_vision_transformers import TRANSFORM_REGISTRY
|
|
8
|
+
from ._logger import _LOGGER
|
|
9
|
+
from .keys import VisionTransformRecipeKeys
|
|
10
|
+
from .path_manager import make_fullpath
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def save_recipe(recipe: Dict[str, Any], filepath: Union[str, Path]) -> None:
    """
    Saves a transform recipe dictionary to a JSON file.

    Args:
        recipe (Dict[str, Any]): The recipe dictionary to save. Must be JSON-serializable.
        filepath (str | Path): The path to the output file. Its suffix is replaced
            with ".json" (or ".json" is appended when the path has no suffix).

    Raises:
        Exception: Any error raised while serializing or writing the recipe is
            logged and then re-raised unchanged.
    """
    # Accept plain strings as well as Path objects; Path(...) leaves a Path input untouched.
    final_filepath = Path(filepath).with_suffix(".json")

    try:
        # Serialize before opening the file so a non-serializable recipe
        # cannot leave a truncated or empty .json file behind.
        payload = json.dumps(recipe, indent=4)
        with open(final_filepath, 'w', encoding='utf-8') as f:
            f.write(payload)
    except Exception as e:
        _LOGGER.error(f"Failed to save recipe to '{final_filepath}': {e}")
        raise
    else:
        _LOGGER.info(f"Transform recipe saved as '{final_filepath.name}'.")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_recipe_and_build_transform(filepath: Union[str,Path]) -> transforms.Compose:
    """
    Rebuilds a torchvision.transforms.Compose pipeline from a recipe stored as JSON.

    Each step of the recipe supplies a transform name and its keyword arguments.
    Names are resolved against torchvision.transforms first and against the
    custom TRANSFORM_REGISTRY second.

    Args:
        filepath (str | Path): Path to the saved transform recipe .json file.

    Returns:
        transforms.Compose: The reconstructed transformation pipeline, with the
        steps in the same order as they appear in the recipe.

    Raises:
        ValueError: If the recipe has no pipeline entry, or if a transform name is
            found neither in torchvision.transforms nor in TRANSFORM_REGISTRY.
        Exception: Errors raised while reading the file or while instantiating a
            transform are logged and re-raised unchanged.
    """
    # Path validation is delegated to make_fullpath
    # (presumably checks that the target is an existing file - confirm in path_manager).
    final_filepath = make_fullpath(filepath, enforce="file")

    try:
        with open(final_filepath, 'r') as recipe_file:
            recipe = json.load(recipe_file)
    except Exception as e:
        _LOGGER.error(f"Failed to load recipe from '{final_filepath}': {e}")
        raise

    # Guard clause: nothing can be built without the pipeline entry
    if VisionTransformRecipeKeys.PIPELINE not in recipe:
        _LOGGER.error("Recipe file is invalid: missing 'pipeline' key.")
        raise ValueError("Invalid recipe format.")

    pipeline_steps: List[Callable] = []

    for entry in recipe[VisionTransformRecipeKeys.PIPELINE]:
        t_name = entry[VisionTransformRecipeKeys.NAME]
        t_kwargs = entry[VisionTransformRecipeKeys.KWARGS]

        # Resolution order matters: a torchvision attribute wins over a registry entry of the same name
        transform_class: Any = None
        if hasattr(transforms, t_name):
            transform_class = getattr(transforms, t_name)
        elif t_name in TRANSFORM_REGISTRY:
            transform_class = TRANSFORM_REGISTRY[t_name]
        else:
            _LOGGER.error(f"Unknown transform '{t_name}' in recipe. Not found in torchvision.transforms or TRANSFORM_REGISTRY.")
            raise ValueError(f"Unknown transform name: {t_name}")

        # Build the step; a failure is logged with the offending kwargs before propagating
        try:
            built_step = transform_class(**t_kwargs)
        except Exception as e:
            _LOGGER.error(f"Failed to instantiate transform '{t_name}' with kwargs {t_kwargs}: {e}")
            raise
        else:
            pipeline_steps.append(built_step)

    _LOGGER.info(f"Successfully loaded and built transform pipeline from '{final_filepath.name}'.")
    return transforms.Compose(pipeline_steps)
|
ml_tools/__init__.py
CHANGED
ml_tools/_schema.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
from typing import NamedTuple, Tuple, Optional, Dict
|
|
1
|
+
from typing import NamedTuple, Tuple, Optional, Dict, Union
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from .custom_logger import save_list_strings
|
|
5
|
+
from .keys import DatasetKeys
|
|
6
|
+
from ._logger import _LOGGER
|
|
7
|
+
|
|
2
8
|
|
|
3
9
|
class FeatureSchema(NamedTuple):
|
|
4
10
|
"""Holds the final, definitive schema for the model pipeline."""
|
|
@@ -15,5 +21,76 @@ class FeatureSchema(NamedTuple):
|
|
|
15
21
|
# Map of {column_index: cardinality} for categorical features
|
|
16
22
|
categorical_index_map: Optional[Dict[int, int]]
|
|
17
23
|
|
|
18
|
-
#
|
|
24
|
+
# Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
|
|
19
25
|
categorical_mappings: Optional[Dict[str, Dict[str, int]]]
|
|
26
|
+
|
|
27
|
+
def _save_helper(self, artifact: Tuple[str, ...], directory: Union[str,Path], filename: str, verbose: bool):
    """
    Shared implementation behind the public `save_*_features` methods.

    Args:
        artifact: Tuple of feature names to persist.
        directory: The directory where the file will be saved.
        filename: Name handed to `save_list_strings` for the output file.
        verbose: Forwarded unchanged to `save_list_strings`.
    """
    names = list(artifact)

    if names:
        # Writing is delegated to the shared list-of-strings saver
        save_list_strings(list_strings=names,
                          directory=directory,
                          filename=filename,
                          verbose=verbose)
    else:
        # Nothing to write: warn instead of producing an empty file
        _LOGGER.warning(f"Skipping save for '{filename}': The feature list is empty.")
|
|
39
|
+
|
|
40
|
+
def save_all_features(self, directory: Union[str,Path], verbose: bool=True):
    """
    Writes the complete list of feature names to a text file.

    The output file is named after `DatasetKeys.FEATURE_NAMES`. If the schema
    holds no feature names, a warning is logged and no file is written.

    Args:
        directory: The directory where the file will be saved.
        verbose: If True, prints a confirmation message upon saving.
    """
    names = self.feature_names
    self._save_helper(names, directory, DatasetKeys.FEATURE_NAMES, verbose)
|
|
52
|
+
|
|
53
|
+
def save_continuous_features(self, directory: Union[str,Path], verbose: bool=True):
    """
    Writes the continuous feature names to a text file.

    The output file is named after `DatasetKeys.CONTINUOUS_NAMES`. If the schema
    holds no continuous features, a warning is logged and no file is written.

    Args:
        directory: The directory where the file will be saved.
        verbose: If True, prints a confirmation message upon saving.
    """
    names = self.continuous_feature_names
    self._save_helper(names, directory, DatasetKeys.CONTINUOUS_NAMES, verbose)
|
|
65
|
+
|
|
66
|
+
def save_categorical_features(self, directory: Union[str,Path], verbose: bool=True):
    """
    Writes the categorical feature names to a text file.

    The output file is named after `DatasetKeys.CATEGORICAL_NAMES`. If the schema
    holds no categorical features, a warning is logged and no file is written.

    Args:
        directory: The directory where the file will be saved.
        verbose: If True, prints a confirmation message upon saving.
    """
    names = self.categorical_feature_names
    self._save_helper(names, directory, DatasetKeys.CATEGORICAL_NAMES, verbose)
|
|
78
|
+
|
|
79
|
+
def save_artifacts(self, directory: Union[str,Path]):
    """
    Saves all three name lists (all features, continuous features, categorical
    features) to separate text files in `directory`.

    Each saver is always called with verbose=True.

    Args:
        directory: The directory where the files will be saved.
    """
    # Order is kept: all features, then continuous, then categorical
    savers = (
        self.save_all_features,
        self.save_continuous_features,
        self.save_categorical_features,
    )
    for saver in savers:
        saver(directory=directory, verbose=True)
|
|
86
|
+
|
|
87
|
+
def __repr__(self) -> str:
    """
    Returns a one-line summary of the schema.

    The summary reports the number of total, continuous and categorical feature
    names, plus two booleans telling whether the categorical index map and the
    categorical mappings are set (i.e. not None).
    """
    # Insertion order of this dict defines the order of the fields in the output
    summary = {
        "total": len(self.feature_names),
        "continuous": len(self.continuous_feature_names),
        "categorical": len(self.categorical_feature_names),
        "index_map": self.categorical_index_map is not None,
        "categorical_map": self.categorical_mappings is not None,
    }
    fields = ", ".join(f"{label}={value}" for label, value in summary.items())
    return f"<FeatureSchema({fields})>"
|
ml_tools/custom_logger.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
from typing import Union, List, Dict, Any
|
|
3
|
+
from typing import Union, List, Dict, Any, Literal
|
|
4
4
|
import traceback
|
|
5
5
|
import json
|
|
6
6
|
import csv
|
|
@@ -29,6 +29,8 @@ def custom_logger(
|
|
|
29
29
|
],
|
|
30
30
|
save_directory: Union[str, Path],
|
|
31
31
|
log_name: str,
|
|
32
|
+
add_timestamp: bool=True,
|
|
33
|
+
dict_as: Literal['auto', 'json', 'csv'] = 'auto',
|
|
32
34
|
) -> None:
|
|
33
35
|
"""
|
|
34
36
|
Logs various data types to corresponding output formats:
|
|
@@ -36,10 +38,10 @@ def custom_logger(
|
|
|
36
38
|
- list[Any] → .txt
|
|
37
39
|
Each element is written on a new line.
|
|
38
40
|
|
|
39
|
-
- dict[str, list[Any]] → .csv
|
|
41
|
+
- dict[str, list[Any]] → .csv (if dict_as='auto' or 'csv')
|
|
40
42
|
Dictionary is treated as tabular data; keys become columns, values become rows.
|
|
41
43
|
|
|
42
|
-
- dict[str, scalar] → .json
|
|
44
|
+
- dict[str, scalar] → .json (if dict_as='auto' or 'json')
|
|
43
45
|
Dictionary is treated as structured data and serialized as JSON.
|
|
44
46
|
|
|
45
47
|
- str → .log
|
|
@@ -49,29 +51,50 @@ def custom_logger(
|
|
|
49
51
|
Full traceback is logged for debugging purposes.
|
|
50
52
|
|
|
51
53
|
Args:
|
|
52
|
-
data: The data to be logged. Must be one of the supported types.
|
|
53
|
-
save_directory: Directory where the log will be saved. Created if it does not exist.
|
|
54
|
-
log_name: Base name for the log file.
|
|
54
|
+
data (Any): The data to be logged. Must be one of the supported types.
|
|
55
|
+
save_directory (str | Path): Directory where the log will be saved. Created if it does not exist.
|
|
56
|
+
log_name (str): Base name for the log file.
|
|
57
|
+
add_timestamp (bool): Whether to add a timestamp to the filename.
|
|
58
|
+
dict_as ('auto'|'json'|'csv'):
|
|
59
|
+
- 'auto': Guesses format (JSON or CSV) based on dictionary content.
|
|
60
|
+
- 'json': Forces .json format for any dictionary.
|
|
61
|
+
- 'csv': Forces .csv format. Will fail if dict values are not all lists.
|
|
55
62
|
|
|
56
63
|
Raises:
|
|
57
64
|
ValueError: If the data type is unsupported.
|
|
58
65
|
"""
|
|
59
66
|
try:
|
|
67
|
+
if not isinstance(data, BaseException) and not data:
|
|
68
|
+
_LOGGER.warning("Empty data received. No log file will be saved.")
|
|
69
|
+
return
|
|
70
|
+
|
|
60
71
|
save_path = make_fullpath(save_directory, make=True)
|
|
61
72
|
|
|
62
|
-
|
|
63
|
-
log_name = sanitize_filename(log_name)
|
|
73
|
+
sanitized_log_name = sanitize_filename(log_name)
|
|
64
74
|
|
|
65
|
-
|
|
66
|
-
|
|
75
|
+
if add_timestamp:
|
|
76
|
+
timestamp = datetime.now().strftime(r"%Y%m%d_%H%M%S")
|
|
77
|
+
base_path = save_path / f"{sanitized_log_name}_{timestamp}"
|
|
78
|
+
else:
|
|
79
|
+
base_path = save_path / sanitized_log_name
|
|
80
|
+
|
|
81
|
+
# Router
|
|
67
82
|
if isinstance(data, list):
|
|
68
83
|
_log_list_to_txt(data, base_path.with_suffix(".txt"))
|
|
69
84
|
|
|
70
85
|
elif isinstance(data, dict):
|
|
71
|
-
if
|
|
72
|
-
_log_dict_to_csv(data, base_path.with_suffix(".csv"))
|
|
73
|
-
else:
|
|
86
|
+
if dict_as == 'json':
|
|
74
87
|
_log_dict_to_json(data, base_path.with_suffix(".json"))
|
|
88
|
+
|
|
89
|
+
elif dict_as == 'csv':
|
|
90
|
+
# This will raise a ValueError if data is not all lists
|
|
91
|
+
_log_dict_to_csv(data, base_path.with_suffix(".csv"))
|
|
92
|
+
|
|
93
|
+
else: # 'auto' mode
|
|
94
|
+
if all(isinstance(v, list) for v in data.values()):
|
|
95
|
+
_log_dict_to_csv(data, base_path.with_suffix(".csv"))
|
|
96
|
+
else:
|
|
97
|
+
_log_dict_to_json(data, base_path.with_suffix(".json"))
|
|
75
98
|
|
|
76
99
|
elif isinstance(data, str):
|
|
77
100
|
_log_string_to_log(data, base_path.with_suffix(".log"))
|
|
@@ -83,7 +106,7 @@ def custom_logger(
|
|
|
83
106
|
_LOGGER.error("Unsupported data type. Must be list, dict, str, or BaseException.")
|
|
84
107
|
raise ValueError()
|
|
85
108
|
|
|
86
|
-
_LOGGER.info(f"Log saved
|
|
109
|
+
_LOGGER.info(f"Log saved as: '{base_path.name}'")
|
|
87
110
|
|
|
88
111
|
except Exception:
|
|
89
112
|
_LOGGER.exception(f"Log not saved.")
|