dragon-ml-toolbox 10.1.1__py3-none-any.whl → 14.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/METADATA +38 -63
- dragon_ml_toolbox-14.2.0.dist-info/RECORD +48 -0
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE +1 -1
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +11 -0
- ml_tools/ETL_cleaning.py +175 -59
- ml_tools/ETL_engineering.py +506 -70
- ml_tools/GUI_tools.py +2 -1
- ml_tools/MICE_imputation.py +212 -7
- ml_tools/ML_callbacks.py +73 -40
- ml_tools/ML_datasetmaster.py +267 -284
- ml_tools/ML_evaluation.py +119 -58
- ml_tools/ML_evaluation_multi.py +107 -32
- ml_tools/ML_inference.py +15 -5
- ml_tools/ML_models.py +234 -170
- ml_tools/ML_models_advanced.py +323 -0
- ml_tools/ML_optimization.py +321 -97
- ml_tools/ML_scaler.py +10 -5
- ml_tools/ML_trainer.py +585 -40
- ml_tools/ML_utilities.py +528 -0
- ml_tools/ML_vision_datasetmaster.py +1315 -0
- ml_tools/ML_vision_evaluation.py +260 -0
- ml_tools/ML_vision_inference.py +428 -0
- ml_tools/ML_vision_models.py +627 -0
- ml_tools/ML_vision_transformers.py +58 -0
- ml_tools/PSO_optimization.py +10 -7
- ml_tools/RNN_forecast.py +2 -0
- ml_tools/SQL.py +22 -9
- ml_tools/VIF_factor.py +4 -3
- ml_tools/_ML_vision_recipe.py +88 -0
- ml_tools/__init__.py +1 -0
- ml_tools/_logger.py +0 -2
- ml_tools/_schema.py +96 -0
- ml_tools/constants.py +79 -0
- ml_tools/custom_logger.py +164 -16
- ml_tools/data_exploration.py +1092 -109
- ml_tools/ensemble_evaluation.py +48 -1
- ml_tools/ensemble_inference.py +6 -7
- ml_tools/ensemble_learning.py +4 -3
- ml_tools/handle_excel.py +1 -0
- ml_tools/keys.py +80 -0
- ml_tools/math_utilities.py +259 -0
- ml_tools/optimization_tools.py +198 -24
- ml_tools/path_manager.py +144 -45
- ml_tools/serde.py +192 -0
- ml_tools/utilities.py +287 -227
- dragon_ml_toolbox-10.1.1.dist-info/RECORD +0 -36
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/top_level.txt +0 -0
ml_tools/serde.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import joblib
|
|
2
|
+
from joblib.externals.loky.process_executor import TerminatedWorkerError
|
|
3
|
+
from typing import Any, Union, TypeVar, get_origin, Type, Optional
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .path_manager import make_fullpath, sanitize_filename
|
|
7
|
+
from ._script_info import _script_info
|
|
8
|
+
from ._logger import _LOGGER
|
|
9
|
+
from ._schema import FeatureSchema
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Public API of this module (also the list handed to `_script_info` by `info()`).
__all__ = [
    "serialize_object_filename",
    "serialize_object",
    "deserialize_object",
    "serialize_schema",
    "deserialize_schema",
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Built-in types whose instances are logged by type only (not by value)
# when an object is saved or loaded; the check is an exact `type(obj)` match.
_SIMPLE_TYPES = (
    list,
    dict,
    tuple,
    set,
    str,
    int,
    float,
    bool,
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
    """
    Serialize a Python object with joblib into `save_dir` under a sanitized `filename`.

    Suitable for Python built-ins, numpy, and pandas. The destination path is
    assembled here and the actual write is delegated to `serialize_object`,
    which applies the '.joblib' suffix.

    Parameters:
        obj (Any) : The Python object to serialize. `None` is skipped with a warning.
        save_dir (str | Path) : Directory path where the serialized object will be saved.
        filename (str) : Name for the output file; passed through `sanitize_filename` first.
        verbose (bool) : Forwarded to `serialize_object` to control the success log.
        raise_on_error (bool) : If True, path-construction errors are re-raised after
            being logged; the flag is also forwarded to `serialize_object`.
    """
    # Nothing to write for a None object: warn and bail out early.
    if obj is None:
        _LOGGER.warning(f"Attempted to serialize a None object. Skipping save for '{filename}'.")
        return None

    try:
        target_dir = make_fullpath(save_dir, make=True, enforce="directory")
        destination = target_dir / sanitize_filename(filename)
    except (IOError, OSError, TypeError) as e:
        _LOGGER.error(f"Failed to construct save path from dir='{save_dir}' and filename='{filename}'. Error: {e}")
        if not raise_on_error:
            return None
        raise e

    # Hand the fully constructed path over to the path-based serializer.
    serialize_object(
        obj=obj,
        file_path=destination,
        verbose=verbose,
        raise_on_error=raise_on_error,
    )
    return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def serialize_object(obj: Any, file_path: Union[str, Path], verbose: bool = True, raise_on_error: bool = False) -> None:
    """
    Serializes a Python object using joblib to a specific file path.

    Suitable for Python built-ins, numpy, and pandas.

    Parameters:
        obj (Any) : The Python object to serialize. `None` is skipped with a warning.
        file_path (str | Path) : The full file path to save the object to.
            The suffix is forced to '.joblib' via `Path.with_suffix`, so an
            existing suffix is replaced (e.g. 'model.v2' -> 'model.joblib').
        verbose (bool) : If True, logs a message after a successful save.
        raise_on_error (bool) : If True, re-raises the caught exception on failure;
            otherwise the failure is only logged.
    """
    if obj is None:
        _LOGGER.warning(f"Attempted to serialize a None object. Skipping save for '{file_path}'.")
        return None

    try:
        # Coerce first so a plain string path works the same as a Path,
        # then force the '.joblib' extension.
        file_path = Path(file_path).with_suffix('.joblib')

        # Ensure the parent directory exists
        make_fullpath(file_path.parent, make=True, enforce="directory")

        # Dump the object
        joblib.dump(obj, file_path)

    # ValueError covers `with_suffix` on a path with an empty name, so that
    # failure also honors `raise_on_error` instead of escaping unlogged.
    except (IOError, OSError, TypeError, ValueError, TerminatedWorkerError) as e:
        _LOGGER.error(f"Failed to serialize object of type '{type(obj)}' to '{file_path}'. Error: {e}")
        if raise_on_error:
            raise
        return None

    if verbose:
        # Plain built-ins are logged by type only; any other object is
        # logged through its own string representation.
        if type(obj) in _SIMPLE_TYPES:
            _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
        else:
            _LOGGER.info(f"Object '{obj}' saved to '{file_path}'")

    return None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Define a TypeVar to link the expected type to the return type of deserialization
T = TypeVar('T')

def deserialize_object(
    filepath: Union[str, Path],
    expected_type: Optional[Type[T]] = None,
    verbose: bool = True,
) -> T:
    """
    Loads a serialized object from a .joblib file.

    NOTE: joblib files are pickle-based, so loading one can execute arbitrary
    code. Only load files that come from a trusted source.

    Parameters:
        filepath (str | Path): Full path to the serialized .joblib file.
        expected_type (Type[T] | None): The expected type of the object.
            If provided, the function raises a TypeError if the loaded object
            is not an instance of this type. Generics such as `list[str]` are
            checked against their base type only (e.g., `list`); element types
            are not validated. `Any` skips the check.
            Defaults to None, which skips the type check.
        verbose (bool): If True, logs success messages.

    Returns:
        (Any): The deserialized Python object, which will match the `expected_type` if provided.

    Raises:
        TypeError: If `expected_type` is given and the loaded object is not an
            instance of it. The exception carries the same message that is logged.
        IOError, OSError, EOFError, TypeError, ValueError: Re-raised from
            `joblib.load` after the failure is logged.
    """
    true_filepath = make_fullpath(filepath, enforce="file")

    try:
        obj = joblib.load(true_filepath)
    except (IOError, OSError, EOFError, TypeError, ValueError):
        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
        raise

    # --- Type Validation Step ---
    if expected_type is not None:
        # get_origin handles generics (e.g., list[str] -> list)
        # If it's not a generic, get_origin returns None, so we use the type itself.
        type_to_check = get_origin(expected_type) or expected_type

        # Can't do an isinstance check on 'Any', skip it.
        if type_to_check is not Any and not isinstance(obj, type_to_check):
            error_msg = (
                f"Type mismatch: Expected an instance of '{expected_type}', but found '{type(obj)}' in '{true_filepath}'."
            )
            _LOGGER.error(error_msg)
            # Attach the message to the exception so callers that catch it
            # (or only see the traceback) get the full diagnostic.
            raise TypeError(error_msg)

    if verbose:
        # Plain built-ins are logged by type only; any other object is
        # logged through its own string representation.
        if type(obj) in _SIMPLE_TYPES:
            _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
        else:
            _LOGGER.info(f"Loaded object '{obj}' from '{true_filepath}'.")

    return obj  # type: ignore
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def serialize_schema(schema: FeatureSchema, file_path: Path):
    """
    Write a FeatureSchema object to a .joblib file.

    Convenience wrapper over `serialize_object` that always logs verbosely
    and always re-raises serialization errors (`raise_on_error=True`).

    Args:
        schema (FeatureSchema): The schema object to serialize.
        file_path (Path): The full file path to save the schema to.
    """
    # Fixed options: verbose logging on, failures propagate to the caller.
    fixed_options = {"verbose": True, "raise_on_error": True}
    serialize_object(obj=schema, file_path=file_path, **fixed_options)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def deserialize_schema(file_path: Path):
    """
    Load a FeatureSchema object from a .joblib file.

    Convenience wrapper over `deserialize_object` that requests verbose
    logging and passes `FeatureSchema` as the expected type, so the loaded
    object is validated before it is returned.

    Args:
        file_path (Path): The full file path of the serialized schema.

    Returns:
        FeatureSchema: The deserialized schema object.

    Raises:
        TypeError: If the deserialized object is not an instance of `FeatureSchema`.
    """
    return deserialize_object(
        filepath=file_path,
        expected_type=FeatureSchema,
        verbose=True,
    )
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def info():
    """
    Pass this module's public names (`__all__`) to `_script_info`.

    NOTE(review): presumably this displays the available functions to the
    user -- confirm against `_script_info`.
    """
    _script_info(__all__)
|