dragon-ml-toolbox 10.1.1__py3-none-any.whl → 14.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (48)
  1. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/METADATA +38 -63
  2. dragon_ml_toolbox-14.2.0.dist-info/RECORD +48 -0
  3. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE +1 -1
  4. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +11 -0
  5. ml_tools/ETL_cleaning.py +175 -59
  6. ml_tools/ETL_engineering.py +506 -70
  7. ml_tools/GUI_tools.py +2 -1
  8. ml_tools/MICE_imputation.py +212 -7
  9. ml_tools/ML_callbacks.py +73 -40
  10. ml_tools/ML_datasetmaster.py +267 -284
  11. ml_tools/ML_evaluation.py +119 -58
  12. ml_tools/ML_evaluation_multi.py +107 -32
  13. ml_tools/ML_inference.py +15 -5
  14. ml_tools/ML_models.py +234 -170
  15. ml_tools/ML_models_advanced.py +323 -0
  16. ml_tools/ML_optimization.py +321 -97
  17. ml_tools/ML_scaler.py +10 -5
  18. ml_tools/ML_trainer.py +585 -40
  19. ml_tools/ML_utilities.py +528 -0
  20. ml_tools/ML_vision_datasetmaster.py +1315 -0
  21. ml_tools/ML_vision_evaluation.py +260 -0
  22. ml_tools/ML_vision_inference.py +428 -0
  23. ml_tools/ML_vision_models.py +627 -0
  24. ml_tools/ML_vision_transformers.py +58 -0
  25. ml_tools/PSO_optimization.py +10 -7
  26. ml_tools/RNN_forecast.py +2 -0
  27. ml_tools/SQL.py +22 -9
  28. ml_tools/VIF_factor.py +4 -3
  29. ml_tools/_ML_vision_recipe.py +88 -0
  30. ml_tools/__init__.py +1 -0
  31. ml_tools/_logger.py +0 -2
  32. ml_tools/_schema.py +96 -0
  33. ml_tools/constants.py +79 -0
  34. ml_tools/custom_logger.py +164 -16
  35. ml_tools/data_exploration.py +1092 -109
  36. ml_tools/ensemble_evaluation.py +48 -1
  37. ml_tools/ensemble_inference.py +6 -7
  38. ml_tools/ensemble_learning.py +4 -3
  39. ml_tools/handle_excel.py +1 -0
  40. ml_tools/keys.py +80 -0
  41. ml_tools/math_utilities.py +259 -0
  42. ml_tools/optimization_tools.py +198 -24
  43. ml_tools/path_manager.py +144 -45
  44. ml_tools/serde.py +192 -0
  45. ml_tools/utilities.py +287 -227
  46. dragon_ml_toolbox-10.1.1.dist-info/RECORD +0 -36
  47. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/WHEEL +0 -0
  48. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/top_level.txt +0 -0
ml_tools/serde.py ADDED
@@ -0,0 +1,192 @@
1
+ import joblib
2
+ from joblib.externals.loky.process_executor import TerminatedWorkerError
3
+ from typing import Any, Union, TypeVar, get_origin, Type, Optional
4
+ from pathlib import Path
5
+
6
+ from .path_manager import make_fullpath, sanitize_filename
7
+ from ._script_info import _script_info
8
+ from ._logger import _LOGGER
9
+ from ._schema import FeatureSchema
10
+
11
+
12
+ __all__ = [
13
+ "serialize_object_filename",
14
+ "serialize_object",
15
+ "deserialize_object",
16
+ "serialize_schema",
17
+ "deserialize_schema"
18
+ ]
19
+
20
+
21
+ # Built-in types that are logged by their type name rather than by their value (see the verbose log branches below).
22
+ _SIMPLE_TYPES = (list, dict, tuple, set, str, int, float, bool)
23
+
24
+
25
def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
    """
    Save a Python object with joblib under `save_dir`, using a sanitized file name.

    The destination path is assembled from `save_dir` and `filename`; the
    actual write is delegated to `serialize_object`.

    Parameters:
        obj (Any) : Object to serialize. `None` is not saved; a warning is logged instead.
        save_dir (str | Path) : Directory that will hold the serialized file.
        filename (str) : Name for the output file; it is passed through `sanitize_filename`.
        verbose (bool) : Forwarded unchanged to `serialize_object`.
        raise_on_error (bool) : If True, an error raised while building the path is
            re-raised after being logged. The flag is also forwarded to `serialize_object`.
    """
    # Guard clause: there is nothing to persist.
    if obj is None:
        _LOGGER.warning(f"Attempted to serialize a None object. Skipping save for '{filename}'.")
        return None

    try:
        target_dir = make_fullpath(save_dir, make=True, enforce="directory")
        destination = target_dir / sanitize_filename(filename)
    except (IOError, OSError, TypeError) as err:
        _LOGGER.error(f"Failed to construct save path from dir='{save_dir}' and filename='{filename}'. Error: {err}")
        if not raise_on_error:
            return None
        raise err

    # Hand the fully constructed path over to the path-based serializer.
    serialize_object(obj=obj, file_path=destination, verbose=verbose, raise_on_error=raise_on_error)
53
+
54
+
55
def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
    """
    Serializes a Python object using joblib to a specific file path.

    Suitable for Python built-ins, numpy, and pandas.

    Parameters:
        obj (Any) : The Python object to serialize. `None` is skipped with a warning.
        file_path (Path) : The full file path to save the object to.
            '.joblib' is appended when the name does not already end with it, so
            dotted names such as 'model_v1.5' are kept intact ('model_v1.5.joblib').
        verbose (bool) : If True, logs a message after a successful save.
        raise_on_error (bool) : If True, re-raises exceptions on failure;
            otherwise the failure is only logged.
    """
    # Function-scope import: PicklingError is what pickle-based dumpers raise
    # for objects that cannot be pickled; it is not a subclass of TypeError.
    from pickle import PicklingError

    if obj is None:
        _LOGGER.warning(f"Attempted to serialize a None object. Skipping save for '{file_path}'.")
        return None

    try:
        # Accept plain strings as well as Path objects.
        file_path = Path(file_path)

        # Append the extension instead of replacing the last suffix:
        # with_suffix() would silently turn 'model_v1.5' into 'model_v1.joblib',
        # letting differently named objects overwrite each other.
        if file_path.suffix != '.joblib':
            file_path = file_path.with_name(file_path.name + '.joblib')

        # Ensure the parent directory exists
        make_fullpath(file_path.parent, make=True, enforce="directory")

        # Dump the object
        joblib.dump(obj, file_path)

    except (IOError, OSError, TypeError, ValueError, PicklingError, TerminatedWorkerError) as e:
        _LOGGER.error(f"Failed to serialize object of type '{type(obj)}' to '{file_path}'. Error: {e}")
        if raise_on_error:
            # Bare raise keeps the original traceback untouched.
            raise
        return None

    if verbose:
        # Simple built-ins are reported by type only; other objects are logged via their own string form.
        if type(obj) in _SIMPLE_TYPES:
            _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
        else:
            _LOGGER.info(f"Object '{obj}' saved to '{file_path}'")

    return None
94
+
95
+
96
# Define a TypeVar to link the expected type to the return type of deserialization
T = TypeVar('T')


def deserialize_object(
    filepath: Union[str, Path],
    expected_type: Optional[Type[T]] = None,
    verbose: bool = True,
) -> T:
    """
    Loads a serialized object from a .joblib file.

    WARNING: `joblib.load` relies on pickle, which can execute arbitrary code
    while loading. Only deserialize files that come from a trusted source.

    Parameters:
        filepath (str | Path): Full path to the serialized .joblib file.
        expected_type (Type[T] | None): The expected type of the object.
            If provided, the function raises a TypeError if the loaded object
            is not an instance of this type. Generics like `list[str]` are
            reduced to their base type (e.g., `list`); element types are not
            validated. Defaults to None, which skips the type check.
        verbose (bool): If True, logs success messages.

    Returns:
        (Any): The deserialized Python object, which will match the `expected_type` if provided.

    Raises:
        TypeError: If `expected_type` is given and the loaded object is not an
            instance of it. The exception carries the same message that is logged.
        IOError, OSError, EOFError, ValueError: Re-raised from `joblib.load`
            after the failure has been logged.
    """
    true_filepath = make_fullpath(filepath, enforce="file")

    try:
        obj = joblib.load(true_filepath)
    except (IOError, OSError, EOFError, TypeError, ValueError):
        _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
        # Bare raise keeps the original traceback untouched.
        raise

    # --- Type Validation Step ---
    # Compare against None explicitly so the check never depends on the truthiness of a class object.
    if expected_type is not None:
        # get_origin handles generics (e.g., list[str] -> list)
        # If it's not a generic, get_origin returns None, so we use the type itself.
        type_to_check = get_origin(expected_type) or expected_type

        # Can't do an isinstance check on 'Any', skip it.
        if type_to_check is not Any and not isinstance(obj, type_to_check):
            error_msg = (
                f"Type mismatch: Expected an instance of '{expected_type}', but found '{type(obj)}' in '{true_filepath}'."
            )
            _LOGGER.error(error_msg)
            # Attach the message so callers that catch the exception can see what went wrong.
            raise TypeError(error_msg)

    if verbose:
        # Simple built-ins are reported by type only; other objects are logged via their own string form.
        if type(obj) in _SIMPLE_TYPES:
            _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
        else:
            _LOGGER.info(f"Loaded object '{obj}' from '{true_filepath}'.")

    return obj  # type: ignore
149
+
150
+
151
def serialize_schema(schema: FeatureSchema, file_path: Path):
    """
    Write a `FeatureSchema` object to a .joblib file.

    Thin convenience wrapper: it forwards to `serialize_object` with verbose
    logging switched on and `raise_on_error=True`.

    Args:
        schema (FeatureSchema): The schema object to serialize.
        file_path (Path): The full file path to save the schema to.
    """
    serialize_object(schema, file_path, verbose=True, raise_on_error=True)
167
+
168
+
169
def deserialize_schema(file_path: Path):
    """
    Read a `FeatureSchema` object back from a .joblib file.

    Thin convenience wrapper: it forwards to `deserialize_object` with
    `expected_type=FeatureSchema` and verbose logging switched on.

    Args:
        file_path (Path): The full file path of the serialized schema.

    Returns:
        FeatureSchema: The deserialized schema object.

    Raises:
        TypeError: If the deserialized object is not an instance of `FeatureSchema`.
    """
    return deserialize_object(file_path, FeatureSchema, verbose=True)
189
+
190
+
191
+ def info():
192
+ _script_info(__all__)  # pass this module's public API list (`__all__`) to the `_script_info` helper