dragon-ml-toolbox 13.1.0__tar.gz → 13.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-13.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-13.2.1}/PKG-INFO +1 -1
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/__init__.py +1 -0
- dragon_ml_toolbox-13.2.1/ml_tools/_schema.py +85 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/keys.py +4 -1
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/serde.py +54 -12
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/pyproject.toml +1 -1
- dragon_ml_toolbox-13.1.0/ml_tools/_schema.py +0 -19
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/LICENSE +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/README.md +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ETL_cleaning.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ML_utilities.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/constants.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/custom_logger.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/data_exploration.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/math_utilities.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/setup.cfg +0 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from typing import NamedTuple, Tuple, Optional, Dict, Union
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from .custom_logger import save_list_strings
|
|
5
|
+
from .keys import DatasetKeys
|
|
6
|
+
from ._logger import _LOGGER
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FeatureSchema(NamedTuple):
    """Holds the final, definitive schema for the model pipeline.

    Besides carrying the schema fields, the class offers helpers that write
    each list of feature names to its own text file. The file names come from
    `DatasetKeys` (FEATURE_NAMES, CONTINUOUS_NAMES, CATEGORICAL_NAMES).
    """

    # The final, ordered list of all feature names
    feature_names: Tuple[str, ...]

    # List of all continuous feature names
    continuous_feature_names: Tuple[str, ...]

    # List of all categorical feature names
    categorical_feature_names: Tuple[str, ...]

    # Map of {column_index: cardinality} for categorical features
    categorical_index_map: Optional[Dict[int, int]]

    # Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
    categorical_mappings: Optional[Dict[str, Dict[str, int]]]

    def _save_helper(self, artifact: Tuple[str, ...], directory: Union[str,Path], filename: str, verbose: bool) -> None:
        """
        Writes one tuple of names through `save_list_strings`.

        Args:
            artifact: The names to save.
            directory: The directory where the file will be saved.
            filename: Name of the output file, forwarded unchanged.
            verbose: Forwarded to `save_list_strings`.
        """
        to_save = list(artifact)

        # Nothing to write: warn and skip instead of calling the writer with an empty list.
        if not to_save:
            _LOGGER.warning(f"Skipping save for '{filename}': The feature list is empty.")
            return

        save_list_strings(list_strings=to_save,
                          directory=directory,
                          filename=filename,
                          verbose=verbose)

    def save_all_features(self, directory: Union[str,Path], verbose: bool=True) -> None:
        """
        Saves all feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.feature_names,
                          directory=directory,
                          filename=DatasetKeys.FEATURE_NAMES,
                          verbose=verbose)

    def save_continuous_features(self, directory: Union[str,Path], verbose: bool=True) -> None:
        """
        Saves continuous feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.continuous_feature_names,
                          directory=directory,
                          filename=DatasetKeys.CONTINUOUS_NAMES,
                          verbose=verbose)

    def save_categorical_features(self, directory: Union[str,Path], verbose: bool=True) -> None:
        """
        Saves categorical feature names to a text file.

        Args:
            directory: The directory where the file will be saved.
            verbose: If True, prints a confirmation message upon saving.
        """
        self._save_helper(artifact=self.categorical_feature_names,
                          directory=directory,
                          filename=DatasetKeys.CATEGORICAL_NAMES,
                          verbose=verbose)

    def save_artifacts(self, directory: Union[str,Path], verbose: bool=True) -> None:
        """
        Saves feature names, categorical feature names, continuous feature names to separate text files.

        Args:
            directory: The directory where the files will be saved.
            verbose: Forwarded to each individual save call. Defaults to True,
                which matches the previous hard-coded behavior.
        """
        self.save_all_features(directory=directory, verbose=verbose)
        self.save_continuous_features(directory=directory, verbose=verbose)
        self.save_categorical_features(directory=directory, verbose=verbose)
|
|
@@ -55,10 +55,13 @@ class PytorchArtifactPathKeys:
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
class DatasetKeys:
|
|
58
|
-
"""Keys for saving dataset artifacts"""
|
|
58
|
+
"""Keys for saving dataset artifacts. Also used by FeatureSchema"""
|
|
59
59
|
FEATURE_NAMES = "feature_names"
|
|
60
60
|
TARGET_NAMES = "target_names"
|
|
61
61
|
SCALER_PREFIX = "scaler_"
|
|
62
|
+
# Feature Schema
|
|
63
|
+
CONTINUOUS_NAMES = "continuous_feature_names"
|
|
64
|
+
CATEGORICAL_NAMES = "categorical_feature_names"
|
|
62
65
|
|
|
63
66
|
|
|
64
67
|
class SHAPKeys:
|
|
@@ -6,12 +6,15 @@ from pathlib import Path
|
|
|
6
6
|
from .path_manager import make_fullpath, sanitize_filename
|
|
7
7
|
from ._script_info import _script_info
|
|
8
8
|
from ._logger import _LOGGER
|
|
9
|
+
from ._schema import FeatureSchema
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
12
13
|
"serialize_object_filename",
|
|
13
14
|
"serialize_object",
|
|
14
15
|
"deserialize_object",
|
|
16
|
+
"serialize_schema",
|
|
17
|
+
"deserialize_schema"
|
|
15
18
|
]
|
|
16
19
|
|
|
17
20
|
|
|
@@ -25,21 +28,20 @@ def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str
|
|
|
25
28
|
filename (str) : Name for the output file, extension will be appended if needed.
|
|
26
29
|
"""
|
|
27
30
|
try:
|
|
28
|
-
save_path = make_fullpath(save_dir, make=True)
|
|
31
|
+
save_path = make_fullpath(save_dir, make=True, enforce="directory")
|
|
29
32
|
sanitized_name = sanitize_filename(filename)
|
|
30
|
-
if not sanitized_name.endswith('.joblib'):
|
|
31
|
-
sanitized_name = sanitized_name + ".joblib"
|
|
32
33
|
full_path = save_path / sanitized_name
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
_LOGGER.error(f"Failed to serialize object of type '{type(obj)}'.")
|
|
34
|
+
except (IOError, OSError, TypeError) as e:
|
|
35
|
+
_LOGGER.error(f"Failed to construct save path from dir='{save_dir}' and filename='{filename}'. Error: {e}")
|
|
36
36
|
if raise_on_error:
|
|
37
37
|
raise e
|
|
38
38
|
return None
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
|
|
40
|
+
# call serialize_object with the fully constructed path.
|
|
41
|
+
serialize_object(obj=obj,
|
|
42
|
+
file_path=full_path,
|
|
43
|
+
verbose=verbose,
|
|
44
|
+
raise_on_error=raise_on_error)
|
|
43
45
|
|
|
44
46
|
|
|
45
47
|
def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
|
|
@@ -56,8 +58,7 @@ def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_e
|
|
|
56
58
|
"""
|
|
57
59
|
try:
|
|
58
60
|
# Ensure the extension is correct
|
|
59
|
-
|
|
60
|
-
file_path = file_path.with_suffix(file_path.suffix + '.joblib')
|
|
61
|
+
file_path = file_path.with_suffix('.joblib')
|
|
61
62
|
|
|
62
63
|
# Ensure the parent directory exists
|
|
63
64
|
_save_dir = make_fullpath(file_path.parent, make=True, enforce="directory")
|
|
@@ -126,5 +127,46 @@ def deserialize_object(
|
|
|
126
127
|
|
|
127
128
|
return obj
|
|
128
129
|
|
|
130
|
+
|
|
131
|
+
def serialize_schema(schema: FeatureSchema, file_path: Path):
    """
    Persists a FeatureSchema object as a .joblib file.

    Convenience wrapper that delegates to `serialize_object` with
    verbose output enabled and `raise_on_error=True`, so a failed
    save is raised to the caller.

    Args:
        schema (FeatureSchema): The schema object to serialize.
        file_path (Path): The full file path to save the schema to.
    """
    serialize_object(schema, file_path, verbose=True, raise_on_error=True)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def deserialize_schema(file_path: Path):
    """
    Loads a FeatureSchema object back from a .joblib file.

    Convenience wrapper that delegates to `deserialize_object`,
    passing `FeatureSchema` as the expected type so the loaded
    object is validated against it.

    Args:
        file_path (Path): The full file path of the serialized schema.

    Returns:
        FeatureSchema: The deserialized schema object.

    Raises:
        TypeError: If the deserialized object is not an instance of `FeatureSchema`.
    """
    return deserialize_object(filepath=file_path,
                              expected_type=FeatureSchema,
                              verbose=True)
|
|
169
|
+
|
|
170
|
+
|
|
129
171
|
def info():
|
|
130
172
|
_script_info(__all__)
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
from typing import NamedTuple, Tuple, Optional, Dict
|
|
2
|
-
|
|
3
|
-
class FeatureSchema(NamedTuple):
|
|
4
|
-
"""Holds the final, definitive schema for the model pipeline."""
|
|
5
|
-
|
|
6
|
-
# The final, ordered list of all feature names
|
|
7
|
-
feature_names: Tuple[str, ...]
|
|
8
|
-
|
|
9
|
-
# List of all continuous feature names
|
|
10
|
-
continuous_feature_names: Tuple[str, ...]
|
|
11
|
-
|
|
12
|
-
# List of all categorical feature names
|
|
13
|
-
categorical_feature_names: Tuple[str, ...]
|
|
14
|
-
|
|
15
|
-
# Map of {column_index: cardinality} for categorical features
|
|
16
|
-
categorical_index_map: Optional[Dict[int, int]]
|
|
17
|
-
|
|
18
|
-
# The original string-to-int mappings (e.g., {'color': {'red': 0, 'blue': 1}})
|
|
19
|
-
categorical_mappings: Optional[Dict[str, Dict[str, int]]]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/requires.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.1}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|