dragon-ml-toolbox 13.1.0__tar.gz → 13.2.0__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (47)
  1. {dragon_ml_toolbox-13.1.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-13.2.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/__init__.py +1 -0
  4. dragon_ml_toolbox-13.2.0/ml_tools/_schema.py +70 -0
  5. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/keys.py +4 -1
  6. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/serde.py +54 -12
  7. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/pyproject.toml +1 -1
  8. dragon_ml_toolbox-13.1.0/ml_tools/_schema.py +0 -19
  9. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/LICENSE +0 -0
  10. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/LICENSE-THIRD-PARTY.md +0 -0
  11. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/README.md +0 -0
  12. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  13. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  14. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  15. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  16. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ETL_cleaning.py +0 -0
  17. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ETL_engineering.py +0 -0
  18. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/GUI_tools.py +0 -0
  19. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/MICE_imputation.py +0 -0
  20. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_callbacks.py +0 -0
  21. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_datasetmaster.py +0 -0
  22. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_evaluation.py +0 -0
  23. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_evaluation_multi.py +0 -0
  24. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_inference.py +0 -0
  25. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_models.py +0 -0
  26. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_optimization.py +0 -0
  27. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_scaler.py +0 -0
  28. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_trainer.py +0 -0
  29. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ML_utilities.py +0 -0
  30. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/PSO_optimization.py +0 -0
  31. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/RNN_forecast.py +0 -0
  32. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/SQL.py +0 -0
  33. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/VIF_factor.py +0 -0
  34. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/_logger.py +0 -0
  35. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/_script_info.py +0 -0
  36. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/constants.py +0 -0
  37. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/custom_logger.py +0 -0
  38. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/data_exploration.py +0 -0
  39. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ensemble_evaluation.py +0 -0
  40. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ensemble_inference.py +0 -0
  41. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/ensemble_learning.py +0 -0
  42. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/handle_excel.py +0 -0
  43. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/math_utilities.py +0 -0
  44. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/optimization_tools.py +0 -0
  45. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/path_manager.py +0 -0
  46. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/ml_tools/utilities.py +0 -0
  47. {dragon_ml_toolbox-13.1.0 → dragon_ml_toolbox-13.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.1.0
3
+ Version: 13.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.1.0
3
+ Version: 13.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1 +1,2 @@
1
1
  from .custom_logger import custom_logger
2
+ from ._schema import FeatureSchema
@@ -0,0 +1,70 @@
1
+ from typing import NamedTuple, Tuple, Optional, Dict, Union
2
+ from pathlib import Path
3
+
4
+ from .custom_logger import save_list_strings
5
+ from .keys import DatasetKeys
6
+
7
+
8
+ class FeatureSchema(NamedTuple):
9
+ """Holds the final, definitive schema for the model pipeline."""
10
+
11
+ # The final, ordered list of all feature names
12
+ feature_names: Tuple[str, ...]
13
+
14
+ # List of all continuous feature names
15
+ continuous_feature_names: Tuple[str, ...]
16
+
17
+ # List of all categorical feature names
18
+ categorical_feature_names: Tuple[str, ...]
19
+
20
+ # Map of {column_index: cardinality} for categorical features
21
+ categorical_index_map: Optional[Dict[int, int]]
22
+
23
+ # Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
24
+ categorical_mappings: Optional[Dict[str, Dict[str, int]]]
25
+
26
+ def _save_helper(self, artifact: Tuple[str, ...], directory: Union[str,Path], filename: str, verbose: bool):
27
+ to_save = list(artifact)
28
+ save_list_strings(list_strings=to_save,
29
+ directory=directory,
30
+ filename=filename,
31
+ verbose=verbose)
32
+
33
+ def save_all_features(self, directory: Union[str,Path], verbose: bool=True):
34
+ """
35
+ Saves all feature names to a text file.
36
+
37
+ Args:
38
+ directory: The directory where the file will be saved.
39
+ verbose: If True, prints a confirmation message upon saving.
40
+ """
41
+ self._save_helper(artifact=self.feature_names,
42
+ directory=directory,
43
+ filename=DatasetKeys.FEATURE_NAMES,
44
+ verbose=verbose)
45
+
46
+ def save_continuous_features(self, directory: Union[str,Path], verbose: bool=True):
47
+ """
48
+ Saves continuous feature names to a text file.
49
+
50
+ Args:
51
+ directory: The directory where the file will be saved.
52
+ verbose: If True, prints a confirmation message upon saving.
53
+ """
54
+ self._save_helper(artifact=self.continuous_feature_names,
55
+ directory=directory,
56
+ filename=DatasetKeys.CONTINUOUS_NAMES,
57
+ verbose=verbose)
58
+
59
+ def save_categorical_features(self, directory: Union[str,Path], verbose: bool=True):
60
+ """
61
+ Saves categorical feature names to a text file.
62
+
63
+ Args:
64
+ directory: The directory where the file will be saved.
65
+ verbose: If True, prints a confirmation message upon saving.
66
+ """
67
+ self._save_helper(artifact=self.categorical_feature_names,
68
+ directory=directory,
69
+ filename=DatasetKeys.CATEGORICAL_NAMES,
70
+ verbose=verbose)
@@ -55,10 +55,13 @@ class PytorchArtifactPathKeys:
55
55
 
56
56
 
57
57
  class DatasetKeys:
58
- """Keys for saving dataset artifacts"""
58
+ """Keys for saving dataset artifacts. Also used by FeatureSchema"""
59
59
  FEATURE_NAMES = "feature_names"
60
60
  TARGET_NAMES = "target_names"
61
61
  SCALER_PREFIX = "scaler_"
62
+ # Feature Schema
63
+ CONTINUOUS_NAMES = "continuous_feature_names"
64
+ CATEGORICAL_NAMES = "categorical_feature_names"
62
65
 
63
66
 
64
67
  class SHAPKeys:
@@ -6,12 +6,15 @@ from pathlib import Path
6
6
  from .path_manager import make_fullpath, sanitize_filename
7
7
  from ._script_info import _script_info
8
8
  from ._logger import _LOGGER
9
+ from ._schema import FeatureSchema
9
10
 
10
11
 
11
12
  __all__ = [
12
13
  "serialize_object_filename",
13
14
  "serialize_object",
14
15
  "deserialize_object",
16
+ "serialize_schema",
17
+ "deserialize_schema"
15
18
  ]
16
19
 
17
20
 
@@ -25,21 +28,20 @@ def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str
25
28
  filename (str) : Name for the output file, extension will be appended if needed.
26
29
  """
27
30
  try:
28
- save_path = make_fullpath(save_dir, make=True)
31
+ save_path = make_fullpath(save_dir, make=True, enforce="directory")
29
32
  sanitized_name = sanitize_filename(filename)
30
- if not sanitized_name.endswith('.joblib'):
31
- sanitized_name = sanitized_name + ".joblib"
32
33
  full_path = save_path / sanitized_name
33
- joblib.dump(obj, full_path)
34
- except (IOError, OSError, TypeError, TerminatedWorkerError) as e:
35
- _LOGGER.error(f"Failed to serialize object of type '{type(obj)}'.")
34
+ except (IOError, OSError, TypeError) as e:
35
+ _LOGGER.error(f"Failed to construct save path from dir='{save_dir}' and filename='{filename}'. Error: {e}")
36
36
  if raise_on_error:
37
37
  raise e
38
38
  return None
39
- else:
40
- if verbose:
41
- _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
42
- return None
39
+
40
+ # call serialize_object with the fully constructed path.
41
+ serialize_object(obj=obj,
42
+ file_path=full_path,
43
+ verbose=verbose,
44
+ raise_on_error=raise_on_error)
43
45
 
44
46
 
45
47
  def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
@@ -56,8 +58,7 @@ def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_e
56
58
  """
57
59
  try:
58
60
  # Ensure the extension is correct
59
- if file_path.suffix != '.joblib':
60
- file_path = file_path.with_suffix(file_path.suffix + '.joblib')
61
+ file_path = file_path.with_suffix('.joblib')
61
62
 
62
63
  # Ensure the parent directory exists
63
64
  _save_dir = make_fullpath(file_path.parent, make=True, enforce="directory")
@@ -126,5 +127,46 @@ def deserialize_object(
126
127
 
127
128
  return obj
128
129
 
130
+
131
+ def serialize_schema(schema: FeatureSchema, file_path: Path):
132
+ """
133
+ Serializes a FeatureSchema object to a .joblib file.
134
+
135
+ This is a high-level wrapper around `serialize_object` that
136
+ specifically handles `FeatureSchema` instances and ensures
137
+ errors are raised on failure.
138
+
139
+ Args:
140
+ schema (FeatureSchema): The schema object to serialize.
141
+ file_path (Path): The full file path to save the schema to.
142
+ """
143
+ serialize_object(obj=schema,
144
+ file_path=file_path,
145
+ verbose=True,
146
+ raise_on_error=True)
147
+
148
+
149
+ def deserialize_schema(file_path: Path):
150
+ """
151
+ Deserializes a FeatureSchema object from a .joblib file.
152
+
153
+ This is a high-level wrapper around `deserialize_object` that
154
+ validates the loaded object is an instance of `FeatureSchema`.
155
+
156
+ Args:
157
+ file_path (Path): The full file path of the serialized schema.
158
+
159
+ Returns:
160
+ FeatureSchema: The deserialized schema object.
161
+
162
+ Raises:
163
+ TypeError: If the deserialized object is not an instance of `FeatureSchema`.
164
+ """
165
+ schema = deserialize_object(filepath=file_path,
166
+ expected_type=FeatureSchema,
167
+ verbose=True)
168
+ return schema
169
+
170
+
129
171
  def info():
130
172
  _script_info(__all__)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "13.1.0"
3
+ version = "13.2.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }
@@ -1,19 +0,0 @@
1
- from typing import NamedTuple, Tuple, Optional, Dict
2
-
3
- class FeatureSchema(NamedTuple):
4
- """Holds the final, definitive schema for the model pipeline."""
5
-
6
- # The final, ordered list of all feature names
7
- feature_names: Tuple[str, ...]
8
-
9
- # List of all continuous feature names
10
- continuous_feature_names: Tuple[str, ...]
11
-
12
- # List of all categorical feature names
13
- categorical_feature_names: Tuple[str, ...]
14
-
15
- # Map of {column_index: cardinality} for categorical features
16
- categorical_index_map: Optional[Dict[int, int]]
17
-
18
- # The original string-to-int mappings (e.g., {'color': {'red': 0, 'blue': 1}})
19
- categorical_mappings: Optional[Dict[str, Dict[str, int]]]