dragon-ml-toolbox 13.1.0__py3-none-any.whl → 13.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 13.1.0
3
+ Version: 13.2.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-13.1.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-13.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
1
+ dragon_ml_toolbox-13.2.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-13.2.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
3
3
  ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
4
4
  ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
5
5
  ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
@@ -18,9 +18,9 @@ ml_tools/PSO_optimization.py,sha256=T-HWHMRJUnPvPwixdU5jif3_rnnI36TzcL8u3oSCwuA,
18
18
  ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
19
19
  ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
20
20
  ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
21
- ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
21
+ ml_tools/__init__.py,sha256=kJiankjz9_qXu7gU92mYqYg_anLvt-B6RtW0mMH8uGo,76
22
22
  ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
23
- ml_tools/_schema.py,sha256=MYYAO8CYygIvwv9TkGBAxzZpG7xQ2IV8_yB5zzFin0c,710
23
+ ml_tools/_schema.py,sha256=MloQLJvuYzWHbmHfp0rUx5vgTPdfusfQ1b83KY277bY,2767
24
24
  ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
25
25
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
26
26
  ml_tools/custom_logger.py,sha256=7tSAgRL7e-Ekm7rS1FLDocaPLCnaoKc7VSrtfwCtCEg,10067
@@ -29,13 +29,13 @@ ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNV
29
29
  ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
30
30
  ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
31
31
  ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
32
- ml_tools/keys.py,sha256=eJ4St5fl8uHstEGO1XVdP8G-ddwjOxV9zqG0D6W8pCI,2124
32
+ ml_tools/keys.py,sha256=oykUVLB4Wos3AZomowjtI8AFFC5xnMUH-icNHydRpOk,2275
33
33
  ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
34
34
  ml_tools/optimization_tools.py,sha256=TYFQ2nSnp7xxs-VyoZISWgnGJghFbsWasHjruegyJRs,12763
35
35
  ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
36
- ml_tools/serde.py,sha256=Wjf8N1thSfJ4r6Vm_pWxP2UTPcP2f3s2FiGz0z6kqKI,4925
36
+ ml_tools/serde.py,sha256=IHXHS2XwpEe5Y8mdWvR2SAykOriB8CysDuD70SvtikQ,6223
37
37
  ml_tools/utilities.py,sha256=OcAyV1tEcYAfOWlGjRgopsjDLxU3DcI5EynzvWV4q3A,15754
38
- dragon_ml_toolbox-13.1.0.dist-info/METADATA,sha256=8n0bhl_rSVdg6MDh51r7tl5JflbqIOdqZx5gjaBWk0o,6166
39
- dragon_ml_toolbox-13.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- dragon_ml_toolbox-13.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
- dragon_ml_toolbox-13.1.0.dist-info/RECORD,,
38
+ dragon_ml_toolbox-13.2.0.dist-info/METADATA,sha256=NzCUzaOdHUWqWZ7B96xs-rywNbalnz5MAlduLaowvX8,6166
39
+ dragon_ml_toolbox-13.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ dragon_ml_toolbox-13.2.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
41
+ dragon_ml_toolbox-13.2.0.dist-info/RECORD,,
ml_tools/__init__.py CHANGED
@@ -1 +1,2 @@
1
1
  from .custom_logger import custom_logger
2
+ from ._schema import FeatureSchema
ml_tools/_schema.py CHANGED
@@ -1,4 +1,9 @@
1
- from typing import NamedTuple, Tuple, Optional, Dict
1
+ from typing import NamedTuple, Tuple, Optional, Dict, Union
2
+ from pathlib import Path
3
+
4
+ from .custom_logger import save_list_strings
5
+ from .keys import DatasetKeys
6
+
2
7
 
3
8
  class FeatureSchema(NamedTuple):
4
9
  """Holds the final, definitive schema for the model pipeline."""
@@ -15,5 +20,51 @@ class FeatureSchema(NamedTuple):
15
20
  # Map of {column_index: cardinality} for categorical features
16
21
  categorical_index_map: Optional[Dict[int, int]]
17
22
 
18
- # The original string-to-int mappings (e.g., {'color': {'red': 0, 'blue': 1}})
23
+ # Map string-to-int category values (e.g., {'color': {'red': 0, 'blue': 1}})
19
24
  categorical_mappings: Optional[Dict[str, Dict[str, int]]]
25
+
26
+ def _save_helper(self, artifact: Tuple[str, ...], directory: Union[str,Path], filename: str, verbose: bool):
27
+ to_save = list(artifact)
28
+ save_list_strings(list_strings=to_save,
29
+ directory=directory,
30
+ filename=filename,
31
+ verbose=verbose)
32
+
33
+ def save_all_features(self, directory: Union[str,Path], verbose: bool=True):
34
+ """
35
+ Saves all feature names to a text file.
36
+
37
+ Args:
38
+ directory: The directory where the file will be saved.
39
+ verbose: If True, prints a confirmation message upon saving.
40
+ """
41
+ self._save_helper(artifact=self.feature_names,
42
+ directory=directory,
43
+ filename=DatasetKeys.FEATURE_NAMES,
44
+ verbose=verbose)
45
+
46
+ def save_continuous_features(self, directory: Union[str,Path], verbose: bool=True):
47
+ """
48
+ Saves continuous feature names to a text file.
49
+
50
+ Args:
51
+ directory: The directory where the file will be saved.
52
+ verbose: If True, prints a confirmation message upon saving.
53
+ """
54
+ self._save_helper(artifact=self.continuous_feature_names,
55
+ directory=directory,
56
+ filename=DatasetKeys.CONTINUOUS_NAMES,
57
+ verbose=verbose)
58
+
59
+ def save_categorical_features(self, directory: Union[str,Path], verbose: bool=True):
60
+ """
61
+ Saves categorical feature names to a text file.
62
+
63
+ Args:
64
+ directory: The directory where the file will be saved.
65
+ verbose: If True, prints a confirmation message upon saving.
66
+ """
67
+ self._save_helper(artifact=self.categorical_feature_names,
68
+ directory=directory,
69
+ filename=DatasetKeys.CATEGORICAL_NAMES,
70
+ verbose=verbose)
ml_tools/keys.py CHANGED
@@ -55,10 +55,13 @@ class PytorchArtifactPathKeys:
55
55
 
56
56
 
57
57
  class DatasetKeys:
58
- """Keys for saving dataset artifacts"""
58
+ """Keys for saving dataset artifacts. Also used by FeatureSchema"""
59
59
  FEATURE_NAMES = "feature_names"
60
60
  TARGET_NAMES = "target_names"
61
61
  SCALER_PREFIX = "scaler_"
62
+ # Feature Schema
63
+ CONTINUOUS_NAMES = "continuous_feature_names"
64
+ CATEGORICAL_NAMES = "categorical_feature_names"
62
65
 
63
66
 
64
67
  class SHAPKeys:
ml_tools/serde.py CHANGED
@@ -6,12 +6,15 @@ from pathlib import Path
6
6
  from .path_manager import make_fullpath, sanitize_filename
7
7
  from ._script_info import _script_info
8
8
  from ._logger import _LOGGER
9
+ from ._schema import FeatureSchema
9
10
 
10
11
 
11
12
  __all__ = [
12
13
  "serialize_object_filename",
13
14
  "serialize_object",
14
15
  "deserialize_object",
16
+ "serialize_schema",
17
+ "deserialize_schema"
15
18
  ]
16
19
 
17
20
 
@@ -25,21 +28,20 @@ def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str
25
28
  filename (str) : Name for the output file, extension will be appended if needed.
26
29
  """
27
30
  try:
28
- save_path = make_fullpath(save_dir, make=True)
31
+ save_path = make_fullpath(save_dir, make=True, enforce="directory")
29
32
  sanitized_name = sanitize_filename(filename)
30
- if not sanitized_name.endswith('.joblib'):
31
- sanitized_name = sanitized_name + ".joblib"
32
33
  full_path = save_path / sanitized_name
33
- joblib.dump(obj, full_path)
34
- except (IOError, OSError, TypeError, TerminatedWorkerError) as e:
35
- _LOGGER.error(f"Failed to serialize object of type '{type(obj)}'.")
34
+ except (IOError, OSError, TypeError) as e:
35
+ _LOGGER.error(f"Failed to construct save path from dir='{save_dir}' and filename='{filename}'. Error: {e}")
36
36
  if raise_on_error:
37
37
  raise e
38
38
  return None
39
- else:
40
- if verbose:
41
- _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
42
- return None
39
+
40
+ # call serialize_object with the fully constructed path.
41
+ serialize_object(obj=obj,
42
+ file_path=full_path,
43
+ verbose=verbose,
44
+ raise_on_error=raise_on_error)
43
45
 
44
46
 
45
47
  def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
@@ -56,8 +58,7 @@ def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_e
56
58
  """
57
59
  try:
58
60
  # Ensure the extension is correct
59
- if file_path.suffix != '.joblib':
60
- file_path = file_path.with_suffix(file_path.suffix + '.joblib')
61
+ file_path = file_path.with_suffix('.joblib')
61
62
 
62
63
  # Ensure the parent directory exists
63
64
  _save_dir = make_fullpath(file_path.parent, make=True, enforce="directory")
@@ -126,5 +127,46 @@ def deserialize_object(
126
127
 
127
128
  return obj
128
129
 
130
+
131
+ def serialize_schema(schema: FeatureSchema, file_path: Path):
132
+ """
133
+ Serializes a FeatureSchema object to a .joblib file.
134
+
135
+ This is a high-level wrapper around `serialize_object` that
136
+ specifically handles `FeatureSchema` instances and ensures
137
+ errors are raised on failure.
138
+
139
+ Args:
140
+ schema (FeatureSchema): The schema object to serialize.
141
+ file_path (Path): The full file path to save the schema to.
142
+ """
143
+ serialize_object(obj=schema,
144
+ file_path=file_path,
145
+ verbose=True,
146
+ raise_on_error=True)
147
+
148
+
149
+ def deserialize_schema(file_path: Path):
150
+ """
151
+ Deserializes a FeatureSchema object from a .joblib file.
152
+
153
+ This is a high-level wrapper around `deserialize_object` that
154
+ validates the loaded object is an instance of `FeatureSchema`.
155
+
156
+ Args:
157
+ file_path (Path): The full file path of the serialized schema.
158
+
159
+ Returns:
160
+ FeatureSchema: The deserialized schema object.
161
+
162
+ Raises:
163
+ TypeError: If the deserialized object is not an instance of `FeatureSchema`.
164
+ """
165
+ schema = deserialize_object(filepath=file_path,
166
+ expected_type=FeatureSchema,
167
+ verbose=True)
168
+ return schema
169
+
170
+
129
171
  def info():
130
172
  _script_info(__all__)