dragon-ml-toolbox 10.10.1__py3-none-any.whl → 10.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/RECORD +8 -8
- ml_tools/path_manager.py +101 -43
- ml_tools/utilities.py +39 -5
- {dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dragon_ml_toolbox-10.
|
|
2
|
-
dragon_ml_toolbox-10.
|
|
1
|
+
dragon_ml_toolbox-10.11.1.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
|
|
2
|
+
dragon_ml_toolbox-10.11.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=lY4_rJPnLnMu7YBQaY-_iz1JRDcLdQzNCyeLAF1glJY,1837
|
|
3
3
|
ml_tools/ETL_cleaning.py,sha256=lSP5q6-ukGhJBPV8dlsqJvPXAzj4du_0J-SbtEd0Pjg,19292
|
|
4
4
|
ml_tools/ETL_engineering.py,sha256=a6KCWH6kRatZtjaFEF_o917ApPMK5_vRD-BjfCDAl-E,49400
|
|
5
5
|
ml_tools/GUI_tools.py,sha256=kEQWg-bog3pB5tI22gMGKWaCGHnz9TB2Lvvfhf5F2CI,45412
|
|
@@ -28,9 +28,9 @@ ml_tools/ensemble_learning.py,sha256=3s0kH4i_naj0IVl_T4knst-Hwg4TScWjEdsXX5KAi7I
|
|
|
28
28
|
ml_tools/handle_excel.py,sha256=He4UT15sCGhaG-JKfs7uYVAubxWjrqgJ6U7OhMR2fuE,14005
|
|
29
29
|
ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
|
|
30
30
|
ml_tools/optimization_tools.py,sha256=P3I6lIpvZ8Xf2kX5FvvBKBmrK2pB6idBpkTzfUJxTeE,5073
|
|
31
|
-
ml_tools/path_manager.py,sha256=
|
|
32
|
-
ml_tools/utilities.py,sha256=
|
|
33
|
-
dragon_ml_toolbox-10.
|
|
34
|
-
dragon_ml_toolbox-10.
|
|
35
|
-
dragon_ml_toolbox-10.
|
|
36
|
-
dragon_ml_toolbox-10.
|
|
31
|
+
ml_tools/path_manager.py,sha256=CCZSlHpUiuaHsMAYcmMGZ9GvbHNbbrTqYFicgWz6pRs,17883
|
|
32
|
+
ml_tools/utilities.py,sha256=uheMUjQJ1zI69gASsE-mCq4KlRPVGgrgqson02rGNYM,30755
|
|
33
|
+
dragon_ml_toolbox-10.11.1.dist-info/METADATA,sha256=x3e66l1-dXkoE6ldWAH77epdEMnqj6YAvSVKYDVFhHU,6969
|
|
34
|
+
dragon_ml_toolbox-10.11.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
35
|
+
dragon_ml_toolbox-10.11.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
36
|
+
dragon_ml_toolbox-10.11.1.dist-info/RECORD,,
|
ml_tools/path_manager.py
CHANGED
|
@@ -23,13 +23,33 @@ class PathManager:
|
|
|
23
23
|
"path database". It supports both development mode and applications
|
|
24
24
|
bundled with Pyinstaller or Nuitka.
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
All keys provided to the manager are automatically sanitized to ensure
|
|
27
|
+
they are valid Python identifiers. This allows for clean, attribute-style
|
|
28
|
+
access. The sanitization process involves replacing whitespace with
|
|
29
|
+
underscores and removing special characters.
|
|
27
30
|
"""
|
|
28
31
|
def __init__(
|
|
29
32
|
self,
|
|
30
33
|
anchor_file: str,
|
|
31
34
|
base_directories: Optional[List[str]] = None
|
|
32
35
|
):
|
|
36
|
+
"""
|
|
37
|
+
Sets up the core paths for a project by anchoring to a specific file.
|
|
38
|
+
|
|
39
|
+
The manager automatically registers a 'ROOT' path, which points to the
|
|
40
|
+
root of the package, and can pre-register common subdirectories found
|
|
41
|
+
directly within that root.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
anchor_file (str): The path to a file within your package, typically
|
|
45
|
+
the `__file__` of the script where PathManager
|
|
46
|
+
is instantiated. This is used to locate the
|
|
47
|
+
package root directory.
|
|
48
|
+
base_directories (List[str] | None): An optional list of strings,
|
|
49
|
+
where each string is the name
|
|
50
|
+
of a subdirectory to register
|
|
51
|
+
relative to the package root.
|
|
52
|
+
"""
|
|
33
53
|
resolved_anchor_path = Path(anchor_file).resolve()
|
|
34
54
|
self._package_name = resolved_anchor_path.parent.name
|
|
35
55
|
self._is_bundled, bundle_root = self._get_bundle_root()
|
|
@@ -43,13 +63,17 @@ class PathManager:
|
|
|
43
63
|
package_root = resolved_anchor_path.parent
|
|
44
64
|
|
|
45
65
|
# Register the root of the package itself
|
|
46
|
-
self.
|
|
66
|
+
self.ROOT = package_root
|
|
47
67
|
|
|
48
68
|
# Register all the base directories
|
|
49
69
|
if base_directories:
|
|
50
70
|
for dir_name in base_directories:
|
|
51
|
-
|
|
52
|
-
self.
|
|
71
|
+
sanitized_dir_name = self._sanitize_key(dir_name)
|
|
72
|
+
self._check_underscore_key(sanitized_dir_name)
|
|
73
|
+
setattr(self, sanitized_dir_name, package_root / sanitized_dir_name)
|
|
74
|
+
|
|
75
|
+
# Signal that initialization is complete.
|
|
76
|
+
self._initialized = True
|
|
53
77
|
|
|
54
78
|
def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
|
|
55
79
|
"""
|
|
@@ -72,47 +96,35 @@ class PathManager:
|
|
|
72
96
|
# --- Not Bundled ---
|
|
73
97
|
else:
|
|
74
98
|
return False, None
|
|
99
|
+
|
|
100
|
+
def _check_underscore_key(self, key: str) -> None:
|
|
101
|
+
if key.startswith("_"):
|
|
102
|
+
_LOGGER.error(f"Path key '{key}' cannot start with underscores.")
|
|
103
|
+
raise ValueError()
|
|
75
104
|
|
|
76
|
-
def
|
|
77
|
-
"""
|
|
78
|
-
Retrieves a stored path by its key.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
key (str): The key of the path to retrieve.
|
|
82
|
-
|
|
83
|
-
Returns:
|
|
84
|
-
Path: The resolved, absolute Path object.
|
|
85
|
-
|
|
86
|
-
Raises:
|
|
87
|
-
KeyError: If the key is not found in the manager.
|
|
88
|
-
"""
|
|
89
|
-
try:
|
|
90
|
-
return self._paths[key]
|
|
91
|
-
except KeyError:
|
|
92
|
-
_LOGGER.error(f"Path key '{key}' not found.")
|
|
93
|
-
raise
|
|
94
|
-
|
|
95
|
-
def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
|
|
105
|
+
def update(self, new_paths: Dict[str, Union[str, Path]]) -> None:
|
|
96
106
|
"""
|
|
97
|
-
Adds new paths
|
|
107
|
+
Adds new paths in the manager.
|
|
98
108
|
|
|
99
109
|
Args:
|
|
100
110
|
new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
|
|
101
111
|
the identifiers and values are the
|
|
102
|
-
Path objects
|
|
103
|
-
overwrite (bool): If False (default), raises a KeyError if any
|
|
104
|
-
key in new_paths already exists. If True,
|
|
105
|
-
allows overwriting existing keys.
|
|
112
|
+
Path objects to store.
|
|
106
113
|
"""
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
114
|
+
# Pre-check
|
|
115
|
+
for key in new_paths:
|
|
116
|
+
sanitized_key = self._sanitize_key(key)
|
|
117
|
+
self._check_underscore_key(sanitized_key)
|
|
118
|
+
if hasattr(self, sanitized_key):
|
|
119
|
+
_LOGGER.error(f"Cannot add path for key '{sanitized_key}' ({key}): an attribute with this name already exists.")
|
|
120
|
+
raise KeyError()
|
|
121
|
+
|
|
122
|
+
# If no conflicts, add new paths
|
|
123
|
+
for key, value in new_paths.items():
|
|
124
|
+
self.__setattr__(key, value)
|
|
125
|
+
|
|
126
|
+
def _sanitize_key(self, key: str):
|
|
127
|
+
return sanitize_filename(key)
|
|
116
128
|
|
|
117
129
|
def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
|
|
118
130
|
"""
|
|
@@ -147,7 +159,7 @@ class PathManager:
|
|
|
147
159
|
if path.suffix: # It's a file, not a directory
|
|
148
160
|
continue
|
|
149
161
|
|
|
150
|
-
# ---
|
|
162
|
+
# --- CRITICAL CHECK ---
|
|
151
163
|
# Determine if the path is inside the main application package.
|
|
152
164
|
is_internal_path = package_root and path.is_relative_to(package_root)
|
|
153
165
|
|
|
@@ -186,15 +198,20 @@ class PathManager:
|
|
|
186
198
|
# --- Dictionary-Style Methods ---
|
|
187
199
|
def __getitem__(self, key: str) -> Path:
|
|
188
200
|
"""Allows dictionary-style getting, e.g., PM['my_key']"""
|
|
189
|
-
return self.
|
|
201
|
+
return self.__getattr__(key)
|
|
190
202
|
|
|
191
203
|
def __setitem__(self, key: str, value: Union[str, Path]):
|
|
192
|
-
"""Allows dictionary-style setting,
|
|
193
|
-
self.
|
|
204
|
+
"""Allows dictionary-style setting, e.g., PM['my_key'] = path"""
|
|
205
|
+
sanitized_key = self._sanitize_key(key)
|
|
206
|
+
self._check_underscore_key(sanitized_key)
|
|
207
|
+
self.__setattr__(sanitized_key, value)
|
|
194
208
|
|
|
195
209
|
def __contains__(self, key: str) -> bool:
|
|
196
210
|
"""Allows checking for a key's existence, e.g., if 'my_key' in PM"""
|
|
197
|
-
|
|
211
|
+
sanitized_key = self._sanitize_key(key)
|
|
212
|
+
true_false = sanitized_key in self._paths
|
|
213
|
+
# print(f"key {sanitized_key} in current path dictionary keys: {true_false}")
|
|
214
|
+
return true_false
|
|
198
215
|
|
|
199
216
|
def __len__(self) -> int:
|
|
200
217
|
"""Allows getting the number of paths, e.g., len(PM)"""
|
|
@@ -211,6 +228,47 @@ class PathManager:
|
|
|
211
228
|
def items(self):
|
|
212
229
|
"""Returns all registered (key, Path) pairs."""
|
|
213
230
|
return self._paths.items()
|
|
231
|
+
|
|
232
|
+
def __getattr__(self, name: str) -> Path:
|
|
233
|
+
"""
|
|
234
|
+
Allows attribute-style access to paths, e.g., PM.data.
|
|
235
|
+
"""
|
|
236
|
+
# Block access to private attributes
|
|
237
|
+
if name.startswith('_'):
|
|
238
|
+
_LOGGER.error(f"Access to private attribute '{name}' is not allowed, remove leading underscore.")
|
|
239
|
+
raise AttributeError()
|
|
240
|
+
|
|
241
|
+
sanitized_name = self._sanitize_key(name)
|
|
242
|
+
|
|
243
|
+
try:
|
|
244
|
+
# Look for the key in our internal dictionary
|
|
245
|
+
return self._paths[sanitized_name]
|
|
246
|
+
except KeyError:
|
|
247
|
+
# If not found, raise an AttributeError
|
|
248
|
+
_LOGGER.error(f"'{type(self).__name__}' object has no attribute or path key '{sanitized_name}'")
|
|
249
|
+
raise AttributeError()
|
|
250
|
+
|
|
251
|
+
def __setattr__(self, name: str, value: Union[str, Path]):
|
|
252
|
+
"""Allows attribute-style setting of paths, e.g., PM.data = 'path/to/data'."""
|
|
253
|
+
# Check for internal attributes
|
|
254
|
+
if name.startswith('_'):
|
|
255
|
+
if hasattr(self, '_initialized') and self._initialized:
|
|
256
|
+
self._check_underscore_key(name)
|
|
257
|
+
return
|
|
258
|
+
else:
|
|
259
|
+
# During initialization, allow private attributes to be set.
|
|
260
|
+
super().__setattr__(name, value)
|
|
261
|
+
return
|
|
262
|
+
|
|
263
|
+
# Block overwriting of existing methods/attributes
|
|
264
|
+
sanitized_name = self._sanitize_key(name)
|
|
265
|
+
self._check_underscore_key(sanitized_name)
|
|
266
|
+
if hasattr(self, sanitized_name):
|
|
267
|
+
_LOGGER.error(f"Cannot overwrite existing attribute or method '{sanitized_name}' ({name}).")
|
|
268
|
+
raise AttributeError()
|
|
269
|
+
|
|
270
|
+
# If all checks pass, treat it as a public path.
|
|
271
|
+
self._paths[sanitized_name] = Path(value)
|
|
214
272
|
|
|
215
273
|
|
|
216
274
|
def make_fullpath(
|
ml_tools/utilities.py
CHANGED
|
@@ -3,7 +3,7 @@ import numpy as np
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import polars as pl
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple, overload
|
|
6
|
+
from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple, overload, TypeVar, get_origin, Type
|
|
7
7
|
import joblib
|
|
8
8
|
from joblib.externals.loky.process_executor import TerminatedWorkerError
|
|
9
9
|
from .path_manager import sanitize_filename, make_fullpath, list_csv_paths, list_files_by_extension, list_subdirectories
|
|
@@ -444,16 +444,32 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
|
|
|
444
444
|
_LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
|
|
445
445
|
return None
|
|
446
446
|
|
|
447
|
-
|
|
448
|
-
|
|
447
|
+
# Define a TypeVar to link the expected type to the return type of deserialization
|
|
448
|
+
T = TypeVar('T')
|
|
449
|
+
|
|
450
|
+
def deserialize_object(
|
|
451
|
+
filepath: Union[str, Path],
|
|
452
|
+
expected_type: Optional[Type[T]] = None,
|
|
453
|
+
verbose: bool = True,
|
|
454
|
+
raise_on_error: bool = True
|
|
455
|
+
) -> Optional[T]:
|
|
449
456
|
"""
|
|
450
457
|
Loads a serialized object from a .joblib file.
|
|
451
458
|
|
|
452
459
|
Parameters:
|
|
453
460
|
filepath (str | Path): Full path to the serialized .joblib file.
|
|
461
|
+
expected_type (Type[T] | None): The expected type of the object.
|
|
462
|
+
If provided, the function raises a TypeError if the loaded object
|
|
463
|
+
is not an instance of this type. It correctly handles generics
|
|
464
|
+
like `list[str]` by checking the base type (e.g., `list`).
|
|
465
|
+
Defaults to None, which skips the type check.
|
|
466
|
+
verbose (bool): If True, logs success messages.
|
|
467
|
+
raise_on_error (bool): If True, raises exceptions on errors. If False, returns None instead.
|
|
454
468
|
|
|
455
469
|
Returns:
|
|
456
|
-
(Any | None): The deserialized Python object,
|
|
470
|
+
(Any | None): The deserialized Python object, which will match the
|
|
471
|
+
`expected_type` if provided. Returns None if an error
|
|
472
|
+
occurs and `raise_on_error` is False.
|
|
457
473
|
"""
|
|
458
474
|
true_filepath = make_fullpath(filepath)
|
|
459
475
|
|
|
@@ -465,8 +481,26 @@ def deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_e
|
|
|
465
481
|
raise e
|
|
466
482
|
return None
|
|
467
483
|
else:
|
|
484
|
+
# --- Type Validation Step ---
|
|
485
|
+
if expected_type:
|
|
486
|
+
# get_origin handles generics (e.g., list[str] -> list)
|
|
487
|
+
# If it's not a generic, get_origin returns None, so we use the type itself.
|
|
488
|
+
type_to_check = get_origin(expected_type) or expected_type
|
|
489
|
+
|
|
490
|
+
# Can't do an isinstance check on 'Any', skip it.
|
|
491
|
+
if type_to_check is not Any and not isinstance(obj, type_to_check):
|
|
492
|
+
error_msg = (
|
|
493
|
+
f"Type mismatch: Expected an instance of '{expected_type}', "
|
|
494
|
+
f"but found '{type(obj)}' in '{true_filepath}'."
|
|
495
|
+
)
|
|
496
|
+
_LOGGER.error(error_msg)
|
|
497
|
+
if raise_on_error:
|
|
498
|
+
raise TypeError()
|
|
499
|
+
return None
|
|
500
|
+
|
|
468
501
|
if verbose:
|
|
469
|
-
_LOGGER.info(f"Loaded object of type '{type(obj)}'.")
|
|
502
|
+
_LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
|
|
503
|
+
|
|
470
504
|
return obj
|
|
471
505
|
|
|
472
506
|
|
|
File without changes
|
{dragon_ml_toolbox-10.10.1.dist-info → dragon_ml_toolbox-10.11.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|