dragon-ml-toolbox 10.10.1__tar.gz → 10.11.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (41):
  1. {dragon_ml_toolbox-10.10.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-10.11.1}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/path_manager.py +101 -43
  4. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/utilities.py +39 -5
  5. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/pyproject.toml +1 -1
  6. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/LICENSE +0 -0
  7. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/LICENSE-THIRD-PARTY.md +0 -0
  8. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/README.md +0 -0
  9. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  10. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  11. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  12. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  13. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ETL_cleaning.py +0 -0
  14. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ETL_engineering.py +0 -0
  15. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/GUI_tools.py +0 -0
  16. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/MICE_imputation.py +0 -0
  17. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_callbacks.py +0 -0
  18. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_datasetmaster.py +0 -0
  19. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_evaluation.py +0 -0
  20. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_evaluation_multi.py +0 -0
  21. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_inference.py +0 -0
  22. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_models.py +0 -0
  23. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_optimization.py +0 -0
  24. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_scaler.py +0 -0
  25. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ML_trainer.py +0 -0
  26. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/PSO_optimization.py +0 -0
  27. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/RNN_forecast.py +0 -0
  28. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/SQL.py +0 -0
  29. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/VIF_factor.py +0 -0
  30. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/__init__.py +0 -0
  31. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/_logger.py +0 -0
  32. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/_script_info.py +0 -0
  33. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/custom_logger.py +0 -0
  34. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/data_exploration.py +0 -0
  35. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ensemble_evaluation.py +0 -0
  36. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ensemble_inference.py +0 -0
  37. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/ensemble_learning.py +0 -0
  38. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/handle_excel.py +0 -0
  39. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/keys.py +0 -0
  40. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/ml_tools/optimization_tools.py +0 -0
  41. {dragon_ml_toolbox-10.10.1 → dragon_ml_toolbox-10.11.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.10.1
3
+ Version: 10.11.1
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 10.10.1
3
+ Version: 10.11.1
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -23,13 +23,33 @@ class PathManager:
23
23
  "path database". It supports both development mode and applications
24
24
  bundled with Pyinstaller or Nuitka.
25
25
 
26
- Supports python dictionary syntax.
26
+ All keys provided to the manager are automatically sanitized to ensure
27
+ they are valid Python identifiers. This allows for clean, attribute-style
28
+ access. The sanitization process involves replacing whitespace with
29
+ underscores and removing special characters.
27
30
  """
28
31
  def __init__(
29
32
  self,
30
33
  anchor_file: str,
31
34
  base_directories: Optional[List[str]] = None
32
35
  ):
36
+ """
37
+ Sets up the core paths for a project by anchoring to a specific file.
38
+
39
+ The manager automatically registers a 'ROOT' path, which points to the
40
+ root of the package, and can pre-register common subdirectories found
41
+ directly within that root.
42
+
43
+ Args:
44
+ anchor_file (str): The path to a file within your package, typically
45
+ the `__file__` of the script where PathManager
46
+ is instantiated. This is used to locate the
47
+ package root directory.
48
+ base_directories (List[str] | None): An optional list of strings,
49
+ where each string is the name
50
+ of a subdirectory to register
51
+ relative to the package root.
52
+ """
33
53
  resolved_anchor_path = Path(anchor_file).resolve()
34
54
  self._package_name = resolved_anchor_path.parent.name
35
55
  self._is_bundled, bundle_root = self._get_bundle_root()
@@ -43,13 +63,17 @@ class PathManager:
43
63
  package_root = resolved_anchor_path.parent
44
64
 
45
65
  # Register the root of the package itself
46
- self._paths["ROOT"] = package_root
66
+ self.ROOT = package_root
47
67
 
48
68
  # Register all the base directories
49
69
  if base_directories:
50
70
  for dir_name in base_directories:
51
- # This logic works for both dev mode and bundled mode
52
- self._paths[dir_name] = package_root / dir_name
71
+ sanitized_dir_name = self._sanitize_key(dir_name)
72
+ self._check_underscore_key(sanitized_dir_name)
73
+ setattr(self, sanitized_dir_name, package_root / sanitized_dir_name)
74
+
75
+ # Signal that initialization is complete.
76
+ self._initialized = True
53
77
 
54
78
  def _get_bundle_root(self) -> tuple[bool, Optional[str]]:
55
79
  """
@@ -72,47 +96,35 @@ class PathManager:
72
96
  # --- Not Bundled ---
73
97
  else:
74
98
  return False, None
99
+
100
+ def _check_underscore_key(self, key: str) -> None:
101
+ if key.startswith("_"):
102
+ _LOGGER.error(f"Path key '{key}' cannot start with underscores.")
103
+ raise ValueError()
75
104
 
76
- def get(self, key: str) -> Path:
77
- """
78
- Retrieves a stored path by its key.
79
-
80
- Args:
81
- key (str): The key of the path to retrieve.
82
-
83
- Returns:
84
- Path: The resolved, absolute Path object.
85
-
86
- Raises:
87
- KeyError: If the key is not found in the manager.
88
- """
89
- try:
90
- return self._paths[key]
91
- except KeyError:
92
- _LOGGER.error(f"Path key '{key}' not found.")
93
- raise
94
-
95
- def update(self, new_paths: Dict[str, Union[str, Path]], overwrite: bool = False) -> None:
105
+ def update(self, new_paths: Dict[str, Union[str, Path]]) -> None:
96
106
  """
97
- Adds new paths or overwrites existing ones in the manager.
107
+ Adds new paths in the manager.
98
108
 
99
109
  Args:
100
110
  new_paths (Dict[str, Union[str, Path]]): A dictionary where keys are
101
111
  the identifiers and values are the
102
- Path objects or strings to store.
103
- overwrite (bool): If False (default), raises a KeyError if any
104
- key in new_paths already exists. If True,
105
- allows overwriting existing keys.
112
+ Path objects to store.
106
113
  """
107
- if not overwrite:
108
- for key in new_paths:
109
- if key in self._paths:
110
- _LOGGER.error(f"Path key '{key}' already exists in the manager. To replace it, call update() with overwrite=True.")
111
- raise KeyError
112
-
113
- # Resolve any string paths to Path objects before storing
114
- resolved_new_paths = {k: Path(v) for k, v in new_paths.items()}
115
- self._paths.update(resolved_new_paths)
114
+ # Pre-check
115
+ for key in new_paths:
116
+ sanitized_key = self._sanitize_key(key)
117
+ self._check_underscore_key(sanitized_key)
118
+ if hasattr(self, sanitized_key):
119
+ _LOGGER.error(f"Cannot add path for key '{sanitized_key}' ({key}): an attribute with this name already exists.")
120
+ raise KeyError()
121
+
122
+ # If no conflicts, add new paths
123
+ for key, value in new_paths.items():
124
+ self.__setattr__(key, value)
125
+
126
+ def _sanitize_key(self, key: str):
127
+ return sanitize_filename(key)
116
128
 
117
129
  def make_dirs(self, keys: Optional[List[str]] = None, verbose: bool = False) -> None:
118
130
  """
@@ -147,7 +159,7 @@ class PathManager:
147
159
  if path.suffix: # It's a file, not a directory
148
160
  continue
149
161
 
150
- # --- THE CRITICAL CHECK ---
162
+ # --- CRITICAL CHECK ---
151
163
  # Determine if the path is inside the main application package.
152
164
  is_internal_path = package_root and path.is_relative_to(package_root)
153
165
 
@@ -186,15 +198,20 @@ class PathManager:
186
198
  # --- Dictionary-Style Methods ---
187
199
  def __getitem__(self, key: str) -> Path:
188
200
  """Allows dictionary-style getting, e.g., PM['my_key']"""
189
- return self.get(key)
201
+ return self.__getattr__(key)
190
202
 
191
203
  def __setitem__(self, key: str, value: Union[str, Path]):
192
- """Allows dictionary-style setting, does not allow overwriting, e.g., PM['my_key'] = path"""
193
- self.update({key: value}, overwrite=False)
204
+ """Allows dictionary-style setting, e.g., PM['my_key'] = path"""
205
+ sanitized_key = self._sanitize_key(key)
206
+ self._check_underscore_key(sanitized_key)
207
+ self.__setattr__(sanitized_key, value)
194
208
 
195
209
  def __contains__(self, key: str) -> bool:
196
210
  """Allows checking for a key's existence, e.g., if 'my_key' in PM"""
197
- return key in self._paths
211
+ sanitized_key = self._sanitize_key(key)
212
+ true_false = sanitized_key in self._paths
213
+ # print(f"key {sanitized_key} in current path dictionary keys: {true_false}")
214
+ return true_false
198
215
 
199
216
  def __len__(self) -> int:
200
217
  """Allows getting the number of paths, e.g., len(PM)"""
@@ -211,6 +228,47 @@ class PathManager:
211
228
  def items(self):
212
229
  """Returns all registered (key, Path) pairs."""
213
230
  return self._paths.items()
231
+
232
+ def __getattr__(self, name: str) -> Path:
233
+ """
234
+ Allows attribute-style access to paths, e.g., PM.data.
235
+ """
236
+ # Block access to private attributes
237
+ if name.startswith('_'):
238
+ _LOGGER.error(f"Access to private attribute '{name}' is not allowed, remove leading underscore.")
239
+ raise AttributeError()
240
+
241
+ sanitized_name = self._sanitize_key(name)
242
+
243
+ try:
244
+ # Look for the key in our internal dictionary
245
+ return self._paths[sanitized_name]
246
+ except KeyError:
247
+ # If not found, raise an AttributeError
248
+ _LOGGER.error(f"'{type(self).__name__}' object has no attribute or path key '{sanitized_name}'")
249
+ raise AttributeError()
250
+
251
+ def __setattr__(self, name: str, value: Union[str, Path]):
252
+ """Allows attribute-style setting of paths, e.g., PM.data = 'path/to/data'."""
253
+ # Check for internal attributes
254
+ if name.startswith('_'):
255
+ if hasattr(self, '_initialized') and self._initialized:
256
+ self._check_underscore_key(name)
257
+ return
258
+ else:
259
+ # During initialization, allow private attributes to be set.
260
+ super().__setattr__(name, value)
261
+ return
262
+
263
+ # Block overwriting of existing methods/attributes
264
+ sanitized_name = self._sanitize_key(name)
265
+ self._check_underscore_key(sanitized_name)
266
+ if hasattr(self, sanitized_name):
267
+ _LOGGER.error(f"Cannot overwrite existing attribute or method '{sanitized_name}' ({name}).")
268
+ raise AttributeError()
269
+
270
+ # If all checks pass, treat it as a public path.
271
+ self._paths[sanitized_name] = Path(value)
214
272
 
215
273
 
216
274
  def make_fullpath(
@@ -3,7 +3,7 @@ import numpy as np
3
3
  import pandas as pd
4
4
  import polars as pl
5
5
  from pathlib import Path
6
- from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple, overload
6
+ from typing import Literal, Union, Sequence, Optional, Any, Iterator, Tuple, overload, TypeVar, get_origin, Type
7
7
  import joblib
8
8
  from joblib.externals.loky.process_executor import TerminatedWorkerError
9
9
  from .path_manager import sanitize_filename, make_fullpath, list_csv_paths, list_files_by_extension, list_subdirectories
@@ -444,16 +444,32 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
444
444
  _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
445
445
  return None
446
446
 
447
-
448
- def deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_error: bool=True) -> Optional[Any]:
447
+ # Define a TypeVar to link the expected type to the return type of deserialization
448
+ T = TypeVar('T')
449
+
450
+ def deserialize_object(
451
+ filepath: Union[str, Path],
452
+ expected_type: Optional[Type[T]] = None,
453
+ verbose: bool = True,
454
+ raise_on_error: bool = True
455
+ ) -> Optional[T]:
449
456
  """
450
457
  Loads a serialized object from a .joblib file.
451
458
 
452
459
  Parameters:
453
460
  filepath (str | Path): Full path to the serialized .joblib file.
461
+ expected_type (Type[T] | None): The expected type of the object.
462
+ If provided, the function raises a TypeError if the loaded object
463
+ is not an instance of this type. It correctly handles generics
464
+ like `list[str]` by checking the base type (e.g., `list`).
465
+ Defaults to None, which skips the type check.
466
+ verbose (bool): If True, logs success messages.
467
+ raise_on_error (bool): If True, raises exceptions on errors. If False, returns None instead.
454
468
 
455
469
  Returns:
456
- (Any | None): The deserialized Python object, or None if loading fails.
470
+ (Any | None): The deserialized Python object, which will match the
471
+ `expected_type` if provided. Returns None if an error
472
+ occurs and `raise_on_error` is False.
457
473
  """
458
474
  true_filepath = make_fullpath(filepath)
459
475
 
@@ -465,8 +481,26 @@ def deserialize_object(filepath: Union[str,Path], verbose: bool=True, raise_on_e
465
481
  raise e
466
482
  return None
467
483
  else:
484
+ # --- Type Validation Step ---
485
+ if expected_type:
486
+ # get_origin handles generics (e.g., list[str] -> list)
487
+ # If it's not a generic, get_origin returns None, so we use the type itself.
488
+ type_to_check = get_origin(expected_type) or expected_type
489
+
490
+ # Can't do an isinstance check on 'Any', skip it.
491
+ if type_to_check is not Any and not isinstance(obj, type_to_check):
492
+ error_msg = (
493
+ f"Type mismatch: Expected an instance of '{expected_type}', "
494
+ f"but found '{type(obj)}' in '{true_filepath}'."
495
+ )
496
+ _LOGGER.error(error_msg)
497
+ if raise_on_error:
498
+ raise TypeError()
499
+ return None
500
+
468
501
  if verbose:
469
- _LOGGER.info(f"Loaded object of type '{type(obj)}'.")
502
+ _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
503
+
470
504
  return obj
471
505
 
472
506
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "10.10.1"
3
+ version = "10.11.1"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }