dragon-ml-toolbox 12.9.2__tar.gz → 12.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {dragon_ml_toolbox-12.9.2/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.11.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/custom_logger.py +125 -1
  4. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/serde.py +5 -13
  5. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/pyproject.toml +1 -1
  6. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/LICENSE +0 -0
  7. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/LICENSE-THIRD-PARTY.md +0 -0
  8. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/README.md +0 -0
  9. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  10. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  11. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  12. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  13. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ETL_cleaning.py +0 -0
  14. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ETL_engineering.py +0 -0
  15. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/GUI_tools.py +0 -0
  16. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/MICE_imputation.py +0 -0
  17. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_callbacks.py +0 -0
  18. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_datasetmaster.py +0 -0
  19. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_evaluation.py +0 -0
  20. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_evaluation_multi.py +0 -0
  21. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_inference.py +0 -0
  22. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_models.py +0 -0
  23. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_optimization.py +0 -0
  24. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_scaler.py +0 -0
  25. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_simple_optimization.py +0 -0
  26. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_trainer.py +0 -0
  27. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ML_utilities.py +0 -0
  28. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/PSO_optimization.py +0 -0
  29. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/RNN_forecast.py +0 -0
  30. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/SQL.py +0 -0
  31. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/VIF_factor.py +0 -0
  32. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/__init__.py +0 -0
  33. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/_logger.py +0 -0
  34. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/_script_info.py +0 -0
  35. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/constants.py +0 -0
  36. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/data_exploration.py +0 -0
  37. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ensemble_evaluation.py +0 -0
  38. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ensemble_inference.py +0 -0
  39. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/ensemble_learning.py +0 -0
  40. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/handle_excel.py +0 -0
  41. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/keys.py +0 -0
  42. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/math_utilities.py +0 -0
  43. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/optimization_tools.py +0 -0
  44. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/path_manager.py +0 -0
  45. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/ml_tools/utilities.py +0 -0
  46. {dragon_ml_toolbox-12.9.2 → dragon_ml_toolbox-12.11.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.9.2
3
+ Version: 12.11.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 12.9.2
3
+ Version: 12.11.0
4
4
  Summary: A collection of tools for data science and machine learning projects.
5
5
  Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -4,6 +4,8 @@ from typing import Union, List, Dict, Any
4
4
  import traceback
5
5
  import json
6
6
  import csv
7
+ from itertools import zip_longest
8
+ from collections import Counter
7
9
 
8
10
  from .path_manager import sanitize_filename, make_fullpath
9
11
  from ._script_info import _script_info
@@ -13,7 +15,8 @@ from ._logger import _LOGGER
13
15
  __all__ = [
14
16
  "custom_logger",
15
17
  "save_list_strings",
16
- "load_list_strings"
18
+ "load_list_strings",
19
+ "compare_lists"
17
20
  ]
18
21
 
19
22
 
@@ -177,5 +180,126 @@ def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[st
177
180
  return loaded_strings
178
181
 
179
182
 
183
+ class _RobustEncoder(json.JSONEncoder):
184
+ """
185
+ Custom JSON encoder to handle non-serializable objects.
186
+
187
+ This handles:
188
+ 1. `type` objects (e.g., <class 'int'>) which result from
189
+ `check_type_only=True`.
190
+ 2. Any other custom class or object by falling back to its
191
+ string representation.
192
+ """
193
+ def default(self, o):
194
+ if isinstance(o, type):
195
+ return str(o)
196
+ try:
197
+ return super().default(o)
198
+ except TypeError:
199
+ return str(o)
200
+
201
def compare_lists(
    list_A: list,
    list_B: list,
    save_dir: Union[str, Path],
    strict: bool = False,
    check_type_only: bool = False
) -> dict:
    """
    Compares two lists and saves a JSON report of the differences.

    The report is written to `list_comparison.json` inside `save_dir`. A failure
    while building or writing the report is logged and does not raise; the
    results dictionary is returned either way.

    Args:
        list_A (list): The first list to compare.
        list_B (list): The second list to compare.
        save_dir (str | Path): The directory where the resulting report will be saved.
        strict (bool):
            - If False: Performs a "bag" comparison. Order does not matter, but duplicates do.
            - If True: Performs a strict, positional comparison.

        check_type_only (bool):
            - If False: Compares items using `==` (`__eq__` operator).
            - If True: Compares only the `type()` of the items.

    Returns:
        dict: A dictionary detailing the differences, with keys `"missing_in_A"`
        and `"missing_in_B"`, plus `"mismatch"` when `strict=True`.
        In strict mode the entries are dictionaries carrying the `"index"` of
        the item; in bag mode they are the items (or types) themselves.
    """
    MISSING_A_KEY = "missing_in_A"
    MISSING_B_KEY = "missing_in_B"
    MISMATCH_KEY = "mismatch"

    results: dict[str, list] = {MISSING_A_KEY: [], MISSING_B_KEY: []}

    # make directory (done up front, before any comparison work)
    save_path = make_fullpath(input_path=save_dir, make=True, enforce="directory")

    if strict:
        # --- STRICT (Positional) Mode ---
        results[MISMATCH_KEY] = []
        # Unique filler so that a legitimate `None` item is never mistaken
        # for "list ended here".
        sentinel = object()

        def items_match(a, b):
            if check_type_only:
                return type(a) is type(b)
            return a == b

        for index, (item_a, item_b) in enumerate(
            zip_longest(list_A, list_B, fillvalue=sentinel)
        ):
            if item_a is sentinel:
                results[MISSING_A_KEY].append({"index": index, "item": item_b})
            elif item_b is sentinel:
                results[MISSING_B_KEY].append({"index": index, "item": item_a})
            elif not items_match(item_a, item_b):
                results[MISMATCH_KEY].append(
                    {
                        "index": index,
                        "list_A_item": item_a,
                        "list_B_item": item_b,
                    }
                )

    elif check_type_only:
        # --- NON-STRICT (Bag) Mode, types only ---
        # Types are hashable, we can use Counter (O(N))
        types_A_counts = Counter(type(item) for item in list_A)
        types_B_counts = Counter(type(item) for item in list_B)

        # Counter subtraction keeps only positive counts, i.e. the surplus
        # on the left-hand side; elements() repeats each type `count` times.
        results[MISSING_B_KEY].extend((types_A_counts - types_B_counts).elements())
        results[MISSING_A_KEY].extend((types_B_counts - types_A_counts).elements())

    else:
        # --- NON-STRICT (Bag) Mode, by equality ---
        # Items may be unhashable. Use O(N*M) .remove() method
        temp_B = list(list_B)
        missing_in_B = []

        for item_a in list_A:
            try:
                # Consume one matching occurrence so duplicates are counted.
                temp_B.remove(item_a)
            except ValueError:
                missing_in_B.append(item_a)

        # Whatever is left in the copy of B had no counterpart in A.
        results[MISSING_A_KEY] = temp_B
        results[MISSING_B_KEY] = missing_in_B

    # --- Save the Report ---
    try:
        full_path = save_path / "list_comparison.json"

        # Serialize fully in memory BEFORE opening the file: json.dump streams
        # chunks to disk, so a serialization error part-way through would
        # leave a truncated, invalid JSON report behind.
        report_text = json.dumps(results, indent=4, cls=_RobustEncoder)

        with open(full_path, 'w', encoding='utf-8') as f:
            f.write(report_text)

    except Exception as e:
        # Best-effort: the comparison result is still returned to the caller.
        _LOGGER.error(f"Failed to save comparison report to {save_path}: \n{e}")

    return results
302
+
303
+
180
304
def info():
    """Pass this module's public names (`__all__`) to `_script_info`."""
    public_names = __all__
    _script_info(public_names)
@@ -83,8 +83,7 @@ def deserialize_object(
83
83
  filepath: Union[str, Path],
84
84
  expected_type: Optional[Type[T]] = None,
85
85
  verbose: bool = True,
86
- raise_on_error: bool = True
87
- ) -> Optional[T]:
86
+ ) -> T:
88
87
  """
89
88
  Loads a serialized object from a .joblib file.
90
89
 
@@ -96,22 +95,17 @@ def deserialize_object(
96
95
  like `list[str]` by checking the base type (e.g., `list`).
97
96
  Defaults to None, which skips the type check.
98
97
  verbose (bool): If True, logs success messages.
99
- raise_on_error (bool): If True, raises exceptions on errors. If False, returns None instead.
100
98
 
101
99
  Returns:
102
- (Any | None): The deserialized Python object, which will match the
103
- `expected_type` if provided. Returns None if an error
104
- occurs and `raise_on_error` is False.
100
+ (Any): The deserialized Python object, which will match the `expected_type` if provided.
105
101
  """
106
- true_filepath = make_fullpath(filepath)
102
+ true_filepath = make_fullpath(filepath, enforce="file")
107
103
 
108
104
  try:
109
105
  obj = joblib.load(true_filepath)
110
106
  except (IOError, OSError, EOFError, TypeError, ValueError) as e:
111
107
  _LOGGER.error(f"Failed to deserialize object from '{true_filepath}'.")
112
- if raise_on_error:
113
- raise e
114
- return None
108
+ raise e
115
109
  else:
116
110
  # --- Type Validation Step ---
117
111
  if expected_type:
@@ -126,9 +120,7 @@ def deserialize_object(
126
120
  f"but found '{type(obj)}' in '{true_filepath}'."
127
121
  )
128
122
  _LOGGER.error(error_msg)
129
- if raise_on_error:
130
- raise TypeError()
131
- return None
123
+ raise TypeError()
132
124
 
133
125
  if verbose:
134
126
  _LOGGER.info(f"Loaded object of type '{type(obj)}' from '{true_filepath}'.")
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "12.9.2"
3
+ version = "12.11.0"
4
4
  description = "A collection of tools for data science and machine learning projects."
5
5
  authors = [
6
6
  { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }