dragon-ml-toolbox 10.1.1__py3-none-any.whl → 14.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic; see the registry's advisory page for details.

Files changed (48)
  1. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/METADATA +38 -63
  2. dragon_ml_toolbox-14.2.0.dist-info/RECORD +48 -0
  3. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE +1 -1
  4. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +11 -0
  5. ml_tools/ETL_cleaning.py +175 -59
  6. ml_tools/ETL_engineering.py +506 -70
  7. ml_tools/GUI_tools.py +2 -1
  8. ml_tools/MICE_imputation.py +212 -7
  9. ml_tools/ML_callbacks.py +73 -40
  10. ml_tools/ML_datasetmaster.py +267 -284
  11. ml_tools/ML_evaluation.py +119 -58
  12. ml_tools/ML_evaluation_multi.py +107 -32
  13. ml_tools/ML_inference.py +15 -5
  14. ml_tools/ML_models.py +234 -170
  15. ml_tools/ML_models_advanced.py +323 -0
  16. ml_tools/ML_optimization.py +321 -97
  17. ml_tools/ML_scaler.py +10 -5
  18. ml_tools/ML_trainer.py +585 -40
  19. ml_tools/ML_utilities.py +528 -0
  20. ml_tools/ML_vision_datasetmaster.py +1315 -0
  21. ml_tools/ML_vision_evaluation.py +260 -0
  22. ml_tools/ML_vision_inference.py +428 -0
  23. ml_tools/ML_vision_models.py +627 -0
  24. ml_tools/ML_vision_transformers.py +58 -0
  25. ml_tools/PSO_optimization.py +10 -7
  26. ml_tools/RNN_forecast.py +2 -0
  27. ml_tools/SQL.py +22 -9
  28. ml_tools/VIF_factor.py +4 -3
  29. ml_tools/_ML_vision_recipe.py +88 -0
  30. ml_tools/__init__.py +1 -0
  31. ml_tools/_logger.py +0 -2
  32. ml_tools/_schema.py +96 -0
  33. ml_tools/constants.py +79 -0
  34. ml_tools/custom_logger.py +164 -16
  35. ml_tools/data_exploration.py +1092 -109
  36. ml_tools/ensemble_evaluation.py +48 -1
  37. ml_tools/ensemble_inference.py +6 -7
  38. ml_tools/ensemble_learning.py +4 -3
  39. ml_tools/handle_excel.py +1 -0
  40. ml_tools/keys.py +80 -0
  41. ml_tools/math_utilities.py +259 -0
  42. ml_tools/optimization_tools.py +198 -24
  43. ml_tools/path_manager.py +144 -45
  44. ml_tools/serde.py +192 -0
  45. ml_tools/utilities.py +287 -227
  46. dragon_ml_toolbox-10.1.1.dist-info/RECORD +0 -36
  47. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/WHEEL +0 -0
  48. {dragon_ml_toolbox-10.1.1.dist-info → dragon_ml_toolbox-14.2.0.dist-info}/top_level.txt +0 -0
ml_tools/custom_logger.py CHANGED
@@ -1,9 +1,12 @@
1
1
  from pathlib import Path
2
2
  from datetime import datetime
3
- from typing import Union, List, Dict, Any
3
+ from typing import Union, List, Dict, Any, Literal
4
4
  import traceback
5
5
  import json
6
6
  import csv
7
+ from itertools import zip_longest
8
+ from collections import Counter
9
+
7
10
  from .path_manager import sanitize_filename, make_fullpath
8
11
  from ._script_info import _script_info
9
12
  from ._logger import _LOGGER
@@ -12,7 +15,8 @@ from ._logger import _LOGGER
12
15
  __all__ = [
13
16
  "custom_logger",
14
17
  "save_list_strings",
15
- "load_list_strings"
18
+ "load_list_strings",
19
+ "compare_lists"
16
20
  ]
17
21
 
18
22
 
@@ -25,6 +29,8 @@ def custom_logger(
25
29
  ],
26
30
  save_directory: Union[str, Path],
27
31
  log_name: str,
32
+ add_timestamp: bool=True,
33
+ dict_as: Literal['auto', 'json', 'csv'] = 'auto',
28
34
  ) -> None:
29
35
  """
30
36
  Logs various data types to corresponding output formats:
@@ -32,10 +38,10 @@ def custom_logger(
32
38
  - list[Any] → .txt
33
39
  Each element is written on a new line.
34
40
 
35
- - dict[str, list[Any]] → .csv
41
+ - dict[str, list[Any]] → .csv (if dict_as='auto' or 'csv')
36
42
  Dictionary is treated as tabular data; keys become columns, values become rows.
37
43
 
38
- - dict[str, scalar] → .json
44
+ - dict[str, scalar] → .json (if dict_as='auto' or 'json')
39
45
  Dictionary is treated as structured data and serialized as JSON.
40
46
 
41
47
  - str → .log
@@ -45,29 +51,50 @@ def custom_logger(
45
51
  Full traceback is logged for debugging purposes.
46
52
 
47
53
  Args:
48
- data: The data to be logged. Must be one of the supported types.
49
- save_directory: Directory where the log will be saved. Created if it does not exist.
50
- log_name: Base name for the log file. Timestamp will be appended automatically.
54
+ data (Any): The data to be logged. Must be one of the supported types.
55
+ save_directory (str | Path): Directory where the log will be saved. Created if it does not exist.
56
+ log_name (str): Base name for the log file.
57
+ add_timestamp (bool): Whether to add a timestamp to the filename.
58
+ dict_as ('auto'|'json'|'csv'):
59
+ - 'auto': Guesses format (JSON or CSV) based on dictionary content.
60
+ - 'json': Forces .json format for any dictionary.
61
+ - 'csv': Forces .csv format. Will fail if dict values are not all lists.
51
62
 
52
63
  Raises:
53
64
  ValueError: If the data type is unsupported.
54
65
  """
55
66
  try:
67
+ if not isinstance(data, BaseException) and not data:
68
+ _LOGGER.warning("Empty data received. No log file will be saved.")
69
+ return
70
+
56
71
  save_path = make_fullpath(save_directory, make=True)
57
72
 
58
- timestamp = datetime.now().strftime(r"%Y%m%d_%H%M%S")
59
- log_name = sanitize_filename(log_name)
73
+ sanitized_log_name = sanitize_filename(log_name)
60
74
 
61
- base_path = save_path / f"{log_name}_{timestamp}"
62
-
75
+ if add_timestamp:
76
+ timestamp = datetime.now().strftime(r"%Y%m%d_%H%M%S")
77
+ base_path = save_path / f"{sanitized_log_name}_{timestamp}"
78
+ else:
79
+ base_path = save_path / sanitized_log_name
80
+
81
+ # Router
63
82
  if isinstance(data, list):
64
83
  _log_list_to_txt(data, base_path.with_suffix(".txt"))
65
84
 
66
85
  elif isinstance(data, dict):
67
- if all(isinstance(v, list) for v in data.values()):
68
- _log_dict_to_csv(data, base_path.with_suffix(".csv"))
69
- else:
86
+ if dict_as == 'json':
70
87
  _log_dict_to_json(data, base_path.with_suffix(".json"))
88
+
89
+ elif dict_as == 'csv':
90
+ # This will raise a ValueError if data is not all lists
91
+ _log_dict_to_csv(data, base_path.with_suffix(".csv"))
92
+
93
+ else: # 'auto' mode
94
+ if all(isinstance(v, list) for v in data.values()):
95
+ _log_dict_to_csv(data, base_path.with_suffix(".csv"))
96
+ else:
97
+ _log_dict_to_json(data, base_path.with_suffix(".json"))
71
98
 
72
99
  elif isinstance(data, str):
73
100
  _log_string_to_log(data, base_path.with_suffix(".log"))
@@ -79,7 +106,7 @@ def custom_logger(
79
106
  _LOGGER.error("Unsupported data type. Must be list, dict, str, or BaseException.")
80
107
  raise ValueError()
81
108
 
82
- _LOGGER.info(f"Log saved to: '{base_path}'")
109
+ _LOGGER.info(f"Log saved as: '{base_path.name}'")
83
110
 
84
111
  except Exception:
85
112
  _LOGGER.exception(f"Log not saved.")
@@ -171,10 +198,131 @@ def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[st
171
198
  raise ValueError()
172
199
 
173
200
  if verbose:
174
- _LOGGER.info(f"Text file loaded as list of strings.")
201
+ _LOGGER.info(f"Loaded '{target_path.name}' as list of strings.")
175
202
 
176
203
  return loaded_strings
177
204
 
178
205
 
206
+ class _RobustEncoder(json.JSONEncoder):
207
+ """
208
+ Custom JSON encoder to handle non-serializable objects.
209
+
210
+ This handles:
211
+ 1. `type` objects (e.g., <class 'int'>) which result from
212
+ `check_type_only=True`.
213
+ 2. Any other custom class or object by falling back to its
214
+ string representation.
215
+ """
216
+ def default(self, o):
217
+ if isinstance(o, type):
218
+ return str(o)
219
+ try:
220
+ return super().default(o)
221
+ except TypeError:
222
+ return str(o)
223
+
224
+ def compare_lists(
225
+ list_A: list,
226
+ list_B: list,
227
+ save_dir: Union[str, Path],
228
+ strict: bool = False,
229
+ check_type_only: bool = False
230
+ ) -> dict:
231
+ """
232
+ Compares two lists and saves a JSON report of the differences.
233
+
234
+ Args:
235
+ list_A (list): The first list to compare.
236
+ list_B (list): The second list to compare.
237
+ save_dir (str | Path): The directory where the resulting report will be saved.
238
+ strict (bool):
239
+ - If False: Performs a "bag" comparison. Order does not matter, but duplicates do.
240
+ - If True: Performs a strict, positional comparison.
241
+
242
+ check_type_only (bool):
243
+ - If False: Compares items using `==` (`__eq__` operator).
244
+ - If True: Compares only the `type()` of the items.
245
+
246
+ Returns:
247
+ dict: A dictionary detailing the differences. (saved to `save_dir`).
248
+ """
249
+ MISSING_A_KEY = "missing_in_A"
250
+ MISSING_B_KEY = "missing_in_B"
251
+ MISMATCH_KEY = "mismatch"
252
+
253
+ results: dict[str, list] = {MISSING_A_KEY: [], MISSING_B_KEY: []}
254
+
255
+ # make directory
256
+ save_path = make_fullpath(input_path=save_dir, make=True, enforce="directory")
257
+
258
+ if strict:
259
+ # --- STRICT (Positional) Mode ---
260
+ results[MISMATCH_KEY] = []
261
+ sentinel = object()
262
+
263
+ if check_type_only:
264
+ compare_func = lambda a, b: type(a) == type(b)
265
+ else:
266
+ compare_func = lambda a, b: a == b
267
+
268
+ for index, (item_a, item_b) in enumerate(
269
+ zip_longest(list_A, list_B, fillvalue=sentinel)
270
+ ):
271
+ if item_a is sentinel:
272
+ results[MISSING_A_KEY].append({"index": index, "item": item_b})
273
+ elif item_b is sentinel:
274
+ results[MISSING_B_KEY].append({"index": index, "item": item_a})
275
+ elif not compare_func(item_a, item_b):
276
+ results[MISMATCH_KEY].append(
277
+ {
278
+ "index": index,
279
+ "list_A_item": item_a,
280
+ "list_B_item": item_b,
281
+ }
282
+ )
283
+
284
+ else:
285
+ # --- NON-STRICT (Bag) Mode ---
286
+ if check_type_only:
287
+ # Types are hashable, we can use Counter (O(N))
288
+ types_A_counts = Counter(type(item) for item in list_A)
289
+ types_B_counts = Counter(type(item) for item in list_B)
290
+
291
+ diff_A_B = types_A_counts - types_B_counts
292
+ for item_type, count in diff_A_B.items():
293
+ results[MISSING_B_KEY].extend([item_type] * count)
294
+
295
+ diff_B_A = types_B_counts - types_A_counts
296
+ for item_type, count in diff_B_A.items():
297
+ results[MISSING_A_KEY].extend([item_type] * count)
298
+
299
+ else:
300
+ # Items may be unhashable. Use O(N*M) .remove() method
301
+ temp_B = list(list_B)
302
+ missing_in_B = []
303
+
304
+ for item_a in list_A:
305
+ try:
306
+ temp_B.remove(item_a)
307
+ except ValueError:
308
+ missing_in_B.append(item_a)
309
+
310
+ results[MISSING_A_KEY] = temp_B
311
+ results[MISSING_B_KEY] = missing_in_B
312
+
313
+ # --- Save the Report ---
314
+ try:
315
+ full_path = save_path / "list_comparison.json"
316
+
317
+ # Write the report dictionary to the JSON file
318
+ with open(full_path, 'w', encoding='utf-8') as f:
319
+ json.dump(results, f, indent=4, cls=_RobustEncoder)
320
+
321
+ except Exception as e:
322
+ _LOGGER.error(f"Failed to save comparison report to {save_path}: \n{e}")
323
+
324
+ return results
325
+
326
+
179
327
  def info():
180
328
  _script_info(__all__)