dragon-ml-toolbox 12.9.1__tar.gz → 12.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dragon-ml-toolbox might be problematic. Click here for more details.
- {dragon_ml_toolbox-12.9.1/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.10.0}/PKG-INFO +1 -1
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/custom_logger.py +125 -1
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/data_exploration.py +4 -4
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/pyproject.toml +1 -1
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/LICENSE +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/README.md +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ETL_cleaning.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ETL_engineering.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/GUI_tools.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/MICE_imputation.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_callbacks.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_datasetmaster.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_evaluation.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_evaluation_multi.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_inference.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_models.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_optimization.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_scaler.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_simple_optimization.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_trainer.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ML_utilities.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/PSO_optimization.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/RNN_forecast.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/SQL.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/VIF_factor.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/__init__.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/_logger.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/_script_info.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/constants.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ensemble_evaluation.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ensemble_inference.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/ensemble_learning.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/handle_excel.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/keys.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/math_utilities.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/optimization_tools.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/path_manager.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/serde.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/ml_tools/utilities.py +0 -0
- {dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/setup.cfg +0 -0
|
@@ -4,6 +4,8 @@ from typing import Union, List, Dict, Any
|
|
|
4
4
|
import traceback
|
|
5
5
|
import json
|
|
6
6
|
import csv
|
|
7
|
+
from itertools import zip_longest
|
|
8
|
+
from collections import Counter
|
|
7
9
|
|
|
8
10
|
from .path_manager import sanitize_filename, make_fullpath
|
|
9
11
|
from ._script_info import _script_info
|
|
@@ -13,7 +15,8 @@ from ._logger import _LOGGER
|
|
|
13
15
|
__all__ = [
|
|
14
16
|
"custom_logger",
|
|
15
17
|
"save_list_strings",
|
|
16
|
-
"load_list_strings"
|
|
18
|
+
"load_list_strings",
|
|
19
|
+
"compare_lists"
|
|
17
20
|
]
|
|
18
21
|
|
|
19
22
|
|
|
@@ -177,5 +180,126 @@ def load_list_strings(text_file: Union[str,Path], verbose: bool=True) -> list[st
|
|
|
177
180
|
return loaded_strings
|
|
178
181
|
|
|
179
182
|
|
|
183
|
+
class _RobustEncoder(json.JSONEncoder):
|
|
184
|
+
"""
|
|
185
|
+
Custom JSON encoder to handle non-serializable objects.
|
|
186
|
+
|
|
187
|
+
This handles:
|
|
188
|
+
1. `type` objects (e.g., <class 'int'>) which result from
|
|
189
|
+
`check_type_only=True`.
|
|
190
|
+
2. Any other custom class or object by falling back to its
|
|
191
|
+
string representation.
|
|
192
|
+
"""
|
|
193
|
+
def default(self, o):
|
|
194
|
+
if isinstance(o, type):
|
|
195
|
+
return str(o)
|
|
196
|
+
try:
|
|
197
|
+
return super().default(o)
|
|
198
|
+
except TypeError:
|
|
199
|
+
return str(o)
|
|
200
|
+
|
|
201
|
+
def _items_equal(left, right) -> bool:
    """
    Return True when two items are the same object or compare equal.

    Mirrors the identity-or-equality rule Python containers use (so an object
    always matches itself, even NaN). A comparison that raises, or whose
    result has no unambiguous truth value (e.g. array-like objects), is
    treated as "not equal" instead of propagating the error.
    """
    if left is right:
        return True
    try:
        return bool(left == right)
    except (TypeError, ValueError):
        return False


def compare_lists(
    list_A: list,
    list_B: list,
    save_dir: Union[str, Path],
    strict: bool = False,
    check_type_only: bool = False
) -> dict:
    """
    Compares two lists and saves a JSON report of the differences.

    The report is written to `list_comparison.json` inside `save_dir`. Saving
    is best-effort: a failure is logged and the results are still returned.

    Args:
        list_A (list): The first list to compare.
        list_B (list): The second list to compare.
        save_dir (str | Path): The directory where the resulting report will be saved.
        strict (bool):
            - If False: Performs a "bag" comparison. Order does not matter, but duplicates do.
            - If True: Performs a strict, positional comparison.

        check_type_only (bool):
            - If False: Compares items by identity or `==` (`__eq__` operator).
              Items whose comparison raises or is ambiguous count as different.
            - If True: Compares only the `type()` of the items.

    Returns:
        A dictionary detailing the differences (the same content that is
        saved to `save_dir`):
            - "missing_in_A": entries present in `list_B` only.
            - "missing_in_B": entries present in `list_A` only.
            - "mismatch": only in strict mode; positions where both lists
              have an item but the items differ.
        In strict mode the missing entries are `{"index", "item"}` dicts; in
        bag mode they are the items themselves (or their types when
        `check_type_only=True`).
    """
    MISSING_A_KEY = "missing_in_A"
    MISSING_B_KEY = "missing_in_B"
    MISMATCH_KEY = "mismatch"

    results: dict[str, list] = {MISSING_A_KEY: [], MISSING_B_KEY: []}

    # make directory
    save_path = make_fullpath(input_path=save_dir, make=True, enforce="directory")

    if strict:
        # --- STRICT (Positional) Mode ---
        results[MISMATCH_KEY] = []
        # Unique filler so that a legitimate None item is never mistaken for
        # "past the end of the shorter list".
        sentinel = object()

        for index, (item_a, item_b) in enumerate(
            zip_longest(list_A, list_B, fillvalue=sentinel)
        ):
            if item_a is sentinel:
                results[MISSING_A_KEY].append({"index": index, "item": item_b})
            elif item_b is sentinel:
                results[MISSING_B_KEY].append({"index": index, "item": item_a})
            else:
                if check_type_only:
                    same = type(item_a) is type(item_b)
                else:
                    same = _items_equal(item_a, item_b)
                if not same:
                    results[MISMATCH_KEY].append(
                        {
                            "index": index,
                            "list_A_item": item_a,
                            "list_B_item": item_b,
                        }
                    )

    elif check_type_only:
        # --- NON-STRICT (Bag) Mode, types only ---
        # Types are hashable, we can use Counter (O(N))
        types_A_counts = Counter(type(item) for item in list_A)
        types_B_counts = Counter(type(item) for item in list_B)

        # Counter subtraction keeps only positive surpluses.
        for item_type, count in (types_A_counts - types_B_counts).items():
            results[MISSING_B_KEY].extend([item_type] * count)

        for item_type, count in (types_B_counts - types_A_counts).items():
            results[MISSING_A_KEY].extend([item_type] * count)

    else:
        # --- NON-STRICT (Bag) Mode, by value ---
        # Items may be unhashable, so match them with an O(N*M) scan. The scan
        # is explicit (instead of list.remove inside `except ValueError`) so
        # that an error raised by an item's own `==` is not mistaken for
        # "item not found".
        remaining_B = list(list_B)

        for item_a in list_A:
            for position, candidate in enumerate(remaining_B):
                if _items_equal(candidate, item_a):
                    # Consume one occurrence so duplicates are counted.
                    del remaining_B[position]
                    break
            else:
                results[MISSING_B_KEY].append(item_a)

        # Whatever was never matched exists only in list_B.
        results[MISSING_A_KEY] = remaining_B

    # --- Save the Report ---
    try:
        full_path = save_path / "list_comparison.json"

        # Serialize first so a serialization failure cannot leave a
        # truncated, half-written report on disk.
        report_text = json.dumps(results, indent=4, cls=_RobustEncoder)

        with open(full_path, 'w', encoding='utf-8') as f:
            f.write(report_text)

    except Exception as e:
        _LOGGER.error(f"Failed to save comparison report to {save_path}: \n{e}")

    return results
|
|
302
|
+
|
|
303
|
+
|
|
180
304
|
def info():
    """
    Hand this module's public names (`__all__`) to `_script_info`.

    NOTE(review): presumably displays the list of available tools; confirm
    against `_script_info`, which is defined in another module.
    """
    _script_info(__all__)
|
|
@@ -1024,7 +1024,7 @@ def reconstruct_one_hot(
|
|
|
1024
1024
|
df: pd.DataFrame,
|
|
1025
1025
|
features_to_reconstruct: List[Union[str, Tuple[str, Optional[str]]]],
|
|
1026
1026
|
separator: str = '_',
|
|
1027
|
-
baseline_category_name: str = "Other",
|
|
1027
|
+
baseline_category_name: Optional[str] = "Other",
|
|
1028
1028
|
drop_original: bool = True,
|
|
1029
1029
|
verbose: bool = True
|
|
1030
1030
|
) -> pd.DataFrame:
|
|
@@ -1056,7 +1056,7 @@ def reconstruct_one_hot(
|
|
|
1056
1056
|
separator (str):
|
|
1057
1057
|
The character separating the base name from the categorical value in
|
|
1058
1058
|
the column names (e.g., '_' in 'B_a').
|
|
1059
|
-
baseline_category_name (str):
|
|
1059
|
+
baseline_category_name (str | None):
|
|
1060
1060
|
The baseline category name to use by default if it is not explicitly provided.
|
|
1061
1061
|
drop_original (bool):
|
|
1062
1062
|
If True, the original one-hot encoded columns will be dropped from
|
|
@@ -1081,8 +1081,8 @@ def reconstruct_one_hot(
|
|
|
1081
1081
|
_LOGGER.error("Input must be a pandas DataFrame.")
|
|
1082
1082
|
raise TypeError()
|
|
1083
1083
|
|
|
1084
|
-
if not isinstance(baseline_category_name, str):
|
|
1085
|
-
_LOGGER.error("The baseline_category must be a string.")
|
|
1084
|
+
if not (baseline_category_name is None or isinstance(baseline_category_name, str)):
|
|
1085
|
+
_LOGGER.error("The baseline_category must be None or a string.")
|
|
1086
1086
|
raise TypeError()
|
|
1087
1087
|
|
|
1088
1088
|
new_df = df.copy()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/requires.txt
RENAMED
|
File without changes
|
{dragon_ml_toolbox-12.9.1 → dragon_ml_toolbox-12.10.0}/dragon_ml_toolbox.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|