dragon-ml-toolbox 20.14.0__py3-none-any.whl → 20.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/METADATA +1 -1
- {dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/RECORD +8 -8
- ml_tools/data_exploration/_features.py +20 -4
- ml_tools/data_exploration/_plotting.py +3 -0
- {dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/licenses/LICENSE-THIRD-PARTY.md +0 -0
- {dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 20.14.0
|
|
3
|
+
Version: 20.14.1
|
|
4
4
|
Summary: Complete pipelines and helper tools for data science and machine learning projects.
|
|
5
5
|
Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
dragon_ml_toolbox-20.14.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
|
|
2
|
-
dragon_ml_toolbox-20.14.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
|
|
1
|
+
dragon_ml_toolbox-20.14.1.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
|
|
2
|
+
dragon_ml_toolbox-20.14.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
|
|
3
3
|
ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
|
|
5
5
|
ml_tools/ETL_cleaning/__init__.py,sha256=gLRHF-qzwpqKTvbbn9chIQELeUDh_XGpBRX28j-5IqI,545
|
|
@@ -107,8 +107,8 @@ ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHz
|
|
|
107
107
|
ml_tools/data_exploration/__init__.py,sha256=XNA8gcRx5ifrv092HA7HSpek8havlk_3RZi9aq9dSjg,1957
|
|
108
108
|
ml_tools/data_exploration/_analysis.py,sha256=JSoFJSkv4-_v9YxxmjHZ_PeFRneDENjSEo2sy_uC4oY,14196
|
|
109
109
|
ml_tools/data_exploration/_cleaning.py,sha256=pAZOXgGK35j7O8q6cnyTwYK1GLNnD04A8p2fSyMB1mg,20906
|
|
110
|
-
ml_tools/data_exploration/_features.py,sha256=
|
|
111
|
-
ml_tools/data_exploration/_plotting.py,sha256=
|
|
110
|
+
ml_tools/data_exploration/_features.py,sha256=_aBMW7eqSm6oUj54ftidsv9zdywOkc1eyZgITb82XF8,29237
|
|
111
|
+
ml_tools/data_exploration/_plotting.py,sha256=Vg9qS46akbAyrZAgBrPWg2p29V5vqqY4Bk4SHwZLZNI,19995
|
|
112
112
|
ml_tools/data_exploration/_schema_ops.py,sha256=Fd6fBGGv4OpxmJ1HG9pith6QL90z0tzssCvzkQxlEEQ,11083
|
|
113
113
|
ml_tools/ensemble_evaluation/__init__.py,sha256=t4Gr8EGEk8RLatyc92-S0BzbQvdvodzoF-qDAH2qjVg,546
|
|
114
114
|
ml_tools/ensemble_evaluation/_ensemble_evaluation.py,sha256=-sX9cLMaa0FOQDikmVv2lsCYtQ56Kftd3tILnNej0Hg,28346
|
|
@@ -143,7 +143,7 @@ ml_tools/utilities/__init__.py,sha256=h4lE3SQstg-opcQj6QSKhu-HkqSbmHExsWoM9vC5D9
|
|
|
143
143
|
ml_tools/utilities/_translate.py,sha256=U8hRPa3PmTpIf9n9yR3gBGmp_hkcsjQLwjAHSHc0WHs,10325
|
|
144
144
|
ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
|
|
145
145
|
ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
|
|
146
|
-
dragon_ml_toolbox-20.14.
|
|
147
|
-
dragon_ml_toolbox-20.14.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
148
|
-
dragon_ml_toolbox-20.14.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
149
|
-
dragon_ml_toolbox-20.14.0.dist-info/RECORD,,
|
|
146
|
+
dragon_ml_toolbox-20.14.1.dist-info/METADATA,sha256=oV6v5gFhRVLpuJ3HgL7Qpn8_Dgk9DGkYcOjSfl2kIh0,7889
|
|
147
|
+
dragon_ml_toolbox-20.14.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
148
|
+
dragon_ml_toolbox-20.14.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
149
|
+
dragon_ml_toolbox-20.14.1.dist-info/RECORD,,
|
|
@@ -168,6 +168,13 @@ def split_continuous_categorical_targets(
|
|
|
168
168
|
f" - Categorical: {df_categorical.shape}\n"
|
|
169
169
|
f" - Targets: {df_targets.shape}"
|
|
170
170
|
)
|
|
171
|
+
|
|
172
|
+
if isinstance(df_continuous, pd.Series):
|
|
173
|
+
df_continuous = df_continuous.to_frame()
|
|
174
|
+
if isinstance(df_categorical, pd.Series):
|
|
175
|
+
df_categorical = df_categorical.to_frame()
|
|
176
|
+
if isinstance(df_targets, pd.Series):
|
|
177
|
+
df_targets = df_targets.to_frame()
|
|
171
178
|
|
|
172
179
|
return df_continuous, df_categorical, df_targets
|
|
173
180
|
|
|
@@ -271,6 +278,7 @@ def encode_classification_target(
|
|
|
271
278
|
df: pd.DataFrame,
|
|
272
279
|
target_col: str,
|
|
273
280
|
save_dir: Union[str, Path],
|
|
281
|
+
suffix: str = "",
|
|
274
282
|
verbose: int = 2
|
|
275
283
|
) -> tuple[pd.DataFrame, dict[str, int]]:
|
|
276
284
|
"""
|
|
@@ -283,6 +291,7 @@ def encode_classification_target(
|
|
|
283
291
|
df (pd.DataFrame): Input DataFrame.
|
|
284
292
|
target_col (str): Name of the target column to encode.
|
|
285
293
|
save_dir (str | Path): Directory where the class map JSON will be saved.
|
|
294
|
+
suffix (str): Suffix to append to the class map filename.
|
|
286
295
|
verbose (int): Verbosity level for logging.
|
|
287
296
|
|
|
288
297
|
Returns:
|
|
@@ -300,9 +309,17 @@ def encode_classification_target(
|
|
|
300
309
|
_LOGGER.error(f"Target column '{target_col}' contains {n_missing} missing values. Please handle them before encoding.")
|
|
301
310
|
raise ValueError()
|
|
302
311
|
|
|
312
|
+
# validate suffix and prepend underscore if needed
|
|
313
|
+
if suffix:
|
|
314
|
+
if not suffix.startswith("_"):
|
|
315
|
+
suffix = f"_{suffix}"
|
|
316
|
+
sanitized_suffix = suffix
|
|
317
|
+
else:
|
|
318
|
+
sanitized_suffix = ''
|
|
319
|
+
|
|
303
320
|
# Ensure directory exists
|
|
304
321
|
save_path = make_fullpath(save_dir, make=True, enforce="directory")
|
|
305
|
-
file_path = save_path / "class_map.json"
|
|
322
|
+
file_path = save_path / f"class_map{sanitized_suffix}.json"
|
|
306
323
|
|
|
307
324
|
# Get unique values and sort them to ensure deterministic encoding (0, 1, 2...)
|
|
308
325
|
# Convert to string to ensure the keys in JSON are strings
|
|
@@ -322,10 +339,9 @@ def encode_classification_target(
|
|
|
322
339
|
json.dump(class_map, f, indent=4)
|
|
323
340
|
|
|
324
341
|
if verbose >= 2:
|
|
325
|
-
_LOGGER.info(f"
|
|
326
|
-
|
|
342
|
+
_LOGGER.info(f"Target '{target_col}' encoded with {len(class_map)} classes. Saved to {file_path}.")
|
|
343
|
+
|
|
327
344
|
if verbose >= 3:
|
|
328
|
-
_LOGGER.info(f"Target '{target_col}' encoded with {len(class_map)} classes.")
|
|
329
345
|
# Print a preview
|
|
330
346
|
if len(class_map) <= 10:
|
|
331
347
|
print(f" Mapping: {class_map}")
|
|
@@ -475,6 +475,9 @@ def plot_correlation_heatmap(df: pd.DataFrame,
|
|
|
475
475
|
save_path = make_fullpath(save_dir, make=True)
|
|
476
476
|
# sanitize the plot title to save the file
|
|
477
477
|
sanitized_plot_title = sanitize_filename(plot_title)
|
|
478
|
+
# prepend method to filename
|
|
479
|
+
sanitized_plot_title = f"{method}_{sanitized_plot_title}"
|
|
480
|
+
|
|
478
481
|
plot_filename = sanitized_plot_title + ".svg"
|
|
479
482
|
|
|
480
483
|
full_path = save_path / plot_filename
|
|
File without changes
|
{dragon_ml_toolbox-20.14.0.dist-info → dragon_ml_toolbox-20.14.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|