dragon-ml-toolbox 20.14.0__py3-none-any.whl → 20.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 20.14.0
3
+ Version: 20.14.1
4
4
  Summary: Complete pipelines and helper tools for data science and machine learning projects.
5
5
  Author-email: Karl Luigi Loza Vidaurre <luigiloza@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,5 @@
1
- dragon_ml_toolbox-20.14.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
- dragon_ml_toolbox-20.14.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
1
+ dragon_ml_toolbox-20.14.1.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
2
+ dragon_ml_toolbox-20.14.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=0-HBRMMgKuwtGy6nMJZvIn1fLxhx_ksyyVB2U_iyYZU,2818
3
3
  ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
5
5
  ml_tools/ETL_cleaning/__init__.py,sha256=gLRHF-qzwpqKTvbbn9chIQELeUDh_XGpBRX28j-5IqI,545
@@ -107,8 +107,8 @@ ml_tools/_core/_script_info.py,sha256=LtFGt10gEvCnhIRMKJPi2yXkiGLcdr7lE-oIP2XGHz
107
107
  ml_tools/data_exploration/__init__.py,sha256=XNA8gcRx5ifrv092HA7HSpek8havlk_3RZi9aq9dSjg,1957
108
108
  ml_tools/data_exploration/_analysis.py,sha256=JSoFJSkv4-_v9YxxmjHZ_PeFRneDENjSEo2sy_uC4oY,14196
109
109
  ml_tools/data_exploration/_cleaning.py,sha256=pAZOXgGK35j7O8q6cnyTwYK1GLNnD04A8p2fSyMB1mg,20906
110
- ml_tools/data_exploration/_features.py,sha256=twJ6OixU4ItRXA8rPJRfg2N9QVsbn38CFqJiLcXav1A,28664
111
- ml_tools/data_exploration/_plotting.py,sha256=zH1dPcIoAlOuww23xIoBCsQOAshPPv9OyGposOA2RvI,19883
110
+ ml_tools/data_exploration/_features.py,sha256=_aBMW7eqSm6oUj54ftidsv9zdywOkc1eyZgITb82XF8,29237
111
+ ml_tools/data_exploration/_plotting.py,sha256=Vg9qS46akbAyrZAgBrPWg2p29V5vqqY4Bk4SHwZLZNI,19995
112
112
  ml_tools/data_exploration/_schema_ops.py,sha256=Fd6fBGGv4OpxmJ1HG9pith6QL90z0tzssCvzkQxlEEQ,11083
113
113
  ml_tools/ensemble_evaluation/__init__.py,sha256=t4Gr8EGEk8RLatyc92-S0BzbQvdvodzoF-qDAH2qjVg,546
114
114
  ml_tools/ensemble_evaluation/_ensemble_evaluation.py,sha256=-sX9cLMaa0FOQDikmVv2lsCYtQ56Kftd3tILnNej0Hg,28346
@@ -143,7 +143,7 @@ ml_tools/utilities/__init__.py,sha256=h4lE3SQstg-opcQj6QSKhu-HkqSbmHExsWoM9vC5D9
143
143
  ml_tools/utilities/_translate.py,sha256=U8hRPa3PmTpIf9n9yR3gBGmp_hkcsjQLwjAHSHc0WHs,10325
144
144
  ml_tools/utilities/_utility_save_load.py,sha256=EFvFaTaHahDQWdJWZr-j7cHqRbG_Xrpc96228JhV-bs,16773
145
145
  ml_tools/utilities/_utility_tools.py,sha256=bN0J9d1S0W5wNzNntBWqDsJcEAK7-1OgQg3X2fwXns0,6918
146
- dragon_ml_toolbox-20.14.0.dist-info/METADATA,sha256=32IleSQa7t7E42ZB5rM32Lf1MlSAMtKkU-TFky3VckA,7889
147
- dragon_ml_toolbox-20.14.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
148
- dragon_ml_toolbox-20.14.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
149
- dragon_ml_toolbox-20.14.0.dist-info/RECORD,,
146
+ dragon_ml_toolbox-20.14.1.dist-info/METADATA,sha256=oV6v5gFhRVLpuJ3HgL7Qpn8_Dgk9DGkYcOjSfl2kIh0,7889
147
+ dragon_ml_toolbox-20.14.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
148
+ dragon_ml_toolbox-20.14.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
149
+ dragon_ml_toolbox-20.14.1.dist-info/RECORD,,
@@ -168,6 +168,13 @@ def split_continuous_categorical_targets(
168
168
  f" - Categorical: {df_categorical.shape}\n"
169
169
  f" - Targets: {df_targets.shape}"
170
170
  )
171
+
172
+ if isinstance(df_continuous, pd.Series):
173
+ df_continuous = df_continuous.to_frame()
174
+ if isinstance(df_categorical, pd.Series):
175
+ df_categorical = df_categorical.to_frame()
176
+ if isinstance(df_targets, pd.Series):
177
+ df_targets = df_targets.to_frame()
171
178
 
172
179
  return df_continuous, df_categorical, df_targets
173
180
 
@@ -271,6 +278,7 @@ def encode_classification_target(
271
278
  df: pd.DataFrame,
272
279
  target_col: str,
273
280
  save_dir: Union[str, Path],
281
+ suffix: str = "",
274
282
  verbose: int = 2
275
283
  ) -> tuple[pd.DataFrame, dict[str, int]]:
276
284
  """
@@ -283,6 +291,7 @@ def encode_classification_target(
283
291
  df (pd.DataFrame): Input DataFrame.
284
292
  target_col (str): Name of the target column to encode.
285
293
  save_dir (str | Path): Directory where the class map JSON will be saved.
294
+ suffix (str): Suffix to append to the class map filename.
286
295
  verbose (int): Verbosity level for logging.
287
296
 
288
297
  Returns:
@@ -300,9 +309,17 @@ def encode_classification_target(
300
309
  _LOGGER.error(f"Target column '{target_col}' contains {n_missing} missing values. Please handle them before encoding.")
301
310
  raise ValueError()
302
311
 
312
+ # validate suffix and prepend underscore if needed
313
+ if suffix:
314
+ if not suffix.startswith("_"):
315
+ suffix = f"_{suffix}"
316
+ sanitized_suffix = suffix
317
+ else:
318
+ sanitized_suffix = ''
319
+
303
320
  # Ensure directory exists
304
321
  save_path = make_fullpath(save_dir, make=True, enforce="directory")
305
- file_path = save_path / "class_map.json"
322
+ file_path = save_path / f"class_map{sanitized_suffix}.json"
306
323
 
307
324
  # Get unique values and sort them to ensure deterministic encoding (0, 1, 2...)
308
325
  # Convert to string to ensure the keys in JSON are strings
@@ -322,10 +339,9 @@ def encode_classification_target(
322
339
  json.dump(class_map, f, indent=4)
323
340
 
324
341
  if verbose >= 2:
325
- _LOGGER.info(f"Class mapping saved to: '{file_path}'")
326
-
342
+ _LOGGER.info(f"Target '{target_col}' encoded with {len(class_map)} classes. Saved to {file_path}.")
343
+
327
344
  if verbose >= 3:
328
- _LOGGER.info(f"Target '{target_col}' encoded with {len(class_map)} classes.")
329
345
  # Print a preview
330
346
  if len(class_map) <= 10:
331
347
  print(f" Mapping: {class_map}")
@@ -475,6 +475,9 @@ def plot_correlation_heatmap(df: pd.DataFrame,
475
475
  save_path = make_fullpath(save_dir, make=True)
476
476
  # sanitize the plot title to save the file
477
477
  sanitized_plot_title = sanitize_filename(plot_title)
478
+ # prepend method to filename
479
+ sanitized_plot_title = f"{method}_{sanitized_plot_title}"
480
+
478
481
  plot_filename = sanitized_plot_title + ".svg"
479
482
 
480
483
  full_path = save_path / plot_filename