dragon-ml-toolbox 12.5.0__py3-none-any.whl → 12.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{dragon_ml_toolbox-12.5.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.5.0
+Version: 12.6.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.5.0.dist-info → dragon_ml_toolbox-12.6.0.dist-info}/RECORD RENAMED
@@ -1,41 +1,41 @@
-dragon_ml_toolbox-12.5.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
-dragon_ml_toolbox-12.5.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
-ml_tools/ETL_cleaning.py,sha256=PLRSR-VYnt1nNT9XrcWq40SE0VzHCw7DQ8v9czfSQsU,20366
-ml_tools/ETL_engineering.py,sha256=l0I6Og9o4s6EODdk0kZXjbbC-a3vVPYy1FopP2BkQSQ,54909
+dragon_ml_toolbox-12.6.0.dist-info/licenses/LICENSE,sha256=L35WDmmLZNTlJvxF6Vy7Uy4SYNi6rCfWUqlTHpoRMoU,1081
+dragon_ml_toolbox-12.6.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=iy2r_R7wjzsCbz_Q_jMsp_jfZ6oP8XW9QhwzRBH0mGY,1904
+ml_tools/ETL_cleaning.py,sha256=2VBRllV8F-ZiPylPp8Az2gwn5ztgazN0BH5OKnRUhV0,20402
+ml_tools/ETL_engineering.py,sha256=KfYqgsxupAx6e_TxwO1LZXeu5mFkIhVXJrNjP3CzIZc,54927
 ml_tools/GUI_tools.py,sha256=Va6ig-dHULPVRwQYYtH3fvY5XPIoqRcJpRW8oXC55Hw,45413
-ml_tools/MICE_imputation.py,sha256=eNN7JuT43bydAJ5E2k2A5sDjYDu3X8kCHtMdFBkzjR0,11699
+ml_tools/MICE_imputation.py,sha256=X273Qlgoqqg7KTmoKd75YDyAPB0UIbTzGP3xsCmRh3E,11717
 ml_tools/ML_callbacks.py,sha256=-XRIZEy3CPJWTHcoReyIw53FZlTs3pWcTVVnncTQQSc,13909
 ml_tools/ML_datasetmaster.py,sha256=t6q6mU9lz2rYKTVPKjA7yZ5ImV7_NykiciHaYnqIEpA,30822
 ml_tools/ML_evaluation.py,sha256=tLswOPgH4G1KExSMn0876YtNkbxPh-W3J4MYOjomMWA,16208
 ml_tools/ML_evaluation_multi.py,sha256=6OZyQ4SM9ALh38mOABmiHgIQDWcovsD_iOo7Bg9YZCE,12516
 ml_tools/ML_inference.py,sha256=ymFvncFsU10PExq87xnEj541DKV5ck0nMuK8ToJHzVQ,23067
 ml_tools/ML_models.py,sha256=pSCV6KbmVnPZr49Kbyg7g25CYaWBWJr6IinBHKgVKGw,28042
-ml_tools/ML_optimization.py,sha256=-Rb7ffp-VS6Bv5U0Dw6nSTNp2bGu7BaBQi04mTmSdEE,22942
+ml_tools/ML_optimization.py,sha256=es3TlQbY7RYgJMZnznkjYGbUxFnAqzZxE_g3_qLK9Q8,22960
 ml_tools/ML_scaler.py,sha256=tw6onj9o8_kk3FQYb930HUzvv1zsFZe2YZJdF3LtHkU,7538
-ml_tools/ML_simple_optimization.py,sha256=X96zX6XPu3ggrcOapuG69jsiZJczJNihS1rcwi9OsBI,18159
+ml_tools/ML_simple_optimization.py,sha256=W2mce1XFCuiOHTOjOsCNbETISHn5MwYlYsTIXH5hMMo,18177
 ml_tools/ML_trainer.py,sha256=_g48w5Ak-wQr5fGHdJqlcpnzv3gWyL1ghkOhy9VOZbo,23930
 ml_tools/ML_utilities.py,sha256=35DfZzAwfDwVwfRECD8X_2ynsU2NCpTdNJSmza6oAzQ,8712
 ml_tools/PSO_optimization.py,sha256=fVHeemqilBS0zrGV25E5yKwDlGdd2ZKa18d8CZ6Q6Fk,22961
 ml_tools/RNN_forecast.py,sha256=Qa2KoZfdAvSjZ4yE78N4BFXtr3tTr0Gx7tQJZPotsh0,1967
 ml_tools/SQL.py,sha256=vXLPGfVVg8bfkbBE3HVfyEclVbdJy0TBhuQONtMwSCQ,11234
-ml_tools/VIF_factor.py,sha256=dizjK0zmgOMuLBnJ66y5Sll5do6wjGWhAPVzJF1uwhQ,10404
+ml_tools/VIF_factor.py,sha256=at5IVqPvicja2-DNSTSIIy3SkzDWCmLzo3qTG_qr5n8,10422
 ml_tools/__init__.py,sha256=q0y9faQ6e17XCQ7eUiCZ1FJ4Bg5EQqLjZ9f_l5REUUY,41
 ml_tools/_logger.py,sha256=dlp5cGbzooK9YSNSZYB4yjZrOaQUGW8PTrM411AOvL8,4717
 ml_tools/_script_info.py,sha256=21r83LV3RubsNZ_RTEUON6RbDf7Mh4_udweNcvdF_Fk,212
 ml_tools/constants.py,sha256=3br5Rk9cL2IUo638eJuMOGdbGQaWssaUecYEvSeRBLM,3322
 ml_tools/custom_logger.py,sha256=xot-VeZFigKjcVxADgzvI54vZO_MqMMejo7JmDED8Xo,5892
-ml_tools/data_exploration.py,sha256=OwPJHAM6MpkUD-S76XF4v-NRXjw5-pizKNHpyKj5OwE,46809
+ml_tools/data_exploration.py,sha256=xvuWHrxk5HOtY7_WmtXEUNf1d_TUxuNPFPFyN7QJxCY,46836
 ml_tools/ensemble_evaluation.py,sha256=FGHSe8LBI8_w8LjNeJWOcYQ1UK_mc6fVah8gmSvNVGg,26853
 ml_tools/ensemble_inference.py,sha256=0yLmLNj45RVVoSCLH1ZYJG9IoAhTkWUqEZmLOQTFGTY,9348
-ml_tools/ensemble_learning.py,sha256=aTPeKthO4zRWBEaQJOUj8jEqVHiHjjOMXuiEWjI9NxM,21946
+ml_tools/ensemble_learning.py,sha256=vsIED7nlheYI4w2SBzP6SC1AnNeMfn-2A1Gqw5EfxsM,21964
 ml_tools/handle_excel.py,sha256=pfdAPb9ywegFkM9T54bRssDOsX-K7rSeV0RaMz7lEAo,14006
 ml_tools/keys.py,sha256=FDpbS3Jb0pjrVvvp2_8nZi919mbob_-xwuy5OOtKM_A,1848
 ml_tools/math_utilities.py,sha256=PxoOrnuj6Ntp7_TJqyDWi0JX03WpAO5iaFNK2Oeq5I4,8800
 ml_tools/optimization_tools.py,sha256=ewYMAdSGlFxYALAGFXn-MsHpvW_Sbx6I-sKg9Kp6rB8,13533
 ml_tools/path_manager.py,sha256=CyDU16pOKmC82jPubqJPT6EBt-u-3rGVbxyPIZCvDDY,18432
 ml_tools/serde.py,sha256=UIshIesHRFmxr8F6B3LxGG8bYc1HHK-nlE3kENSZL18,5288
-ml_tools/utilities.py,sha256=gef62GLK7ev5BWkkQekeJoVZqwf2mIuOlOfyCw6WdtE,13882
-dragon_ml_toolbox-12.5.0.dist-info/METADATA,sha256=dGSxxwPh1WKW5Tnnq2XCo35VyAnAajajhOgckHRseKQ,6166
-dragon_ml_toolbox-12.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-12.5.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-12.5.0.dist-info/RECORD,,
+ml_tools/utilities.py,sha256=_uCYOo5f6Zp6wxabrHwDfjvaipqziD7BWSdKhjl2VGU,13923
+dragon_ml_toolbox-12.6.0.dist-info/METADATA,sha256=Bg2mEGcvqeyB4Pr-M3ETjAM3GOwr7FBH0s57XpuBij0,6166
+dragon_ml_toolbox-12.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-12.6.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-12.6.0.dist-info/RECORD,,
ml_tools/ETL_cleaning.py CHANGED
@@ -5,7 +5,7 @@ from typing import Union, List, Dict
 
 from .path_manager import sanitize_filename, make_fullpath
 from .data_exploration import drop_macro
-from .utilities import save_dataframe, load_dataframe
+from .utilities import save_dataframe_filename, load_dataframe
 from ._script_info import _script_info
 from ._logger import _LOGGER
 
@@ -263,7 +263,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
     df_final = _cleaner_core(df_in=df, all_lowercase=all_lowercase)
 
     # Save cleaned dataframe
-    save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)
 
     _LOGGER.info(f"Data successfully cleaned.")
 
@@ -329,7 +329,7 @@ def basic_clean_drop(input_filepath: Union[str,Path], output_filepath: Union[str
                      threshold=threshold)
 
     # Save cleaned dataframe
-    save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)
 
     _LOGGER.info(f"Data successfully cleaned.")
 
@@ -494,7 +494,7 @@ class DataFrameCleaner:
         if isinstance(output_filepath, str):
            output_filepath = make_fullpath(input_path=output_filepath, enforce="file")
 
-        save_dataframe(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
+        save_dataframe_filename(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
 
        return None
 
ml_tools/ETL_engineering.py CHANGED
@@ -3,7 +3,7 @@ import re
 from pathlib import Path
 from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple
 
-from .utilities import load_dataframe, save_dataframe
+from .utilities import load_dataframe, save_dataframe_filename
 from .path_manager import make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -230,7 +230,7 @@ class DataProcessor:
         df_processed = self.transform(df)
 
         # save processed df
-        save_dataframe(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
+        save_dataframe_filename(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
 
     def __str__(self) -> str:
         """
ml_tools/MICE_imputation.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
 from typing import Optional, Union
 
-from .utilities import load_dataframe, merge_dataframes, save_dataframe
+from .utilities import load_dataframe, merge_dataframes, save_dataframe_filename
 from .math_utilities import threshold_binary_values
 from .path_manager import sanitize_filename, make_fullpath, list_csv_paths
 from ._logger import _LOGGER
@@ -75,7 +75,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
 def save_imputed_datasets(save_dir: Union[str, Path], imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
     for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
         merged_df = merge_dataframes(imputed_df, df_targets, direction="horizontal", verbose=False)
-        save_dataframe(df=merged_df, save_dir=save_dir, filename=subname)
+        save_dataframe_filename(df=merged_df, save_dir=save_dir, filename=subname)
 
 
 #Get names of features that had missing values before imputation
ml_tools/ML_optimization.py CHANGED
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 from .math_utilities import discretize_categorical_values
 
 
@@ -513,7 +513,7 @@ def _run_single_optimization_rep(
 
 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
 
 
 def info():
ml_tools/ML_simple_optimization.py CHANGED
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 from .math_utilities import threshold_binary_values
 
 """
@@ -406,7 +406,7 @@ def s_run_optimization(
 
 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
 
 
 def info():
ml_tools/VIF_factor.py CHANGED
@@ -7,7 +7,7 @@ from statsmodels.tools.tools import add_constant
 import warnings
 from pathlib import Path
 
-from .utilities import yield_dataframes_from_dir, save_dataframe
+from .utilities import yield_dataframes_from_dir, save_dataframe_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
@@ -229,7 +229,7 @@ def compute_vif_multi(input_directory: Union[str, Path],
         result_df, dropped_cols = drop_vif_based(df=df, vif_df=vif_dataframe)
 
         if len(dropped_cols) > 0:
-            save_dataframe(df=result_df, save_dir=output_dataset_path, filename=new_filename)
+            save_dataframe_filename(df=result_df, save_dir=output_dataset_path, filename=new_filename)
 
 
 def info():
ml_tools/data_exploration.py CHANGED
@@ -10,7 +10,7 @@ import re
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 
 
 # Keep track of all available tools, show using `info()`
@@ -269,7 +269,7 @@ def drop_macro(df: pd.DataFrame,
 
     # Log initial state
     missing_data = show_null_columns(df=df_clean)
-    save_dataframe(df=missing_data.reset_index(drop=False),
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                    save_dir=log_directory,
                    filename="Missing_Data_start")
 
@@ -298,7 +298,7 @@ def drop_macro(df: pd.DataFrame,
 
     # log final state
     missing_data = show_null_columns(df=df_clean)
-    save_dataframe(df=missing_data.reset_index(drop=False),
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                    save_dir=log_directory,
                    filename="Missing_Data_final")
 
ml_tools/ensemble_learning.py CHANGED
@@ -14,7 +14,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.base import clone
 
 from .utilities import yield_dataframes_from_dir, train_dataset_yielder
-from .serde import serialize_object
+from .serde import serialize_object_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from .keys import EnsembleKeys
@@ -411,7 +411,7 @@ def _save_model(trained_model, model_name: str, target_name:str, feature_names:
                EnsembleKeys.FEATURES: feature_names,
                EnsembleKeys.TARGET: target_name}
 
-    serialize_object(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)
+    serialize_object_filename(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)
 
 
 # TRAIN EVALUATE PIPELINE
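
Note: the only change in ensemble_learning.py is the rename of the serde helper from serialize_object to serialize_object_filename. A minimal calling sketch, assuming the keyword signature shown in the hunk above; the import path follows the package layout in the RECORD, and the model_bundle payload is a hypothetical placeholder:

from pathlib import Path
from ml_tools.serde import serialize_object_filename  # formerly serialize_object

# Hypothetical payload; _save_model passes a dict holding the model,
# its feature names, and its target name under EnsembleKeys.
model_bundle = {"model": None, "feature_names": ["x1", "x2"], "target_name": "y"}

serialize_object_filename(
    obj=model_bundle,           # object to serialize
    save_dir=Path("outputs"),   # destination directory
    filename="my_model",        # file name; extension handling is left to serde
    verbose=False,
    raise_on_error=True,
)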
ml_tools/utilities.py CHANGED
@@ -14,8 +14,8 @@ __all__ = [
     "load_dataframe",
     "yield_dataframes_from_dir",
     "merge_dataframes",
+    "save_dataframe_filename",
     "save_dataframe",
-    "save_dataframe_path",
     "distribute_dataset_by_target",
     "train_dataset_orchestrator",
     "train_dataset_yielder"
@@ -210,7 +210,7 @@ def merge_dataframes(
     return merged_df
 
 
-def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
+def save_dataframe_filename(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
     """
     Saves a pandas or polars DataFrame to a CSV file.
 
@@ -250,11 +250,11 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Pa
     _LOGGER.info(f"Saved dataset: '{filename}' with shape: {df.shape}")
 
 
-def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
+def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
     """
     Saves a DataFrame to a specified full path.
 
-    This function is a convenience wrapper for `save_dataframe()`. It takes a
+    This function is a wrapper for `save_dataframe_filename()`. It takes a
     single `pathlib.Path` object pointing to a `.csv` file.
 
     Args:
@@ -265,9 +265,9 @@ def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
         _LOGGER.error('A path object pointing to a .csv file must be provided.')
         raise ValueError()
 
-    save_dataframe(df=df,
-                   save_dir=full_path.parent,
-                   filename=full_path.name)
+    save_dataframe_filename(df=df,
+                            save_dir=full_path.parent,
+                            filename=full_path.name)
 
 
@@ -351,7 +351,7 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
                 filename = df_dir.name + '_' + target_name + '_' + df_name
             else:
                 filename = target_name + '_' + df_name
-            save_dataframe(df=df, save_dir=save_dir, filename=filename)
+            save_dataframe_filename(df=df, save_dir=save_dir, filename=filename)
             total_saved += 1
         except Exception as e:
             _LOGGER.error(f"Failed to process file '{df_path}'. Reason: {e}")
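
Note: taken together, the utilities.py hunks define the new naming scheme that all the import renames above follow: the old save_dataframe(df, save_dir, filename) becomes save_dataframe_filename, and the old path-based save_dataframe_path(df, full_path) takes over the save_dataframe name. A minimal migration sketch using only the signatures visible in this diff; the paths and data are illustrative:

from pathlib import Path
import pandas as pd
from ml_tools.utilities import save_dataframe, save_dataframe_filename

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Directory-plus-filename form (named `save_dataframe` before 12.6.0):
save_dataframe_filename(df=df, save_dir=Path("outputs"), filename="example")

# Single-path form (named `save_dataframe_path` before 12.6.0);
# the path must point to a .csv file, otherwise a ValueError is raised.
save_dataframe(df=df, full_path=Path("outputs") / "example.csv")

Callers that used the old three-argument save_dataframe, as every module in this diff did, must switch to save_dataframe_filename; code that passed a single Path can adopt the shorter save_dataframe name.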