dragon-ml-toolbox 12.4.0__tar.gz → 12.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (46)
  1. {dragon_ml_toolbox-12.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.6.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_cleaning.py +4 -4
  4. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_engineering.py +2 -2
  5. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/MICE_imputation.py +2 -2
  6. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_optimization.py +2 -2
  7. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_simple_optimization.py +2 -2
  8. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/VIF_factor.py +2 -2
  9. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/data_exploration.py +3 -3
  10. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_learning.py +2 -2
  11. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/serde.py +37 -1
  12. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/utilities.py +8 -8
  13. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/pyproject.toml +1 -1
  14. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/LICENSE +0 -0
  15. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/LICENSE-THIRD-PARTY.md +0 -0
  16. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/README.md +0 -0
  17. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  18. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  19. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  20. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  21. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/GUI_tools.py +0 -0
  22. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_callbacks.py +0 -0
  23. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_datasetmaster.py +0 -0
  24. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_evaluation.py +0 -0
  25. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_evaluation_multi.py +0 -0
  26. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_inference.py +0 -0
  27. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_models.py +0 -0
  28. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_scaler.py +0 -0
  29. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_trainer.py +0 -0
  30. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_utilities.py +0 -0
  31. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/PSO_optimization.py +0 -0
  32. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/RNN_forecast.py +0 -0
  33. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/SQL.py +0 -0
  34. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/__init__.py +0 -0
  35. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/_logger.py +0 -0
  36. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/_script_info.py +0 -0
  37. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/constants.py +0 -0
  38. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/custom_logger.py +0 -0
  39. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_evaluation.py +0 -0
  40. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_inference.py +0 -0
  41. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/handle_excel.py +0 -0
  42. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/keys.py +0 -0
  43. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/math_utilities.py +0 -0
  44. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/optimization_tools.py +0 -0
  45. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/path_manager.py +0 -0
  46. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/setup.cfg +0 -0
{dragon_ml_toolbox-12.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.6.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.4.0
+Version: 12.6.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.4.0
+Version: 12.6.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_cleaning.py
@@ -5,7 +5,7 @@ from typing import Union, List, Dict

 from .path_manager import sanitize_filename, make_fullpath
 from .data_exploration import drop_macro
-from .utilities import save_dataframe, load_dataframe
+from .utilities import save_dataframe_filename, load_dataframe
 from ._script_info import _script_info
 from ._logger import _LOGGER

@@ -263,7 +263,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
     df_final = _cleaner_core(df_in=df, all_lowercase=all_lowercase)

     # Save cleaned dataframe
-    save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)

     _LOGGER.info(f"Data successfully cleaned.")

@@ -329,7 +329,7 @@ def basic_clean_drop(input_filepath: Union[str,Path], output_filepath: Union[str
                      threshold=threshold)

     # Save cleaned dataframe
-    save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)

     _LOGGER.info(f"Data successfully cleaned.")

@@ -494,7 +494,7 @@ class DataFrameCleaner:
        if isinstance(output_filepath, str):
            output_filepath = make_fullpath(input_path=output_filepath, enforce="file")

-       save_dataframe(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
+       save_dataframe_filename(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)

        return None

{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_engineering.py
@@ -3,7 +3,7 @@ import re
 from pathlib import Path
 from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple

-from .utilities import load_dataframe, save_dataframe
+from .utilities import load_dataframe, save_dataframe_filename
 from .path_manager import make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -230,7 +230,7 @@ class DataProcessor:
        df_processed = self.transform(df)

        # save processed df
-       save_dataframe(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
+       save_dataframe_filename(df=df_processed, save_dir=out_path.parent, filename=out_path.name)

    def __str__(self) -> str:
        """
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/MICE_imputation.py
@@ -6,7 +6,7 @@ import numpy as np
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
 from typing import Optional, Union

-from .utilities import load_dataframe, merge_dataframes, save_dataframe
+from .utilities import load_dataframe, merge_dataframes, save_dataframe_filename
 from .math_utilities import threshold_binary_values
 from .path_manager import sanitize_filename, make_fullpath, list_csv_paths
 from ._logger import _LOGGER
@@ -75,7 +75,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
 def save_imputed_datasets(save_dir: Union[str, Path], imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
     for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
         merged_df = merge_dataframes(imputed_df, df_targets, direction="horizontal", verbose=False)
-        save_dataframe(df=merged_df, save_dir=save_dir, filename=subname)
+        save_dataframe_filename(df=merged_df, save_dir=save_dir, filename=subname)


 #Get names of features that had missing values before imputation
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_optimization.py
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 from .math_utilities import discretize_categorical_values


@@ -513,7 +513,7 @@ def _run_single_optimization_rep(

 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)


 def info():
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_simple_optimization.py
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 from .math_utilities import threshold_binary_values

 """
@@ -406,7 +406,7 @@ def s_run_optimization(

 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)


 def info():
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/VIF_factor.py
@@ -7,7 +7,7 @@ from statsmodels.tools.tools import add_constant
 import warnings
 from pathlib import Path

-from .utilities import yield_dataframes_from_dir, save_dataframe
+from .utilities import yield_dataframes_from_dir, save_dataframe_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
@@ -229,7 +229,7 @@ def compute_vif_multi(input_directory: Union[str, Path],
        result_df, dropped_cols = drop_vif_based(df=df, vif_df=vif_dataframe)

        if len(dropped_cols) > 0:
-           save_dataframe(df=result_df, save_dir=output_dataset_path, filename=new_filename)
+           save_dataframe_filename(df=result_df, save_dir=output_dataset_path, filename=new_filename)


 def info():
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/data_exploration.py
@@ -10,7 +10,7 @@ import re
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename


 # Keep track of all available tools, show using `info()`
@@ -269,7 +269,7 @@ def drop_macro(df: pd.DataFrame,

     # Log initial state
     missing_data = show_null_columns(df=df_clean)
-    save_dataframe(df=missing_data.reset_index(drop=False),
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                    save_dir=log_directory,
                    filename="Missing_Data_start")

@@ -298,7 +298,7 @@ def drop_macro(df: pd.DataFrame,

     # log final state
     missing_data = show_null_columns(df=df_clean)
-    save_dataframe(df=missing_data.reset_index(drop=False),
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                    save_dir=log_directory,
                    filename="Missing_Data_final")

{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_learning.py
@@ -14,7 +14,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.base import clone

 from .utilities import yield_dataframes_from_dir, train_dataset_yielder
-from .serde import serialize_object
+from .serde import serialize_object_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from .keys import EnsembleKeys
@@ -411,7 +411,7 @@ def _save_model(trained_model, model_name: str, target_name:str, feature_names:
                EnsembleKeys.FEATURES: feature_names,
                EnsembleKeys.TARGET: target_name}

-    serialize_object(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)
+    serialize_object_filename(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)


 # TRAIN EVALUATE PIPELINE
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/serde.py
@@ -9,12 +9,13 @@ from ._logger import _LOGGER


 __all__ = [
+    "serialize_object_filename",
     "serialize_object",
     "deserialize_object",
 ]


-def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
+def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
     """
     Serializes a Python object using joblib; suitable for Python built-ins, numpy, and pandas.

@@ -40,6 +41,41 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
         _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
     return None

+
+def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
+    """
+    Serializes a Python object using joblib to a specific file path.
+
+    Suitable for Python built-ins, numpy, and pandas.
+
+    Parameters:
+        obj (Any) : The Python object to serialize.
+        file_path (Path) : The full file path to save the object to.
+            '.joblib' extension will be appended if missing.
+        raise_on_error (bool) : If True, raises exceptions on failure.
+    """
+    try:
+        # Ensure the extension is correct
+        if file_path.suffix != '.joblib':
+            file_path = file_path.with_suffix(file_path.suffix + '.joblib')
+
+        # Ensure the parent directory exists
+        _save_dir = make_fullpath(file_path.parent, make=True, enforce="directory")
+
+        # Dump the object
+        joblib.dump(obj, file_path)
+
+    except (IOError, OSError, TypeError, TerminatedWorkerError) as e:
+        _LOGGER.error(f"Failed to serialize object of type '{type(obj)}' to '{file_path}'. Error: {e}")
+        if raise_on_error:
+            raise e
+        return None
+    else:
+        if verbose:
+            _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
+        return None
+
+
 # Define a TypeVar to link the expected type to the return type of deserialization
 T = TypeVar('T')

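In short, the 12.4.0 directory-plus-filename serializer is renamed to serialize_object_filename, and serialize_object now takes a single Path. A minimal migration sketch based on the signatures in the hunk above; the payload and paths are illustrative, and the import assumes the package's ml_tools top-level module:

from pathlib import Path
from ml_tools.serde import serialize_object, serialize_object_filename

payload = {"weights": [0.1, 0.2], "bias": 0.5}  # any joblib-serializable object

# 12.6.0 path-based API; per the docstring, '.joblib' is appended if missing
serialize_object(obj=payload, file_path=Path("artifacts/model_state"))

# Directory + filename API (this function was named `serialize_object` in 12.4.0)
serialize_object_filename(obj=payload, save_dir="artifacts", filename="model_state")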
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/utilities.py
@@ -14,8 +14,8 @@ __all__ = [
     "load_dataframe",
     "yield_dataframes_from_dir",
     "merge_dataframes",
+    "save_dataframe_filename",
     "save_dataframe",
-    "save_dataframe_path",
     "distribute_dataset_by_target",
     "train_dataset_orchestrator",
     "train_dataset_yielder"
@@ -210,7 +210,7 @@ def merge_dataframes(
     return merged_df


-def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
+def save_dataframe_filename(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
     """
     Saves a pandas or polars DataFrame to a CSV file.

@@ -250,11 +250,11 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Pa
     _LOGGER.info(f"Saved dataset: '{filename}' with shape: {df.shape}")


-def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
+def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
     """
     Saves a DataFrame to a specified full path.

-    This function is a convenience wrapper for `save_dataframe()`. It takes a
+    This function is a wrapper for `save_dataframe_filename()`. It takes a
     single `pathlib.Path` object pointing to a `.csv` file.

     Args:
@@ -265,9 +265,9 @@ def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
         _LOGGER.error('A path object pointing to a .csv file must be provided.')
         raise ValueError()

-    save_dataframe(df=df,
-                   save_dir=full_path.parent,
-                   filename=full_path.name)
+    save_dataframe_filename(df=df,
+                            save_dir=full_path.parent,
+                            filename=full_path.name)


 def distribute_dataset_by_target(
@@ -351,7 +351,7 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
                filename = df_dir.name + '_' + target_name + '_' + df_name
            else:
                filename = target_name + '_' + df_name
-           save_dataframe(df=df, save_dir=save_dir, filename=filename)
+           save_dataframe_filename(df=df, save_dir=save_dir, filename=filename)
            total_saved += 1
        except Exception as e:
            _LOGGER.error(f"Failed to process file '{df_path}'. Reason: {e}")
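The same rename pattern applies to the dataframe savers: save_dataframe is now the single-Path wrapper (it raises ValueError unless the path points to a .csv file), while save_dataframe_filename keeps the old save_dir + filename signature. A sketch of both call styles, with illustrative data and paths:

from pathlib import Path
import pandas as pd
from ml_tools.utilities import save_dataframe, save_dataframe_filename

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# 12.6.0: `save_dataframe` takes one Path that must point to a .csv file
save_dataframe(df=df, full_path=Path("output/data.csv"))

# Equivalent directory + filename call (this was `save_dataframe` in 12.4.0)
save_dataframe_filename(df=df, save_dir="output", filename="data.csv")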
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "12.4.0"
+version = "12.6.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }