dragon-ml-toolbox 12.4.0__tar.gz → 12.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (46)
  1. {dragon_ml_toolbox-12.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.6.0}/PKG-INFO +1 -1
  2. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info}/PKG-INFO +1 -1
  3. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_cleaning.py +4 -4
  4. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_engineering.py +2 -2
  5. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/MICE_imputation.py +2 -2
  6. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_optimization.py +2 -2
  7. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_simple_optimization.py +2 -2
  8. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/VIF_factor.py +2 -2
  9. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/data_exploration.py +3 -3
  10. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_learning.py +2 -2
  11. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/serde.py +37 -1
  12. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/utilities.py +8 -8
  13. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/pyproject.toml +1 -1
  14. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/LICENSE +0 -0
  15. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/LICENSE-THIRD-PARTY.md +0 -0
  16. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/README.md +0 -0
  17. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/SOURCES.txt +0 -0
  18. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  19. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/requires.txt +0 -0
  20. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  21. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/GUI_tools.py +0 -0
  22. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_callbacks.py +0 -0
  23. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_datasetmaster.py +0 -0
  24. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_evaluation.py +0 -0
  25. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_evaluation_multi.py +0 -0
  26. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_inference.py +0 -0
  27. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_models.py +0 -0
  28. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_scaler.py +0 -0
  29. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_trainer.py +0 -0
  30. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_utilities.py +0 -0
  31. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/PSO_optimization.py +0 -0
  32. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/RNN_forecast.py +0 -0
  33. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/SQL.py +0 -0
  34. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/__init__.py +0 -0
  35. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/_logger.py +0 -0
  36. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/_script_info.py +0 -0
  37. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/constants.py +0 -0
  38. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/custom_logger.py +0 -0
  39. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_evaluation.py +0 -0
  40. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_inference.py +0 -0
  41. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/handle_excel.py +0 -0
  42. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/keys.py +0 -0
  43. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/math_utilities.py +0 -0
  44. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/optimization_tools.py +0 -0
  45. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/path_manager.py +0 -0
  46. {dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/setup.cfg +0 -0
{dragon_ml_toolbox-12.4.0/dragon_ml_toolbox.egg-info → dragon_ml_toolbox-12.6.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.4.0
+Version: 12.6.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0/dragon_ml_toolbox.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 12.4.0
+Version: 12.6.0
 Summary: A collection of tools for data science and machine learning projects.
 Author-email: "Karl L. Loza Vidaurre" <luigiloza@gmail.com>
 License-Expression: MIT
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_cleaning.py
@@ -5,7 +5,7 @@ from typing import Union, List, Dict

 from .path_manager import sanitize_filename, make_fullpath
 from .data_exploration import drop_macro
-from .utilities import save_dataframe, load_dataframe
+from .utilities import save_dataframe_filename, load_dataframe
 from ._script_info import _script_info
 from ._logger import _LOGGER

@@ -263,7 +263,7 @@ def basic_clean(input_filepath: Union[str,Path], output_filepath: Union[str,Path
     df_final = _cleaner_core(df_in=df, all_lowercase=all_lowercase)

     # Save cleaned dataframe
-    save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)

     _LOGGER.info(f"Data successfully cleaned.")

@@ -329,7 +329,7 @@ def basic_clean_drop(input_filepath: Union[str,Path], output_filepath: Union[str
                      threshold=threshold)

     # Save cleaned dataframe
-    save_dataframe(df=df_final, save_dir=output_path.parent, filename=output_path.name)
+    save_dataframe_filename(df=df_final, save_dir=output_path.parent, filename=output_path.name)

     _LOGGER.info(f"Data successfully cleaned.")

@@ -494,7 +494,7 @@ class DataFrameCleaner:
        if isinstance(output_filepath, str):
            output_filepath = make_fullpath(input_path=output_filepath, enforce="file")

-       save_dataframe(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)
+       save_dataframe_filename(df=df_clean, save_dir=output_filepath.parent, filename=output_filepath.name)

        return None

{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ETL_engineering.py
@@ -3,7 +3,7 @@ import re
 from pathlib import Path
 from typing import Literal, Union, Optional, Any, Callable, List, Dict, Tuple

-from .utilities import load_dataframe, save_dataframe
+from .utilities import load_dataframe, save_dataframe_filename
 from .path_manager import make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
@@ -230,7 +230,7 @@ class DataProcessor:
        df_processed = self.transform(df)

        # save processed df
-       save_dataframe(df=df_processed, save_dir=out_path.parent, filename=out_path.name)
+       save_dataframe_filename(df=df_processed, save_dir=out_path.parent, filename=out_path.name)

    def __str__(self) -> str:
        """
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/MICE_imputation.py
@@ -6,7 +6,7 @@ import numpy as np
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
 from typing import Optional, Union

-from .utilities import load_dataframe, merge_dataframes, save_dataframe
+from .utilities import load_dataframe, merge_dataframes, save_dataframe_filename
 from .math_utilities import threshold_binary_values
 from .path_manager import sanitize_filename, make_fullpath, list_csv_paths
 from ._logger import _LOGGER
@@ -75,7 +75,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
 def save_imputed_datasets(save_dir: Union[str, Path], imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
     for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
         merged_df = merge_dataframes(imputed_df, df_targets, direction="horizontal", verbose=False)
-        save_dataframe(df=merged_df, save_dir=save_dir, filename=subname)
+        save_dataframe_filename(df=merged_df, save_dir=save_dir, filename=subname)


 #Get names of features that had missing values before imputation
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_optimization.py
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 from .math_utilities import discretize_categorical_values


@@ -513,7 +513,7 @@ def _run_single_optimization_rep(

 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)


 def info():
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ML_simple_optimization.py
@@ -18,7 +18,7 @@ from .ML_inference import PyTorchInferenceHandler
 from .keys import PyTorchInferenceKeys
 from .SQL import DatabaseManager
 from .optimization_tools import _save_result
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename
 from .math_utilities import threshold_binary_values

 """
@@ -406,7 +406,7 @@ def s_run_optimization(

 def _handle_pandas_log(logger: PandasLogger, save_path: Path, target_name: str):
     log_dataframe = logger.to_dataframe()
-    save_dataframe(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)
+    save_dataframe_filename(df=log_dataframe, save_dir=save_path / "EvolutionLogs", filename=target_name)


 def info():
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/VIF_factor.py
@@ -7,7 +7,7 @@ from statsmodels.tools.tools import add_constant
 import warnings
 from pathlib import Path

-from .utilities import yield_dataframes_from_dir, save_dataframe
+from .utilities import yield_dataframes_from_dir, save_dataframe_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._logger import _LOGGER
 from ._script_info import _script_info
@@ -229,7 +229,7 @@ def compute_vif_multi(input_directory: Union[str, Path],
        result_df, dropped_cols = drop_vif_based(df=df, vif_df=vif_dataframe)

        if len(dropped_cols) > 0:
-           save_dataframe(df=result_df, save_dir=output_dataset_path, filename=new_filename)
+           save_dataframe_filename(df=result_df, save_dir=output_dataset_path, filename=new_filename)


 def info():
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/data_exploration.py
@@ -10,7 +10,7 @@ import re
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from ._logger import _LOGGER
-from .utilities import save_dataframe
+from .utilities import save_dataframe_filename


 # Keep track of all available tools, show using `info()`
@@ -269,7 +269,7 @@ def drop_macro(df: pd.DataFrame,

     # Log initial state
     missing_data = show_null_columns(df=df_clean)
-    save_dataframe(df=missing_data.reset_index(drop=False),
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                    save_dir=log_directory,
                    filename="Missing_Data_start")

@@ -298,7 +298,7 @@ def drop_macro(df: pd.DataFrame,

     # log final state
     missing_data = show_null_columns(df=df_clean)
-    save_dataframe(df=missing_data.reset_index(drop=False),
+    save_dataframe_filename(df=missing_data.reset_index(drop=False),
                    save_dir=log_directory,
                    filename="Missing_Data_final")

{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/ensemble_learning.py
@@ -14,7 +14,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.base import clone

 from .utilities import yield_dataframes_from_dir, train_dataset_yielder
-from .serde import serialize_object
+from .serde import serialize_object_filename
 from .path_manager import sanitize_filename, make_fullpath
 from ._script_info import _script_info
 from .keys import EnsembleKeys
@@ -411,7 +411,7 @@ def _save_model(trained_model, model_name: str, target_name:str, feature_names:
                EnsembleKeys.FEATURES: feature_names,
                EnsembleKeys.TARGET: target_name}

-    serialize_object(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)
+    serialize_object_filename(obj=to_save, save_dir=save_directory, filename=filename, verbose=False, raise_on_error=True)


 # TRAIN EVALUATE PIPELINE
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/serde.py
@@ -9,12 +9,13 @@ from ._logger import _LOGGER


 __all__ = [
+    "serialize_object_filename",
     "serialize_object",
     "deserialize_object",
 ]


-def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
+def serialize_object_filename(obj: Any, save_dir: Union[str,Path], filename: str, verbose: bool=True, raise_on_error: bool=False) -> None:
     """
     Serializes a Python object using joblib; suitable for Python built-ins, numpy, and pandas.

@@ -40,6 +41,41 @@ def serialize_object(obj: Any, save_dir: Union[str,Path], filename: str, verbose
         _LOGGER.info(f"Object of type '{type(obj)}' saved to '{full_path}'")
     return None

+
+def serialize_object(obj: Any, file_path: Path, verbose: bool = True, raise_on_error: bool = False) -> None:
+    """
+    Serializes a Python object using joblib to a specific file path.
+
+    Suitable for Python built-ins, numpy, and pandas.
+
+    Parameters:
+        obj (Any) : The Python object to serialize.
+        file_path (Path) : The full file path to save the object to.
+            '.joblib' extension will be appended if missing.
+        raise_on_error (bool) : If True, raises exceptions on failure.
+    """
+    try:
+        # Ensure the extension is correct
+        if file_path.suffix != '.joblib':
+            file_path = file_path.with_suffix(file_path.suffix + '.joblib')
+
+        # Ensure the parent directory exists
+        _save_dir = make_fullpath(file_path.parent, make=True, enforce="directory")
+
+        # Dump the object
+        joblib.dump(obj, file_path)
+
+    except (IOError, OSError, TypeError, TerminatedWorkerError) as e:
+        _LOGGER.error(f"Failed to serialize object of type '{type(obj)}' to '{file_path}'. Error: {e}")
+        if raise_on_error:
+            raise e
+        return None
+    else:
+        if verbose:
+            _LOGGER.info(f"Object of type '{type(obj)}' saved to '{file_path}'")
+        return None
+
+
 # Define a TypeVar to link the expected type to the return type of deserialization
 T = TypeVar('T')

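In short, the 12.4.0 directory-plus-filename serializer is renamed to serialize_object_filename, and serialize_object now takes a single Path. A minimal migration sketch based on the signatures in the hunk above; the payload and paths are illustrative, and the import assumes the package's ml_tools top-level module:

from pathlib import Path
from ml_tools.serde import serialize_object, serialize_object_filename

payload = {"weights": [0.1, 0.2], "bias": 0.5}  # any joblib-serializable object

# 12.6.0 path-based API; per the docstring, '.joblib' is appended if missing
serialize_object(obj=payload, file_path=Path("artifacts/model_state"))

# Directory + filename API (this function was named `serialize_object` in 12.4.0)
serialize_object_filename(obj=payload, save_dir="artifacts", filename="model_state")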
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/ml_tools/utilities.py
@@ -14,8 +14,8 @@ __all__ = [
     "load_dataframe",
     "yield_dataframes_from_dir",
     "merge_dataframes",
+    "save_dataframe_filename",
     "save_dataframe",
-    "save_dataframe_path",
     "distribute_dataset_by_target",
     "train_dataset_orchestrator",
     "train_dataset_yielder"
@@ -210,7 +210,7 @@ def merge_dataframes(
     return merged_df


-def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
+def save_dataframe_filename(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Path], filename: str) -> None:
     """
     Saves a pandas or polars DataFrame to a CSV file.

@@ -250,11 +250,11 @@ def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], save_dir: Union[str,Pa
     _LOGGER.info(f"Saved dataset: '{filename}' with shape: {df.shape}")


-def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
+def save_dataframe(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
     """
     Saves a DataFrame to a specified full path.

-    This function is a convenience wrapper for `save_dataframe()`. It takes a
+    This function is a wrapper for `save_dataframe_filename()`. It takes a
     single `pathlib.Path` object pointing to a `.csv` file.

     Args:
@@ -265,9 +265,9 @@ def save_dataframe_path(df: Union[pd.DataFrame, pl.DataFrame], full_path: Path):
         _LOGGER.error('A path object pointing to a .csv file must be provided.')
         raise ValueError()

-    save_dataframe(df=df,
-                   save_dir=full_path.parent,
-                   filename=full_path.name)
+    save_dataframe_filename(df=df,
+                            save_dir=full_path.parent,
+                            filename=full_path.name)


 def distribute_dataset_by_target(
@@ -351,7 +351,7 @@ def train_dataset_orchestrator(list_of_dirs: list[Union[str,Path]],
                filename = df_dir.name + '_' + target_name + '_' + df_name
            else:
                filename = target_name + '_' + df_name
-           save_dataframe(df=df, save_dir=save_dir, filename=filename)
+           save_dataframe_filename(df=df, save_dir=save_dir, filename=filename)
            total_saved += 1
        except Exception as e:
            _LOGGER.error(f"Failed to process file '{df_path}'. Reason: {e}")
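The same rename pattern applies to the dataframe savers: save_dataframe is now the single-Path wrapper (it raises ValueError unless the path points to a .csv file), while save_dataframe_filename keeps the old save_dir + filename signature. A sketch of both call styles, with illustrative data and paths:

from pathlib import Path
import pandas as pd
from ml_tools.utilities import save_dataframe, save_dataframe_filename

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# 12.6.0: `save_dataframe` takes one Path that must point to a .csv file
save_dataframe(df=df, full_path=Path("output/data.csv"))

# Equivalent directory + filename call (this was `save_dataframe` in 12.4.0)
save_dataframe_filename(df=df, save_dir="output", filename="data.csv")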
{dragon_ml_toolbox-12.4.0 → dragon_ml_toolbox-12.6.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dragon-ml-toolbox"
-version = "12.4.0"
+version = "12.6.0"
 description = "A collection of tools for data science and machine learning projects."
 authors = [
     { name = "Karl L. Loza Vidaurre", email = "luigiloza@gmail.com" }