PyPI - dragon-ml-toolbox - Versions diffs - 1.4.8__py3-none-any.whl → 2.1.0__py3-none-any.whl - Mend

dragon-ml-toolbox 1.4.8__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (16)

{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/METADATA +24 -14
dragon_ml_toolbox-2.1.0.dist-info/RECORD +20 -0
{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md +5 -4
ml_tools/MICE_imputation.py +27 -28
ml_tools/PSO_optimization.py +490 -0
ml_tools/VIF_factor.py +20 -17
ml_tools/{particle_swarm_optimization.py → _particle_swarm_optimization.py} +5 -0
ml_tools/data_exploration.py +58 -32
ml_tools/ensemble_learning.py +40 -42
ml_tools/handle_excel.py +98 -78
ml_tools/logger.py +13 -11
ml_tools/utilities.py +134 -46
dragon_ml_toolbox-1.4.8.dist-info/RECORD +0 -19
{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/WHEEL +0 -0
{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/licenses/LICENSE +0 -0
{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/top_level.txt +0 -0

{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 1.4.8
+Version: 2.1.0
 Summary: A collection of tools for data science and machine learning projects
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
 Project-URL: Changelog, https://github.com/DrAg0n-BoRn/ML_tools/blob/master/CHANGELOG.md
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.9
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE-THIRD-PARTY.md
@@ -32,9 +32,10 @@ Requires-Dist: joblib
 Requires-Dist: xgboost
 Requires-Dist: lightgbm<=4.5.0
 Requires-Dist: shap
+Requires-Dist: tqdm>=4.0
+Requires-Dist: Pillow
 Provides-Extra: pytorch
 Requires-Dist: torch; extra == "pytorch"
-Requires-Dist: Pillow; extra == "pytorch"
 Requires-Dist: torchvision; extra == "pytorch"
 Dynamic: license-file
@@ -49,7 +50,7 @@ A collection of Python utilities for data science and machine learning, structur
 ## Installation
-**Python 3.9+ recommended.**
+**Python 3.10+ recommended.**
 ### Via PyPI
@@ -59,6 +60,16 @@ Install the latest stable release from PyPI:
 pip install dragon-ml-tools
 ```
+### Via GitHub (Editable)
+Clone the repository and install in editable mode with optional dependencies:
+```bash
+git clone https://github.com/DrAg0n-BoRn/ML_tools.git
+cd ML_tools
+pip install -e .
+```
 ### Via conda-forge
 Install from the conda-forge channel:
@@ -66,22 +77,21 @@ Install from the conda-forge channel:
 ```bash
 conda install -c conda-forge dragon-ml-toolbox
 ```
+**Note:** This version is outdated or broken due to dependency incompatibilities.
-#### Optional dependencies
+## Optional dependencies
+**PyTorch**, which provides different builds depending on the **platform** and **hardware acceleration** (e.g., CUDA for NVIDIA GPUs on Linux/Windows, or MPS for Apple Silicon on macOS).
+Install the default CPU-only version with
 ```bash
 pip install dragon-ml-tools[pytorch]
 ```
-### Via GitHub (Editable)
-Clone the repository and install in editable mode with optional dependencies:
+To make use of GPU acceleration use the official PyTorch installation instructions:
-```bash
-git clone https://github.com/DrAg0n-BoRn/ML_tools.git
-cd ML_tools
-pip install -e .
-```
+[PyTorch Instructions](https://pytorch.org/get-started/locally/)
 ## Usage
@@ -101,7 +111,7 @@ ensemble_learning
 handle_excel
 logger
 MICE_imputation
-particle_swarm_optimization
+PSO_optimization
 trainer
 utilities
 VIF_factor

dragon_ml_toolbox-2.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,20 @@
+dragon_ml_toolbox-2.1.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-2.1.0.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=6cfpIeQ6D4Mcs10nkogQrkVyq1T7i2qXjjNHFoUMOyE,1892
+ml_tools/MICE_imputation.py,sha256=1fovHycZMdZ6OgVh_bk8-r3wGi4rqf6rS10LOEWYaQo,11177
+ml_tools/PSO_optimization.py,sha256=vty1dZDY7P2iGUuE_oojyGdgM1EkDj5kXCfCxRMdk28,20957
+ml_tools/VIF_factor.py,sha256=lpM3Z2X_iZfXUWbCbURoeI0Tb196lU0bAsRo7q6AzBM,10235
+ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ml_tools/_particle_swarm_optimization.py,sha256=b_eNNkA89Y40hj76KauivT8KLScH1B9wF2IXptOqkOw,22220
+ml_tools/data_exploration.py,sha256=CDUVRTHfww105IXDRpBQ81KZWx5HXSsA-FVsVYBzNw8,21298
+ml_tools/datasetmaster.py,sha256=EFUEX-tqq94Ak1rXXYR-XaX85olrxvF2EuytdzUK7y0,29131
+ml_tools/ensemble_learning.py,sha256=q9jbu7SupvXz61sURFQ9V2-7gUsLbA3cSgyb2MQFyyc,37351
+ml_tools/handle_excel.py,sha256=Uasx-DX7RNVQSzGHVJhX7UQ9RgBbX5H1ud1Hw_y8Kp4,12944
+ml_tools/logger.py,sha256=_k7WJdpFJj3IsjOgvjLJgUFZyF8RK3Jlgp5tAu_dLQU,4767
+ml_tools/pytorch_models.py,sha256=bpWZsrSwCvHJQkR6UfoPpElsMv9AvmiNErNHC8NYB_I,10132
+ml_tools/trainer.py,sha256=WAZ4EdrZuTOAnGXRWV3XcLNce4s7EKGf2-qchLC08Ik,15702
+ml_tools/utilities.py,sha256=5vVXqIH-jiY4PHUAoDI1o26mZYPsmrWO6I97Fs3oC90,18661
+ml_tools/vision_helpers.py,sha256=idQ-Ugp1IdsvwXiYyhYa9G3rTRTm37YRpkQDLEpANHM,7701
+dragon_ml_toolbox-2.1.0.dist-info/METADATA,sha256=LDXrXkR1nm6WiEVHudCy7wI0dwkMejT0NzPuYptGSmw,2974
+dragon_ml_toolbox-2.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-2.1.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-2.1.0.dist-info/RECORD,,

{dragon_ml_toolbox-1.4.8.dist-info → dragon_ml_toolbox-2.1.0.dist-info}/licenses/LICENSE-THIRD-PARTY.md RENAMED Viewed

@@ -5,10 +5,10 @@ This project depends on the following third-party packages. Each is governed by
 - [pandas](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
 - [numpy](https://github.com/numpy/numpy/blob/main/LICENSE.txt)
 - [matplotlib](https://github.com/matplotlib/matplotlib/blob/main/LICENSE/LICENSE)
-- [seaborn](https://github.com/mwaskom/seaborn/blob/main/LICENSE)
+- [seaborn](https://github.com/mwaskom/seaborn/blob/master/LICENSE.md)
 - [statsmodels](https://github.com/statsmodels/statsmodels/blob/main/LICENSE.txt)
-- [ipython](https://github.com/ipython/ipython/blob/main/COPYING.rst)
-- [ipykernel](https://github.com/ipython/ipykernel/blob/main/COPYING.rst)
+- [ipython](https://github.com/ipython/ipython/blob/main/LICENSE)
+- [ipykernel](https://github.com/ipython/ipykernel/blob/main/LICENSE)
 - [notebook](https://github.com/jupyter/notebook/blob/main/LICENSE)
 - [jupyterlab](https://github.com/jupyterlab/jupyterlab/blob/main/LICENSE)
 - [ipywidgets](https://github.com/jupyter-widgets/ipywidgets/blob/main/LICENSE)
@@ -24,5 +24,6 @@ This project depends on the following third-party packages. Each is governed by
 - [openpyxl](https://github.com/chronossc/openpyxl/blob/main/LICENSE)
 - [miceforest](https://github.com/AnotherSamWilson/miceforest/blob/main/LICENSE)
 - [polars](https://github.com/pola-rs/polars/blob/main/LICENSE)
-- [plotnine](https://github.com/has2k1/plotnine/blob/main/LICENSE.txt)
+- [plotnine](https://github.com/has2k1/plotnine/blob/main/LICENSE)
 - [pyswarm](https://pythonhosted.org/pyswarm/#license)
+- [tqdm](https://github.com/tqdm/tqdm/blob/master/LICENSE)

ml_tools/MICE_imputation.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import pandas as pd
 import miceforest as mf
-import os
+from pathlib import Path
 import matplotlib.pyplot as plt
 import numpy as np
-from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values
+from .utilities import load_dataframe, list_csv_paths, sanitize_filename, _script_info, merge_dataframes, save_dataframe, threshold_binary_values, make_fullpath
 from plotnine import ggplot, labs, theme, element_blank # type: ignore
-from typing import Optional
+from typing import Optional, Union
 __all__ = [
@@ -60,7 +60,7 @@ def apply_mice(df: pd.DataFrame, df_name: str, binary_columns: Optional[list[str
     return kernel, imputed_datasets, imputed_dataset_names
-def save_imputed_datasets(save_dir: str, imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
+def save_imputed_datasets(save_dir: Union[str, Path], imputed_datasets: list, df_targets: pd.DataFrame, imputed_dataset_names: list[str]):
     for imputed_df, subname in zip(imputed_datasets, imputed_dataset_names):
         merged_df = merge_dataframes(imputed_df, df_targets, direction="horizontal", verbose=False)
         save_dataframe(df=merged_df, save_dir=save_dir, filename=subname)
@@ -72,7 +72,7 @@ def get_na_column_names(df: pd.DataFrame):
 #Convergence diagnostic
-def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_names: list[str], column_names: list[str], root_dir: str, fontsize: int=16):
+def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_names: list[str], column_names: list[str], root_dir: Union[str,Path], fontsize: int=16):
     """
     Generate and save convergence diagnostic plots for imputed variables.
@@ -90,7 +90,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
         raise ValueError(f"Expected {dataset_count} names in imputed_dataset_names, got {len(imputed_dataset_names)}")
     # Check path
-    os.makedirs(root_dir, exist_ok=True)
+    root_path = make_fullpath(root_dir, make=True)
     # Styling parameters
     label_font = {'size': fontsize, 'weight': 'bold'}
@@ -99,8 +99,7 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
     for dataset_id, imputed_dataset_name in zip(range(dataset_count), imputed_dataset_names):
         #Check directory for current dataset
         dataset_file_dir = f"Convergence_Metrics_{imputed_dataset_name}"
-        local_save_dir = os.path.join(root_dir, dataset_file_dir)
-        os.makedirs(local_save_dir, exist_ok=True)
+        local_save_dir = make_fullpath(input_path=root_path / dataset_file_dir, make=True)
         for feature_name in column_names:
             means_per_iteration = []
@@ -121,8 +120,8 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
             plt.grid(True)
             feature_save_name = sanitize_filename(feature_name)
-            save_path = os.path.join(local_save_dir, feature_save_name + ".svg")
+            feature_save_name = feature_save_name + ".svg"
+            save_path = local_save_dir / feature_save_name
             plt.savefig(save_path, bbox_inches='tight', format="svg")
             plt.close()
@@ -130,18 +129,17 @@ def get_convergence_diagnostic(kernel: mf.ImputationKernel, imputed_dataset_name
 # Imputed distributions
-def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_dir: str, column_names: list[str], one_plot: bool=False, fontsize: int=14):
+def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_dir: Union[str, Path], column_names: list[str], one_plot: bool=False, fontsize: int=14):
     '''
     It works using miceforest's authors implementation of the method `.plot_imputed_distributions()`.
     Set `one_plot=True` to save a single image including all feature distribution plots instead.
     '''
     # Check path
-    os.makedirs(root_dir, exist_ok=True)
+    root_path = make_fullpath(root_dir, make=True)
     local_dir_name = f"Distribution_Metrics_{df_name}_imputed"
-    local_save_dir = os.path.join(root_dir, local_dir_name)
-    if not os.path.isdir(local_save_dir):
-        os.makedirs(local_save_dir)
+    local_save_dir = make_fullpath(root_path / local_dir_name, make=True)
     # Styling parameters
     legend_kwargs = {'frameon': True, 'facecolor': 'white', 'framealpha': 0.8}
@@ -191,9 +189,11 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
         # sanitize savename
         feature_save_name = sanitize_filename(filename)
+        feature_save_name = feature_save_name + ".svg"
+        new_save_path = local_save_dir / feature_save_name
         fig.savefig(
-            os.path.join(local_save_dir, feature_save_name + ".svg"),
+            new_save_path,
             format='svg',
             bbox_inches='tight',
             pad_inches=0.1
@@ -213,8 +213,8 @@ def get_imputed_distributions(kernel: mf.ImputationKernel, df_name: str, root_di
     print(f"{local_dir_name} completed.")
-def run_mice_pipeline(df_path_or_dir: str, target_columns: list[str],
-                      save_datasets_dir: str, save_metrics_dir: str,
+def run_mice_pipeline(df_path_or_dir: Union[str,Path], target_columns: list[str],
+                      save_datasets_dir: Union[str,Path], save_metrics_dir: Union[str,Path],
                       binary_columns: Optional[list[str]]=None,
                       resulting_datasets: int=1,
                       iterations: int=20,
@@ -230,15 +230,14 @@ def run_mice_pipeline(df_path_or_dir: str, target_columns: list[str],
     Target columns must be skipped from the imputation. Binary columns will be thresholded after imputation.
     """
     # Check paths
-    os.makedirs(save_datasets_dir, exist_ok=True)
-    os.makedirs(save_metrics_dir, exist_ok=True)
+    save_datasets_path = make_fullpath(save_datasets_dir, make=True)
+    save_metrics_path = make_fullpath(save_metrics_dir, make=True)
-    if os.path.isfile(df_path_or_dir):
-        all_file_paths = [df_path_or_dir]
-    elif os.path.isdir(df_path_or_dir):
-        all_file_paths = list(list_csv_paths(df_path_or_dir).values())
+    input_path = make_fullpath(df_path_or_dir)
+    if input_path.is_file():
+        all_file_paths = [input_path]
     else:
-        raise ValueError(f"Invalid path or directory: {df_path_or_dir}")
+        all_file_paths = list(list_csv_paths(input_path).values())
     for df_path in all_file_paths:
         df, df_name = load_dataframe(df_path=df_path)
@@ -247,13 +246,13 @@ def run_mice_pipeline(df_path_or_dir: str, target_columns: list[str],
         kernel, imputed_datasets, imputed_dataset_names = apply_mice(df=df, df_name=df_name, binary_columns=binary_columns, resulting_datasets=resulting_datasets, iterations=iterations, random_state=random_state)
-        save_imputed_datasets(save_dir=save_datasets_dir, imputed_datasets=imputed_datasets, df_targets=df_targets, imputed_dataset_names=imputed_dataset_names)
+        save_imputed_datasets(save_dir=save_datasets_path, imputed_datasets=imputed_datasets, df_targets=df_targets, imputed_dataset_names=imputed_dataset_names)
         imputed_column_names = get_na_column_names(df=df)
-        get_convergence_diagnostic(kernel=kernel, imputed_dataset_names=imputed_dataset_names, column_names=imputed_column_names, root_dir=save_metrics_dir)
+        get_convergence_diagnostic(kernel=kernel, imputed_dataset_names=imputed_dataset_names, column_names=imputed_column_names, root_dir=save_metrics_path)
-        get_imputed_distributions(kernel=kernel, df_name=df_name, root_dir=save_metrics_dir, column_names=imputed_column_names)
+        get_imputed_distributions(kernel=kernel, df_name=df_name, root_dir=save_metrics_path, column_names=imputed_column_names)
 def _skip_targets(df: pd.DataFrame, target_cols: list[str]):

dragon-ml-toolbox 1.4.8__py3-none-any.whl → 2.1.0__py3-none-any.whl

Potentially problematic release.

dragon-ml-toolbox 1.4.8__py3-none-any.whl → 2.1.0__py3-none-any.whl