dragon-ml-toolbox 1.2.0-py3-none-any.whl → 1.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
--- dragon_ml_toolbox-1.2.0.dist-info/METADATA
+++ dragon_ml_toolbox-1.2.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dragon-ml-toolbox
-Version: 1.2.0
+Version: 1.2.1
 Summary: A collection of tools for data science and machine learning projects
 Author-email: Karl Loza <luigiloza@gmail.com>
 License-Expression: MIT
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+License-File: LICENSE-THIRD-PARTY.md
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: matplotlib
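
Both metadata changes above are easy to confirm from a live environment. A minimal sketch using the standard-library `importlib.metadata`, assuming the 1.2.1 wheel is installed:

```python
# Minimal sketch: read the installed distribution's metadata (Python 3.8+).
# Assumes dragon-ml-toolbox 1.2.1 is installed in the current environment.
from importlib.metadata import metadata

md = metadata("dragon-ml-toolbox")
print(md["Version"])               # expected: 1.2.1
print(md.get_all("License-File"))  # expected: ['LICENSE', 'LICENSE-THIRD-PARTY.md']
```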
--- dragon_ml_toolbox-1.2.0.dist-info/RECORD
+++ dragon_ml_toolbox-1.2.1.dist-info/RECORD
@@ -1,9 +1,10 @@
-dragon_ml_toolbox-1.2.0.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-1.2.1.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
+dragon_ml_toolbox-1.2.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=e1Hg5ZtaBpDV7ZvxhLe1ac28l7nMjvi1MSE5YvB1s-o,1472
 ml_tools/MICE_imputation.py,sha256=Xvupj6w4NJ7d8gcJbpp1y3LVVnWEfvx-It7oEksuT5I,7349
 ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ml_tools/data_exploration.py,sha256=AMQ5XLrRhV6dLhptjl2Jppgk9JJ06ZjXEuvqkjC3gt0,26998
+ml_tools/data_exploration.py,sha256=laTNbN5_xlhqWiKfF-cJ9yMZ8zAM2a-AryqgiIQBBLg,26649
 ml_tools/datasetmaster.py,sha256=VUneKshnmjOGbtqVVGTFcIMRKF3s6ZDYrosIYKDjD80,28956
-ml_tools/ensemble_learning.py,sha256=uA7A94CLv8o2l125oTEi0cjHusZkB-7Mnrtn7SGTfjs,29714
+ml_tools/ensemble_learning.py,sha256=5UmlXI3Orm5zL0P07Ub_Y0gwjruH-REHY-cFWQpJWb0,29085
 ml_tools/handle_excel.py,sha256=IR0VQc3hYdmjwC31E5YxDnRcWig4jSIx7Y_7to-KZz4,11969
 ml_tools/logger.py,sha256=XwSpCUzw2Le24fJHyljBxNLgw63SwjZ0pMjTJqf0ylI,4622
 ml_tools/particle_swarm_optimization.py,sha256=jpkje4OETC9fyISxxUTx4XGrImSU6gDEcwz46ZDs2bQ,19250
@@ -11,7 +12,7 @@ ml_tools/pytorch_models.py,sha256=Oykw02sOZLCjvSadQd64UGesBN7kq0x1EGXHusvYiQI,99
 ml_tools/trainer.py,sha256=Zd7AaHeoNd8dEas2JChWoHaCUpWUVRDUMybuHaKJ0XY,16740
 ml_tools/utilities.py,sha256=mG_--EFplfI9H7OhrWI8VkdNJtTbs4Wbz32xvcFWps8,5518
 ml_tools/vision_helpers.py,sha256=lBAW6dzAK-HOswAt1fU_tfP9hkNLY5D8c_I_7hhEXno,7528
-dragon_ml_toolbox-1.2.0.dist-info/METADATA,sha256=LmlbpETQETUcZuGatEtnP6JttrkN7kVObxjzvl5INfk,5128
-dragon_ml_toolbox-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dragon_ml_toolbox-1.2.0.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
-dragon_ml_toolbox-1.2.0.dist-info/RECORD,,
+dragon_ml_toolbox-1.2.1.dist-info/METADATA,sha256=_dLYb0G6dqpxh2jeWdWuG91LHQZCNDq2HVxbcBLlcu0,5165
+dragon_ml_toolbox-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dragon_ml_toolbox-1.2.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
+dragon_ml_toolbox-1.2.1.dist-info/RECORD,,
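
Each `RECORD` row has the form `path,sha256=<digest>,size`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with the `=` padding stripped (per PEP 376/427). A minimal sketch for recomputing one of the digests above, e.g. for the updated `ml_tools/data_exploration.py` (the local path is illustrative):

```python
# Minimal sketch: recompute a wheel RECORD digest.
# RECORD stores each SHA-256 urlsafe-base64 encoded, '=' padding stripped.
import base64
import hashlib
from pathlib import Path

def record_digest(path: str) -> str:
    raw = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

# Compare against the RECORD line above for the 1.2.1 file:
print(record_digest("ml_tools/data_exploration.py"))
# expected: sha256=laTNbN5_xlhqWiKfF-cJ9yMZ8zAM2a-AryqgiIQBBLg
```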
--- /dev/null
+++ dragon_ml_toolbox-1.2.1.dist-info/licenses/LICENSE-THIRD-PARTY.md
@@ -0,0 +1,23 @@
+# Third-Party Licenses
+
+This project depends on the following third-party packages. Each is governed by its own license, linked below.
+
+- [pandas](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
+- [numpy](https://github.com/numpy/numpy/blob/main/LICENSE.txt)
+- [matplotlib](https://github.com/matplotlib/matplotlib/blob/main/LICENSE/LICENSE)
+- [seaborn](https://github.com/mwaskom/seaborn/blob/main/LICENSE)
+- [statsmodels](https://github.com/statsmodels/statsmodels/blob/main/LICENSE.txt)
+- [ipython](https://github.com/ipython/ipython/blob/main/COPYING.rst)
+- [torch](https://github.com/pytorch/pytorch/blob/main/LICENSE)
+- [scikit-learn](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)
+- [imblearn](https://github.com/scikit-learn-contrib/imbalanced-learn/blob/main/LICENSE)
+- [Pillow](https://github.com/python-pillow/Pillow/blob/main/LICENSE)
+- [joblib](https://github.com/joblib/joblib/blob/main/LICENSE.txt)
+- [xgboost](https://github.com/dmlc/xgboost/blob/main/LICENSE)
+- [lightgbm](https://github.com/microsoft/LightGBM/blob/master/LICENSE)
+- [shap](https://github.com/shap/shap/blob/master/LICENSE)
+- [openpyxl](https://github.com/chronossc/openpyxl/blob/main/LICENSE)
+- [miceforest](https://github.com/AnotherSamWilson/miceforest/blob/main/LICENSE)
+- [polars](https://github.com/pola-rs/polars/blob/main/LICENSE)
+- [torchvision](https://github.com/pytorch/vision/blob/main/LICENSE)
+- [pyswarm](https://pythonhosted.org/pyswarm/#license)
--- ml_tools/data_exploration.py
+++ ml_tools/data_exploration.py
@@ -15,8 +15,7 @@ from ml_tools.utilities import sanitize_filename
 
 
 # Keep track of all available functions, show using `info()`
-__all__ = ["load_dataframe",
-           "summarize_dataframe",
+__all__ = ["summarize_dataframe",
            "drop_rows_with_missing_data",
            "split_features_targets",
            "show_null_columns",
@@ -33,21 +32,6 @@ __all__ = ["load_dataframe",
            "drop_vif_based"]
 
 
-def load_dataframe(df_path: str) -> pd.DataFrame:
-    """
-    Loads a DataFrame from a CSV file.
-
-    Args:
-        df_path (str): Path to the CSV file.
-
-    Returns:
-        pd.DataFrame: Loaded DataFrame.
-    """
-    df = pd.read_csv(df_path, encoding='utf-8')
-    print(f"DataFrame shape {df.shape}")
-    return df
-
-
 def summarize_dataframe(df: pd.DataFrame, round_digits: int = 2):
     """
     Returns a summary DataFrame with data types, non-null counts, number of unique values,
--- ml_tools/ensemble_learning.py
+++ ml_tools/ensemble_learning.py
@@ -21,6 +21,8 @@ from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
 from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, mean_absolute_error, mean_squared_error, r2_score, roc_curve, roc_auc_score
 import shap
 
+from .utilities import yield_dataframes_from_dir
+
 import warnings # Ignore warnings
 warnings.filterwarnings('ignore', category=DeprecationWarning)
 warnings.filterwarnings('ignore', category=FutureWarning)
@@ -28,23 +30,6 @@ warnings.filterwarnings('ignore', category=UserWarning)
 
 
 ###### 1. Dataset Loader ######
-#Load imputed datasets as a generator
-def yield_imputed_dataframe(datasets_dir: str):
-    '''
-    Yields a tuple `(dataframe, dataframe_name)`
-    '''
-    dataset_filenames = [dataset for dataset in os.listdir(datasets_dir) if dataset.endswith(".csv")]
-    if not dataset_filenames:
-        raise IOError(f"No imputed datasets have been found at {datasets_dir}")
-
-    for dataset_filename in dataset_filenames:
-        full_path = os.path.join(datasets_dir, dataset_filename)
-        df = pd.read_csv(full_path)
-        #remove extension
-        filename = os.path.splitext(os.path.basename(dataset_filename))[0]
-        print(f"Working on dataset: {filename}")
-        yield (df, filename)
-
 #Split a dataset into features and targets datasets
 def dataset_yielder(df: pd.DataFrame, target_cols: list[str]):
     '''
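
The duplicated loader above is dropped in favor of `yield_dataframes_from_dir` from `ml_tools/utilities.py`. Note from the RECORD section that `utilities.py` is unchanged in this release, so the shared helper already existed in 1.2.0; its body is not part of this diff. Judging from the removed `yield_imputed_dataframe` and the call site in `run_pipeline` below, it presumably behaves roughly like this sketch (the name and signature come from the import; the body is inferred, not confirmed):

```python
# Hypothetical reconstruction of ml_tools.utilities.yield_dataframes_from_dir;
# the real implementation is not shown in this diff. Inferred behavior: walk a
# directory of CSV files and yield (dataframe, filename-without-extension) pairs.
import os
import pandas as pd

def yield_dataframes_from_dir(datasets_dir: str):
    csv_files = [f for f in os.listdir(datasets_dir) if f.endswith(".csv")]
    if not csv_files:
        raise IOError(f"No CSV datasets found at {datasets_dir}")
    for csv_file in csv_files:
        df = pd.read_csv(os.path.join(datasets_dir, csv_file))
        name = os.path.splitext(csv_file)[0]  # strip the .csv extension
        yield df, name
```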
@@ -543,7 +528,7 @@ def get_shap_values(model, model_name: str,
         plot_size=figsize,
         max_display=max_display_features,
         alpha=0.7,
-        color=plt.get_cmap('viridis')
+        color=plt.get_cmap('viridis') # type: ignore
     )
 
     # Add professional styling
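
The only change in this hunk is the trailing `# type: ignore`, presumably because `plt.get_cmap` returns a `Colormap` object and static type checkers flag it against shap's narrower annotation, even though the call works at runtime. A hypothetical alternative that widens the value's type instead of suppressing the whole line:

```python
# Hypothetical alternative to the inline "# type: ignore": hand the checker an
# explicitly widened type for the colormap value.
from typing import Any, cast
import matplotlib.pyplot as plt

viridis: Any = cast(Any, plt.get_cmap("viridis"))  # a Colormap, typed as Any
```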
@@ -674,7 +659,7 @@ def run_pipeline(datasets_dir: str, save_dir: str, target_columns: list[str], ta
     #Check paths
     _check_paths(datasets_dir, save_dir)
     #Yield imputed dataset
-    for dataframe, dataframe_name in yield_imputed_dataframe(datasets_dir):
+    for dataframe, dataframe_name in yield_dataframes_from_dir(datasets_dir):
         #Yield features dataframe and target dataframe
         for df_features, df_target, feature_names, target_name in dataset_yielder(df=dataframe, target_cols=target_columns):
             #Dataset pipeline