dragon-ml-toolbox 1.1.6__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dragon_ml_toolbox-1.1.6.dist-info → dragon_ml_toolbox-1.2.1.dist-info}/METADATA +38 -21
- dragon_ml_toolbox-1.2.1.dist-info/RECORD +18 -0
- dragon_ml_toolbox-1.2.1.dist-info/licenses/LICENSE-THIRD-PARTY.md +23 -0
- ml_tools/MICE_imputation.py +1 -1
- ml_tools/data_exploration.py +2 -18
- ml_tools/ensemble_learning.py +4 -19
- ml_tools/logger.py +1 -1
- dragon_ml_toolbox-1.1.6.dist-info/RECORD +0 -17
- {dragon_ml_toolbox-1.1.6.dist-info → dragon_ml_toolbox-1.2.1.dist-info}/WHEEL +0 -0
- {dragon_ml_toolbox-1.1.6.dist-info → dragon_ml_toolbox-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {dragon_ml_toolbox-1.1.6.dist-info → dragon_ml_toolbox-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dragon-ml-toolbox
|
|
3
|
-
Version: 1.1
|
|
4
|
-
Summary: A collection of tools for machine learning projects
|
|
3
|
+
Version: 1.2.1
|
|
4
|
+
Summary: A collection of tools for data science and machine learning projects
|
|
5
5
|
Author-email: Karl Loza <luigiloza@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
|
|
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
+
License-File: LICENSE-THIRD-PARTY.md
|
|
14
15
|
Requires-Dist: numpy
|
|
15
16
|
Requires-Dist: pandas
|
|
16
17
|
Requires-Dist: matplotlib
|
|
@@ -70,6 +71,25 @@ Provides-Extra: vision-helpers
|
|
|
70
71
|
Requires-Dist: Pillow; extra == "vision-helpers"
|
|
71
72
|
Requires-Dist: torch; extra == "vision-helpers"
|
|
72
73
|
Requires-Dist: torchvision; extra == "vision-helpers"
|
|
74
|
+
Provides-Extra: full
|
|
75
|
+
Requires-Dist: pandas; extra == "full"
|
|
76
|
+
Requires-Dist: numpy; extra == "full"
|
|
77
|
+
Requires-Dist: matplotlib; extra == "full"
|
|
78
|
+
Requires-Dist: seaborn; extra == "full"
|
|
79
|
+
Requires-Dist: statsmodels; extra == "full"
|
|
80
|
+
Requires-Dist: ipython; extra == "full"
|
|
81
|
+
Requires-Dist: torch; extra == "full"
|
|
82
|
+
Requires-Dist: scikit-learn; extra == "full"
|
|
83
|
+
Requires-Dist: imblearn; extra == "full"
|
|
84
|
+
Requires-Dist: Pillow; extra == "full"
|
|
85
|
+
Requires-Dist: joblib; extra == "full"
|
|
86
|
+
Requires-Dist: xgboost; extra == "full"
|
|
87
|
+
Requires-Dist: lightgbm; extra == "full"
|
|
88
|
+
Requires-Dist: shap; extra == "full"
|
|
89
|
+
Requires-Dist: openpyxl; extra == "full"
|
|
90
|
+
Requires-Dist: miceforest; extra == "full"
|
|
91
|
+
Requires-Dist: polars; extra == "full"
|
|
92
|
+
Requires-Dist: torchvision; extra == "full"
|
|
73
93
|
Dynamic: license-file
|
|
74
94
|
|
|
75
95
|
# dragon-ml-tools
|
|
@@ -85,22 +105,30 @@ A collection of Python utilities and machine learning tools, structured as a mod
|
|
|
85
105
|
|
|
86
106
|
## Installation
|
|
87
107
|
|
|
88
|
-
|
|
108
|
+
Python 3.9 or newer is required.
|
|
89
109
|
|
|
90
|
-
|
|
110
|
+
### Via PyPI (Stable Releases)
|
|
111
|
+
|
|
112
|
+
Install the latest stable release from PyPI with optional dependencies:
|
|
91
113
|
|
|
92
114
|
```bash
|
|
93
|
-
|
|
94
|
-
cd ML_tools
|
|
95
|
-
pip install -e '.[logger]'
|
|
115
|
+
pip install "dragon-ml-toolbox[logger,trainer]"
|
|
96
116
|
```
|
|
97
117
|
|
|
98
|
-
|
|
118
|
+
To install the dependencies for all modules:
|
|
99
119
|
|
|
100
|
-
|
|
120
|
+
```bash
|
|
121
|
+
pip install "dragon-ml-toolbox[full]"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Via GitHub (Editable)
|
|
125
|
+
|
|
126
|
+
Clone the repository and install in editable mode with optional dependencies:
|
|
101
127
|
|
|
102
128
|
```bash
|
|
103
|
-
|
|
129
|
+
git clone https://github.com/DrAg0n-BoRn/ML_tools.git
|
|
130
|
+
cd ML_tools
|
|
131
|
+
pip install -e '.[logger]'
|
|
104
132
|
```
|
|
105
133
|
|
|
106
134
|
## Usage
|
|
@@ -111,14 +139,3 @@ After installation, import modules like this:
|
|
|
111
139
|
from ml_tools.utilities import sanitize_filename
|
|
112
140
|
from ml_tools.logger import custom_logger
|
|
113
141
|
```
|
|
114
|
-
|
|
115
|
-
## Development
|
|
116
|
-
|
|
117
|
-
Python 3.9+ recommended.
|
|
118
|
-
|
|
119
|
-
To install all dependencies including development tools:
|
|
120
|
-
|
|
121
|
-
```python
|
|
122
|
-
pip install -e '.[dev]'
|
|
123
|
-
```
|
|
124
|
-
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
dragon_ml_toolbox-1.2.1.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
|
|
2
|
+
dragon_ml_toolbox-1.2.1.dist-info/licenses/LICENSE-THIRD-PARTY.md,sha256=e1Hg5ZtaBpDV7ZvxhLe1ac28l7nMjvi1MSE5YvB1s-o,1472
|
|
3
|
+
ml_tools/MICE_imputation.py,sha256=Xvupj6w4NJ7d8gcJbpp1y3LVVnWEfvx-It7oEksuT5I,7349
|
|
4
|
+
ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
ml_tools/data_exploration.py,sha256=laTNbN5_xlhqWiKfF-cJ9yMZ8zAM2a-AryqgiIQBBLg,26649
|
|
6
|
+
ml_tools/datasetmaster.py,sha256=VUneKshnmjOGbtqVVGTFcIMRKF3s6ZDYrosIYKDjD80,28956
|
|
7
|
+
ml_tools/ensemble_learning.py,sha256=5UmlXI3Orm5zL0P07Ub_Y0gwjruH-REHY-cFWQpJWb0,29085
|
|
8
|
+
ml_tools/handle_excel.py,sha256=IR0VQc3hYdmjwC31E5YxDnRcWig4jSIx7Y_7to-KZz4,11969
|
|
9
|
+
ml_tools/logger.py,sha256=XwSpCUzw2Le24fJHyljBxNLgw63SwjZ0pMjTJqf0ylI,4622
|
|
10
|
+
ml_tools/particle_swarm_optimization.py,sha256=jpkje4OETC9fyISxxUTx4XGrImSU6gDEcwz46ZDs2bQ,19250
|
|
11
|
+
ml_tools/pytorch_models.py,sha256=Oykw02sOZLCjvSadQd64UGesBN7kq0x1EGXHusvYiQI,9908
|
|
12
|
+
ml_tools/trainer.py,sha256=Zd7AaHeoNd8dEas2JChWoHaCUpWUVRDUMybuHaKJ0XY,16740
|
|
13
|
+
ml_tools/utilities.py,sha256=mG_--EFplfI9H7OhrWI8VkdNJtTbs4Wbz32xvcFWps8,5518
|
|
14
|
+
ml_tools/vision_helpers.py,sha256=lBAW6dzAK-HOswAt1fU_tfP9hkNLY5D8c_I_7hhEXno,7528
|
|
15
|
+
dragon_ml_toolbox-1.2.1.dist-info/METADATA,sha256=_dLYb0G6dqpxh2jeWdWuG91LHQZCNDq2HVxbcBLlcu0,5165
|
|
16
|
+
dragon_ml_toolbox-1.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
dragon_ml_toolbox-1.2.1.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
18
|
+
dragon_ml_toolbox-1.2.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Third-Party Licenses
|
|
2
|
+
|
|
3
|
+
This project depends on the following third-party packages. Each is governed by its own license, linked below.
|
|
4
|
+
|
|
5
|
+
- [pandas](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
|
|
6
|
+
- [numpy](https://github.com/numpy/numpy/blob/main/LICENSE.txt)
|
|
7
|
+
- [matplotlib](https://github.com/matplotlib/matplotlib/blob/main/LICENSE/LICENSE)
|
|
8
|
+
- [seaborn](https://github.com/mwaskom/seaborn/blob/main/LICENSE)
|
|
9
|
+
- [statsmodels](https://github.com/statsmodels/statsmodels/blob/main/LICENSE.txt)
|
|
10
|
+
- [ipython](https://github.com/ipython/ipython/blob/main/COPYING.rst)
|
|
11
|
+
- [torch](https://github.com/pytorch/pytorch/blob/main/LICENSE)
|
|
12
|
+
- [scikit-learn](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)
|
|
13
|
+
- [imblearn](https://github.com/scikit-learn-contrib/imbalanced-learn/blob/main/LICENSE)
|
|
14
|
+
- [Pillow](https://github.com/python-pillow/Pillow/blob/main/LICENSE)
|
|
15
|
+
- [joblib](https://github.com/joblib/joblib/blob/main/LICENSE.txt)
|
|
16
|
+
- [xgboost](https://github.com/dmlc/xgboost/blob/main/LICENSE)
|
|
17
|
+
- [lightgbm](https://github.com/microsoft/LightGBM/blob/master/LICENSE)
|
|
18
|
+
- [shap](https://github.com/shap/shap/blob/master/LICENSE)
|
|
19
|
+
- [openpyxl](https://github.com/chronossc/openpyxl/blob/main/LICENSE)
|
|
20
|
+
- [miceforest](https://github.com/AnotherSamWilson/miceforest/blob/main/LICENSE)
|
|
21
|
+
- [polars](https://github.com/pola-rs/polars/blob/main/LICENSE)
|
|
22
|
+
- [torchvision](https://github.com/pytorch/vision/blob/main/LICENSE)
|
|
23
|
+
- [pyswarm](https://pythonhosted.org/pyswarm/#license)
|
ml_tools/MICE_imputation.py
CHANGED
|
@@ -3,7 +3,7 @@ import miceforest as mf
|
|
|
3
3
|
import os
|
|
4
4
|
import matplotlib.pyplot as plt
|
|
5
5
|
import numpy as np
|
|
6
|
-
from utilities import load_dataframe, list_csv_paths
|
|
6
|
+
from ml_tools.utilities import load_dataframe, list_csv_paths
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def apply_mice(df: pd.DataFrame, df_name: str, resulting_datasets: int=1, iterations: int=20, random_state: int=101):
|
ml_tools/data_exploration.py
CHANGED
|
@@ -11,12 +11,11 @@ from typing import Union, Literal, Dict, Tuple, Optional
|
|
|
11
11
|
import os
|
|
12
12
|
import sys
|
|
13
13
|
import textwrap
|
|
14
|
-
from utilities import sanitize_filename
|
|
14
|
+
from ml_tools.utilities import sanitize_filename
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
# Keep track of all available functions, show using `info()`
|
|
18
|
-
__all__ = ["
|
|
19
|
-
"summarize_dataframe",
|
|
18
|
+
__all__ = ["summarize_dataframe",
|
|
20
19
|
"drop_rows_with_missing_data",
|
|
21
20
|
"split_features_targets",
|
|
22
21
|
"show_null_columns",
|
|
@@ -33,21 +32,6 @@ __all__ = ["load_dataframe",
|
|
|
33
32
|
"drop_vif_based"]
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
def load_dataframe(df_path: str) -> pd.DataFrame:
|
|
37
|
-
"""
|
|
38
|
-
Loads a DataFrame from a CSV file.
|
|
39
|
-
|
|
40
|
-
Args:
|
|
41
|
-
df_path (str): Path to the CSV file.
|
|
42
|
-
|
|
43
|
-
Returns:
|
|
44
|
-
pd.DataFrame: Loaded DataFrame.
|
|
45
|
-
"""
|
|
46
|
-
df = pd.read_csv(df_path, encoding='utf-8')
|
|
47
|
-
print(f"DataFrame shape {df.shape}")
|
|
48
|
-
return df
|
|
49
|
-
|
|
50
|
-
|
|
51
35
|
def summarize_dataframe(df: pd.DataFrame, round_digits: int = 2):
|
|
52
36
|
"""
|
|
53
37
|
Returns a summary DataFrame with data types, non-null counts, number of unique values,
|
ml_tools/ensemble_learning.py
CHANGED
|
@@ -21,6 +21,8 @@ from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
|
|
|
21
21
|
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, mean_absolute_error, mean_squared_error, r2_score, roc_curve, roc_auc_score
|
|
22
22
|
import shap
|
|
23
23
|
|
|
24
|
+
from .utilities import yield_dataframes_from_dir
|
|
25
|
+
|
|
24
26
|
import warnings # Ignore warnings
|
|
25
27
|
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
|
26
28
|
warnings.filterwarnings('ignore', category=FutureWarning)
|
|
@@ -28,23 +30,6 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
###### 1. Dataset Loader ######
|
|
31
|
-
#Load imputed datasets as a generator
|
|
32
|
-
def yield_imputed_dataframe(datasets_dir: str):
|
|
33
|
-
'''
|
|
34
|
-
Yields a tuple `(dataframe, dataframe_name)`
|
|
35
|
-
'''
|
|
36
|
-
dataset_filenames = [dataset for dataset in os.listdir(datasets_dir) if dataset.endswith(".csv")]
|
|
37
|
-
if not dataset_filenames:
|
|
38
|
-
raise IOError(f"No imputed datasets have been found at {datasets_dir}")
|
|
39
|
-
|
|
40
|
-
for dataset_filename in dataset_filenames:
|
|
41
|
-
full_path = os.path.join(datasets_dir, dataset_filename)
|
|
42
|
-
df = pd.read_csv(full_path)
|
|
43
|
-
#remove extension
|
|
44
|
-
filename = os.path.splitext(os.path.basename(dataset_filename))[0]
|
|
45
|
-
print(f"Working on dataset: {filename}")
|
|
46
|
-
yield (df, filename)
|
|
47
|
-
|
|
48
33
|
#Split a dataset into features and targets datasets
|
|
49
34
|
def dataset_yielder(df: pd.DataFrame, target_cols: list[str]):
|
|
50
35
|
'''
|
|
@@ -543,7 +528,7 @@ def get_shap_values(model, model_name: str,
|
|
|
543
528
|
plot_size=figsize,
|
|
544
529
|
max_display=max_display_features,
|
|
545
530
|
alpha=0.7,
|
|
546
|
-
color=plt.get_cmap('viridis')
|
|
531
|
+
color=plt.get_cmap('viridis') # type: ignore
|
|
547
532
|
)
|
|
548
533
|
|
|
549
534
|
# Add professional styling
|
|
@@ -674,7 +659,7 @@ def run_pipeline(datasets_dir: str, save_dir: str, target_columns: list[str], ta
|
|
|
674
659
|
#Check paths
|
|
675
660
|
_check_paths(datasets_dir, save_dir)
|
|
676
661
|
#Yield imputed dataset
|
|
677
|
-
for dataframe, dataframe_name in
|
|
662
|
+
for dataframe, dataframe_name in yield_dataframes_from_dir(datasets_dir):
|
|
678
663
|
#Yield features dataframe and target dataframe
|
|
679
664
|
for df_features, df_target, feature_names, target_name in dataset_yielder(df=dataframe, target_cols=target_columns):
|
|
680
665
|
#Dataset pipeline
|
ml_tools/logger.py
CHANGED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
dragon_ml_toolbox-1.1.6.dist-info/licenses/LICENSE,sha256=2uUFNy7D0TLgHim1K5s3DIJ4q_KvxEXVilnU20cWliY,1066
|
|
2
|
-
ml_tools/MICE_imputation.py,sha256=wLM4DJTs-CxuGzEmuTj7Tmb7AoKGs16cdxQD2Ne8Dv0,7340
|
|
3
|
-
ml_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
ml_tools/data_exploration.py,sha256=Nx8V6xYmh2XqMF3WXg0BdAQnDAFq5cFd36JHFIf56vc,26989
|
|
5
|
-
ml_tools/datasetmaster.py,sha256=VUneKshnmjOGbtqVVGTFcIMRKF3s6ZDYrosIYKDjD80,28956
|
|
6
|
-
ml_tools/ensemble_learning.py,sha256=uA7A94CLv8o2l125oTEi0cjHusZkB-7Mnrtn7SGTfjs,29714
|
|
7
|
-
ml_tools/handle_excel.py,sha256=IR0VQc3hYdmjwC31E5YxDnRcWig4jSIx7Y_7to-KZz4,11969
|
|
8
|
-
ml_tools/logger.py,sha256=yQ5v8e2UnkKgQDszpg5zihpLPI8ehEci7p_2PKkshls,4613
|
|
9
|
-
ml_tools/particle_swarm_optimization.py,sha256=jpkje4OETC9fyISxxUTx4XGrImSU6gDEcwz46ZDs2bQ,19250
|
|
10
|
-
ml_tools/pytorch_models.py,sha256=Oykw02sOZLCjvSadQd64UGesBN7kq0x1EGXHusvYiQI,9908
|
|
11
|
-
ml_tools/trainer.py,sha256=Zd7AaHeoNd8dEas2JChWoHaCUpWUVRDUMybuHaKJ0XY,16740
|
|
12
|
-
ml_tools/utilities.py,sha256=mG_--EFplfI9H7OhrWI8VkdNJtTbs4Wbz32xvcFWps8,5518
|
|
13
|
-
ml_tools/vision_helpers.py,sha256=lBAW6dzAK-HOswAt1fU_tfP9hkNLY5D8c_I_7hhEXno,7528
|
|
14
|
-
dragon_ml_toolbox-1.1.6.dist-info/METADATA,sha256=sqpFHimlmN3xtYTeqPa8XU4BWUj8k6u5HP1E7PRWI_0,4403
|
|
15
|
-
dragon_ml_toolbox-1.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
16
|
-
dragon_ml_toolbox-1.1.6.dist-info/top_level.txt,sha256=wm-oxax3ciyez6VoO4zsFd-gSok2VipYXnbg3TH9PtU,9
|
|
17
|
-
dragon_ml_toolbox-1.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|