dragon-ml-toolbox 1.1.6__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dragon-ml-toolbox might be problematic. Click here for more details.

Files changed (23) hide show
  1. dragon_ml_toolbox-1.2.1/LICENSE-THIRD-PARTY.md +23 -0
  2. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/PKG-INFO +38 -21
  3. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/README.md +16 -19
  4. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/dragon_ml_toolbox.egg-info/PKG-INFO +38 -21
  5. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/dragon_ml_toolbox.egg-info/SOURCES.txt +1 -0
  6. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/dragon_ml_toolbox.egg-info/requires.txt +20 -0
  7. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/MICE_imputation.py +1 -1
  8. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/data_exploration.py +2 -18
  9. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/ensemble_learning.py +4 -19
  10. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/logger.py +1 -1
  11. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/pyproject.toml +25 -2
  12. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/LICENSE +0 -0
  13. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/dragon_ml_toolbox.egg-info/dependency_links.txt +0 -0
  14. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/dragon_ml_toolbox.egg-info/top_level.txt +0 -0
  15. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/__init__.py +0 -0
  16. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/datasetmaster.py +0 -0
  17. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/handle_excel.py +0 -0
  18. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/particle_swarm_optimization.py +0 -0
  19. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/pytorch_models.py +0 -0
  20. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/trainer.py +0 -0
  21. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/utilities.py +0 -0
  22. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/ml_tools/vision_helpers.py +0 -0
  23. {dragon_ml_toolbox-1.1.6 → dragon_ml_toolbox-1.2.1}/setup.cfg +0 -0
@@ -0,0 +1,23 @@
1
+ # Third-Party Licenses
2
+
3
+ This project depends on the following third-party packages. Each is governed by its own license, linked below.
4
+
5
+ - [pandas](https://github.com/pandas-dev/pandas/blob/main/LICENSE)
6
+ - [numpy](https://github.com/numpy/numpy/blob/main/LICENSE.txt)
7
+ - [matplotlib](https://github.com/matplotlib/matplotlib/blob/main/LICENSE/LICENSE)
8
+ - [seaborn](https://github.com/mwaskom/seaborn/blob/main/LICENSE)
9
+ - [statsmodels](https://github.com/statsmodels/statsmodels/blob/main/LICENSE.txt)
10
+ - [ipython](https://github.com/ipython/ipython/blob/main/COPYING.rst)
11
+ - [torch](https://github.com/pytorch/pytorch/blob/main/LICENSE)
12
+ - [scikit-learn](https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)
13
+ - [imblearn](https://github.com/scikit-learn-contrib/imbalanced-learn/blob/main/LICENSE)
14
+ - [Pillow](https://github.com/python-pillow/Pillow/blob/main/LICENSE)
15
+ - [joblib](https://github.com/joblib/joblib/blob/main/LICENSE.txt)
16
+ - [xgboost](https://github.com/dmlc/xgboost/blob/main/LICENSE)
17
+ - [lightgbm](https://github.com/microsoft/LightGBM/blob/master/LICENSE)
18
+ - [shap](https://github.com/shap/shap/blob/master/LICENSE)
19
+ - [openpyxl](https://foss.heptapod.net/openpyxl/openpyxl/-/blob/branch/default/LICENCE.rst)
20
+ - [miceforest](https://github.com/AnotherSamWilson/miceforest/blob/main/LICENSE)
21
+ - [polars](https://github.com/pola-rs/polars/blob/main/LICENSE)
22
+ - [torchvision](https://github.com/pytorch/vision/blob/main/LICENSE)
23
+ - [pyswarm](https://pythonhosted.org/pyswarm/#license)
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 1.1.6
4
- Summary: A collection of tools for machine learning projects
3
+ Version: 1.2.1
4
+ Summary: A collection of tools for data science and machine learning projects
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.9
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ License-File: LICENSE-THIRD-PARTY.md
14
15
  Requires-Dist: numpy
15
16
  Requires-Dist: pandas
16
17
  Requires-Dist: matplotlib
@@ -70,6 +71,25 @@ Provides-Extra: vision-helpers
70
71
  Requires-Dist: Pillow; extra == "vision-helpers"
71
72
  Requires-Dist: torch; extra == "vision-helpers"
72
73
  Requires-Dist: torchvision; extra == "vision-helpers"
74
+ Provides-Extra: full
75
+ Requires-Dist: pandas; extra == "full"
76
+ Requires-Dist: numpy; extra == "full"
77
+ Requires-Dist: matplotlib; extra == "full"
78
+ Requires-Dist: seaborn; extra == "full"
79
+ Requires-Dist: statsmodels; extra == "full"
80
+ Requires-Dist: ipython; extra == "full"
81
+ Requires-Dist: torch; extra == "full"
82
+ Requires-Dist: scikit-learn; extra == "full"
83
+ Requires-Dist: imblearn; extra == "full"
84
+ Requires-Dist: Pillow; extra == "full"
85
+ Requires-Dist: joblib; extra == "full"
86
+ Requires-Dist: xgboost; extra == "full"
87
+ Requires-Dist: lightgbm; extra == "full"
88
+ Requires-Dist: shap; extra == "full"
89
+ Requires-Dist: openpyxl; extra == "full"
90
+ Requires-Dist: miceforest; extra == "full"
91
+ Requires-Dist: polars; extra == "full"
92
+ Requires-Dist: torchvision; extra == "full"
73
93
  Dynamic: license-file
74
94
 
75
95
  # dragon-ml-tools
@@ -85,22 +105,30 @@ A collection of Python utilities and machine learning tools, structured as a mod
85
105
 
86
106
  ## Installation
87
107
 
88
- ### Via GitHub (Editable / Development Mode)
108
+ Python 3.9+ recommended.
89
109
 
90
- Clone the repository and install in editable mode with optional dependencies:
110
+ ### Via PyPI (Stable Releases)
111
+
112
+ Install the latest stable release from PyPI with optional dependencies:
91
113
 
92
114
  ```bash
93
- git clone https://github.com/DrAg0n-BoRn/ML_tools.git
94
- cd ML_tools
95
- pip install -e '.[logger]'
115
+ pip install "dragon-ml-toolbox[logger,trainer]"
96
116
  ```
97
117
 
98
- ### Via PyPI (Stable Releases)
118
+ To install dependencies from all modules:
99
119
 
100
- Install the latest stable release from PyPI with optional dependencies:
120
+ ```bash
121
+ pip install "dragon-ml-toolbox[full]"
122
+ ```
123
+
124
+ ### Via GitHub (Editable)
125
+
126
+ Clone the repository and install in editable mode with optional dependencies:
101
127
 
102
128
  ```bash
103
- pip install dragon-ml-tools[logger,trainer]
129
+ git clone https://github.com/DrAg0n-BoRn/ML_tools.git
130
+ cd ML_tools
131
+ pip install -e '.[logger]'
104
132
  ```
105
133
 
106
134
  ## Usage
@@ -111,14 +139,3 @@ After installation, import modules like this:
111
139
  from ml_tools.utilities import sanitize_filename
112
140
  from ml_tools.logger import custom_logger
113
141
  ```
114
-
115
- ## Development
116
-
117
- Python 3.9+ recommended.
118
-
119
- To install all dependencies including development tools:
120
-
121
- ```python
122
- pip install -e '.[dev]'
123
- ```
124
-
@@ -11,22 +11,30 @@ A collection of Python utilities and machine learning tools, structured as a mod
11
11
 
12
12
  ## Installation
13
13
 
14
- ### Via GitHub (Editable / Development Mode)
14
+ Python 3.9+ recommended.
15
15
 
16
- Clone the repository and install in editable mode with optional dependencies:
16
+ ### Via PyPI (Stable Releases)
17
+
18
+ Install the latest stable release from PyPI with optional dependencies:
17
19
 
18
20
  ```bash
19
- git clone https://github.com/DrAg0n-BoRn/ML_tools.git
20
- cd ML_tools
21
- pip install -e '.[logger]'
21
+ pip install "dragon-ml-toolbox[logger,trainer]"
22
22
  ```
23
23
 
24
- ### Via PyPI (Stable Releases)
24
+ To install dependencies from all modules:
25
25
 
26
- Install the latest stable release from PyPI with optional dependencies:
26
+ ```bash
27
+ pip install "dragon-ml-toolbox[full]"
28
+ ```
29
+
30
+ ### Via GitHub (Editable)
31
+
32
+ Clone the repository and install in editable mode with optional dependencies:
27
33
 
28
34
  ```bash
29
- pip install dragon-ml-tools[logger,trainer]
35
+ git clone https://github.com/DrAg0n-BoRn/ML_tools.git
36
+ cd ML_tools
37
+ pip install -e '.[logger]'
30
38
  ```
31
39
 
32
40
  ## Usage
@@ -37,14 +45,3 @@ After installation, import modules like this:
37
45
  from ml_tools.utilities import sanitize_filename
38
46
  from ml_tools.logger import custom_logger
39
47
  ```
40
-
41
- ## Development
42
-
43
- Python 3.9+ recommended.
44
-
45
- To install all dependencies including development tools:
46
-
47
- ```python
48
- pip install -e '.[dev]'
49
- ```
50
-
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dragon-ml-toolbox
3
- Version: 1.1.6
4
- Summary: A collection of tools for machine learning projects
3
+ Version: 1.2.1
4
+ Summary: A collection of tools for data science and machine learning projects
5
5
  Author-email: Karl Loza <luigiloza@gmail.com>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/DrAg0n-BoRn/ML_tools
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.9
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ License-File: LICENSE-THIRD-PARTY.md
14
15
  Requires-Dist: numpy
15
16
  Requires-Dist: pandas
16
17
  Requires-Dist: matplotlib
@@ -70,6 +71,25 @@ Provides-Extra: vision-helpers
70
71
  Requires-Dist: Pillow; extra == "vision-helpers"
71
72
  Requires-Dist: torch; extra == "vision-helpers"
72
73
  Requires-Dist: torchvision; extra == "vision-helpers"
74
+ Provides-Extra: full
75
+ Requires-Dist: pandas; extra == "full"
76
+ Requires-Dist: numpy; extra == "full"
77
+ Requires-Dist: matplotlib; extra == "full"
78
+ Requires-Dist: seaborn; extra == "full"
79
+ Requires-Dist: statsmodels; extra == "full"
80
+ Requires-Dist: ipython; extra == "full"
81
+ Requires-Dist: torch; extra == "full"
82
+ Requires-Dist: scikit-learn; extra == "full"
83
+ Requires-Dist: imblearn; extra == "full"
84
+ Requires-Dist: Pillow; extra == "full"
85
+ Requires-Dist: joblib; extra == "full"
86
+ Requires-Dist: xgboost; extra == "full"
87
+ Requires-Dist: lightgbm; extra == "full"
88
+ Requires-Dist: shap; extra == "full"
89
+ Requires-Dist: openpyxl; extra == "full"
90
+ Requires-Dist: miceforest; extra == "full"
91
+ Requires-Dist: polars; extra == "full"
92
+ Requires-Dist: torchvision; extra == "full"
73
93
  Dynamic: license-file
74
94
 
75
95
  # dragon-ml-tools
@@ -85,22 +105,30 @@ A collection of Python utilities and machine learning tools, structured as a mod
85
105
 
86
106
  ## Installation
87
107
 
88
- ### Via GitHub (Editable / Development Mode)
108
+ Python 3.9+ recommended.
89
109
 
90
- Clone the repository and install in editable mode with optional dependencies:
110
+ ### Via PyPI (Stable Releases)
111
+
112
+ Install the latest stable release from PyPI with optional dependencies:
91
113
 
92
114
  ```bash
93
- git clone https://github.com/DrAg0n-BoRn/ML_tools.git
94
- cd ML_tools
95
- pip install -e '.[logger]'
115
+ pip install "dragon-ml-toolbox[logger,trainer]"
96
116
  ```
97
117
 
98
- ### Via PyPI (Stable Releases)
118
+ To install dependencies from all modules:
99
119
 
100
- Install the latest stable release from PyPI with optional dependencies:
120
+ ```bash
121
+ pip install "dragon-ml-toolbox[full]"
122
+ ```
123
+
124
+ ### Via GitHub (Editable)
125
+
126
+ Clone the repository and install in editable mode with optional dependencies:
101
127
 
102
128
  ```bash
103
- pip install dragon-ml-tools[logger,trainer]
129
+ git clone https://github.com/DrAg0n-BoRn/ML_tools.git
130
+ cd ML_tools
131
+ pip install -e '.[logger]'
104
132
  ```
105
133
 
106
134
  ## Usage
@@ -111,14 +139,3 @@ After installation, import modules like this:
111
139
  from ml_tools.utilities import sanitize_filename
112
140
  from ml_tools.logger import custom_logger
113
141
  ```
114
-
115
- ## Development
116
-
117
- Python 3.9+ recommended.
118
-
119
- To install all dependencies including development tools:
120
-
121
- ```python
122
- pip install -e '.[dev]'
123
- ```
124
-
@@ -1,4 +1,5 @@
1
1
  LICENSE
2
+ LICENSE-THIRD-PARTY.md
2
3
  README.md
3
4
  pyproject.toml
4
5
  dragon_ml_toolbox.egg-info/PKG-INFO
@@ -32,6 +32,26 @@ xgboost
32
32
  lightgbm
33
33
  shap
34
34
 
35
+ [full]
36
+ pandas
37
+ numpy
38
+ matplotlib
39
+ seaborn
40
+ statsmodels
41
+ ipython
42
+ torch
43
+ scikit-learn
44
+ imblearn
45
+ Pillow
46
+ joblib
47
+ xgboost
48
+ lightgbm
49
+ shap
50
+ openpyxl
51
+ miceforest
52
+ polars
53
+ torchvision
54
+
35
55
  [handle_excel]
36
56
  openpyxl
37
57
  pandas
@@ -3,7 +3,7 @@ import miceforest as mf
3
3
  import os
4
4
  import matplotlib.pyplot as plt
5
5
  import numpy as np
6
- from utilities import load_dataframe, list_csv_paths
6
+ from ml_tools.utilities import load_dataframe, list_csv_paths
7
7
 
8
8
 
9
9
  def apply_mice(df: pd.DataFrame, df_name: str, resulting_datasets: int=1, iterations: int=20, random_state: int=101):
@@ -11,12 +11,11 @@ from typing import Union, Literal, Dict, Tuple, Optional
11
11
  import os
12
12
  import sys
13
13
  import textwrap
14
- from utilities import sanitize_filename
14
+ from ml_tools.utilities import sanitize_filename
15
15
 
16
16
 
17
17
  # Keep track of all available functions, show using `info()`
18
- __all__ = ["load_dataframe",
19
- "summarize_dataframe",
18
+ __all__ = ["summarize_dataframe",
20
19
  "drop_rows_with_missing_data",
21
20
  "split_features_targets",
22
21
  "show_null_columns",
@@ -33,21 +32,6 @@ __all__ = ["load_dataframe",
33
32
  "drop_vif_based"]
34
33
 
35
34
 
36
- def load_dataframe(df_path: str) -> pd.DataFrame:
37
- """
38
- Loads a DataFrame from a CSV file.
39
-
40
- Args:
41
- df_path (str): Path to the CSV file.
42
-
43
- Returns:
44
- pd.DataFrame: Loaded DataFrame.
45
- """
46
- df = pd.read_csv(df_path, encoding='utf-8')
47
- print(f"DataFrame shape {df.shape}")
48
- return df
49
-
50
-
51
35
  def summarize_dataframe(df: pd.DataFrame, round_digits: int = 2):
52
36
  """
53
37
  Returns a summary DataFrame with data types, non-null counts, number of unique values,
@@ -21,6 +21,8 @@ from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
21
21
  from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay, mean_absolute_error, mean_squared_error, r2_score, roc_curve, roc_auc_score
22
22
  import shap
23
23
 
24
+ from .utilities import yield_dataframes_from_dir
25
+
24
26
  import warnings # Ignore warnings
25
27
  warnings.filterwarnings('ignore', category=DeprecationWarning)
26
28
  warnings.filterwarnings('ignore', category=FutureWarning)
@@ -28,23 +30,6 @@ warnings.filterwarnings('ignore', category=UserWarning)
28
30
 
29
31
 
30
32
  ###### 1. Dataset Loader ######
31
- #Load imputed datasets as a generator
32
- def yield_imputed_dataframe(datasets_dir: str):
33
- '''
34
- Yields a tuple `(dataframe, dataframe_name)`
35
- '''
36
- dataset_filenames = [dataset for dataset in os.listdir(datasets_dir) if dataset.endswith(".csv")]
37
- if not dataset_filenames:
38
- raise IOError(f"No imputed datasets have been found at {datasets_dir}")
39
-
40
- for dataset_filename in dataset_filenames:
41
- full_path = os.path.join(datasets_dir, dataset_filename)
42
- df = pd.read_csv(full_path)
43
- #remove extension
44
- filename = os.path.splitext(os.path.basename(dataset_filename))[0]
45
- print(f"Working on dataset: {filename}")
46
- yield (df, filename)
47
-
48
33
  #Split a dataset into features and targets datasets
49
34
  def dataset_yielder(df: pd.DataFrame, target_cols: list[str]):
50
35
  '''
@@ -543,7 +528,7 @@ def get_shap_values(model, model_name: str,
543
528
  plot_size=figsize,
544
529
  max_display=max_display_features,
545
530
  alpha=0.7,
546
- color=plt.get_cmap('viridis')
531
+ color=plt.get_cmap('viridis') # type: ignore
547
532
  )
548
533
 
549
534
  # Add professional styling
@@ -674,7 +659,7 @@ def run_pipeline(datasets_dir: str, save_dir: str, target_columns: list[str], ta
674
659
  #Check paths
675
660
  _check_paths(datasets_dir, save_dir)
676
661
  #Yield imputed dataset
677
- for dataframe, dataframe_name in yield_imputed_dataframe(datasets_dir):
662
+ for dataframe, dataframe_name in yield_dataframes_from_dir(datasets_dir):
678
663
  #Yield features dataframe and target dataframe
679
664
  for df_features, df_target, feature_names, target_name in dataset_yielder(df=dataframe, target_cols=target_columns):
680
665
  #Dataset pipeline
@@ -5,7 +5,7 @@ import pandas as pd
5
5
  from openpyxl.styles import Font, PatternFill
6
6
  import traceback
7
7
  import json
8
- from utilities import sanitize_filename
8
+ from ml_tools.utilities import sanitize_filename
9
9
 
10
10
 
11
11
  def custom_logger(
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "dragon-ml-toolbox"
3
- version = "1.1.6"
4
- description = "A collection of tools for machine learning projects"
3
+ version = "1.2.1"
4
+ description = "A collection of tools for data science and machine learning projects"
5
5
  authors = [
6
6
  { name = "Karl Loza", email = "luigiloza@gmail.com" }
7
7
  ]
@@ -99,7 +99,30 @@ vision_helpers = [
99
99
  "torchvision"
100
100
  ]
101
101
 
102
+ full = [
103
+ "pandas",
104
+ "numpy",
105
+ "matplotlib",
106
+ "seaborn",
107
+ "statsmodels",
108
+ "ipython",
109
+ "torch",
110
+ "scikit-learn",
111
+ "imblearn",
112
+ "Pillow",
113
+ "joblib",
114
+ "xgboost",
115
+ "lightgbm",
116
+ "shap",
117
+ "openpyxl",
118
+ "miceforest",
119
+ "polars",
120
+ "torchvision"
121
+ ]
122
+
102
123
  [build-system]
103
124
  requires = ["setuptools>=61.0"]
104
125
  build-backend = "setuptools.build_meta"
105
126
 
127
+ [tool.setuptools]
128
+ packages = ["ml_tools"]