skwrapper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skwrapper-0.1.0/LICENCE.txt +21 -0
- skwrapper-0.1.0/PKG-INFO +174 -0
- skwrapper-0.1.0/README.md +163 -0
- skwrapper-0.1.0/pyproject.toml +23 -0
- skwrapper-0.1.0/setup.cfg +4 -0
- skwrapper-0.1.0/src/skwrapper/Supervised/classificationModels.py +264 -0
- skwrapper-0.1.0/src/skwrapper/Supervised/regressionModels.py +363 -0
- skwrapper-0.1.0/src/skwrapper/__init__.py +4 -0
- skwrapper-0.1.0/src/skwrapper/core.py +236 -0
- skwrapper-0.1.0/src/skwrapper.egg-info/PKG-INFO +174 -0
- skwrapper-0.1.0/src/skwrapper.egg-info/SOURCES.txt +13 -0
- skwrapper-0.1.0/src/skwrapper.egg-info/dependency_links.txt +1 -0
- skwrapper-0.1.0/src/skwrapper.egg-info/requires.txt +1 -0
- skwrapper-0.1.0/src/skwrapper.egg-info/top_level.txt +2 -0
- skwrapper-0.1.0/src/tests/test_basic.py +73 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Anuj Rajesh Tiwari
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
skwrapper-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skwrapper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: This package helps Data Scientists train their models on a dataset with different models without copy-pasting the same code again and again. It is a scikit-learn wrapper that performs the model training, saving developer time, and it also provides detailed metrics so you can quickly scan which model best fits your dataset
|
|
5
|
+
Author: Anuj Rajesh Tiwari
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENCE.txt
|
|
9
|
+
Requires-Dist: scikit-learn>=1.3
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# **skwrapper**
|
|
13
|
+
This package helps Data Scientists train their models on a dataset with different models without copy-pasting the same code again and again. It is a scikit-learn wrapper that performs the model training, saving developer time, and it also provides detailed metrics so you can quickly scan which model best fits your dataset.
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
- Supports regression and classifications models
|
|
17
|
+
- Computes common regression and classification metrics.
|
|
18
|
+
- Optional display of predicted values.
|
|
19
|
+
- Easy-to-use unified interface for training and evaluation.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
```bash
pip install skwrapper
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## **Usage Example**
|
|
29
|
+
```python
|
|
30
|
+
## Import class from Library
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import matplotlib.pyplot as plt
|
|
33
|
+
import seaborn as sns
|
|
34
|
+
from skwrapper import sc, sr
|
|
35
|
+
|
|
36
|
+
df = pd.read_csv("Social_Network_Ads.csv")
|
|
37
|
+
|
|
38
|
+
selected_row = df.loc[:, 'Age': 'Purchased']
|
|
39
|
+
|
|
40
|
+
from sklearn.model_selection import train_test_split
|
|
41
|
+
from sklearn.preprocessing import StandardScaler
|
|
42
|
+
# first we have to define X and y where X is the variable or feature input and y is the output target basically
|
|
43
|
+
x = selected_row[['Age', 'EstimatedSalary']]
|
|
44
|
+
y = selected_row['Purchased']
|
|
45
|
+
|
|
46
|
+
## Split the Data
|
|
47
|
+
X_train, x_test, y_train, y_test = train_test_split( x, y, train_size=0.8, random_state=48 )
|
|
48
|
+
|
|
49
|
+
X_train.shape, x_test.shape
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# doing standardization
|
|
53
|
+
scaler = StandardScaler()
|
|
54
|
+
|
|
55
|
+
#fit the scaler to the train set, it will learn the parameter
|
|
56
|
+
scaler.fit(X_train) ## learn mean and std from the train dataset
|
|
57
|
+
X_train_scaled = scaler.transform(X_train) ## Apply sacling
|
|
58
|
+
X_test_scaler = scaler.transform(x_test) ## Apply same scaling on X_test as well
|
|
59
|
+
|
|
60
|
+
#convert the numpy 2D arry to pd dataframes with column names on it as numpy array dont have column name after scaling
|
|
61
|
+
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
|
|
62
|
+
X_test_scaler_df = pd.DataFrame(X_test_scaler, columns=x_test.columns)
|
|
63
|
+
|
|
64
|
+
print(X_train_scaled_df.describe())
|
|
65
|
+
print(X_train.describe())
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Initialize class
|
|
69
|
+
sc = sc() ## Supervised Classifications class
|
|
70
|
+
sr = sr() ## Supervised Regression class
|
|
71
|
+
|
|
72
|
+
# Train and evaluate Models
|
|
73
|
+
|
|
|
74
|
+
## Single Model Execution for sr(Supervised Classification Models)
|
|
75
|
+
sc.perform(
|
|
76
|
+
case=["logistic"],
|
|
77
|
+
xy_train=[X_train_scaled_df, y_train],
|
|
78
|
+
xy_test=[X_test_scaler_df, y_test],
|
|
79
|
+
|
|
80
|
+
## Optional Parameter
|
|
81
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
## Multiple Model Execution for sr(Supervised Regression Models)
|
|
85
|
+
result = sc.perform(
|
|
86
|
+
case=["logistic", "svc", "knc"],
|
|
87
|
+
xy_train=[X_train_scaled_df, y_train],
|
|
88
|
+
xy_test=[X_test_scaler_df, y_test],
|
|
89
|
+
|
|
90
|
+
## Optional Parameter
|
|
91
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
#----------------and for Supervised Regression Models-------------------#
|
|
95
|
+
## Single Model Execution:
|
|
96
|
+
sr.perform(
|
|
97
|
+
case=["linearR"],
|
|
98
|
+
xy_train=[X_train, y_train],
|
|
99
|
+
xy_test=[x_test, y_test],
|
|
100
|
+
|
|
101
|
+
#Optional Parameter
|
|
102
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
## Multiple Model Execution:
|
|
106
|
+
result = sr.perform(
|
|
107
|
+
case=["linearR", "svr", "knr"],
|
|
108
|
+
xy_train=[X_train, y_train],
|
|
109
|
+
xy_test=[x_test, y_test]
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
print(result) ## This will print all the metrics for all defiend models in **case**
|
|
113
|
+
|
|
114
|
+
# Access specific model metrics or predection value
|
|
115
|
+
print("MSE for Linear Regression:", result["linearR"]["mse"])
|
|
116
|
+
print("predicted_value for SVR:", result["svr"]["predicted_value"])
|
|
117
|
+
|
|
118
|
+
## You Can Plot the predicted Values
|
|
119
|
+
sns.scatterplot(result['svr']['predicted_value'])
|
|
120
|
+
plt.show()
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## **Supported Models**
|
|
126
|
+
- **Wrapper class for multiple sklearn regression models.**:
|
|
127
|
+
|
|
128
|
+
| Model | Description |
|
|
129
|
+
|-------|-------------|
|
|
130
|
+
| linearR | Linear Regression |
|
|
131
|
+
| ridge | Ridge Regression |
|
|
132
|
+
| lasso | Lasso Regression |
|
|
133
|
+
| svr | Support Vector Regression |
|
|
134
|
+
| knr | K-Nearest Neighbors Regressor |
|
|
135
|
+
| gbr | Gradient Boosting Regressor |
|
|
136
|
+
| rfr | Random Forest Regressor |
|
|
137
|
+
| dtr | Decision Tree Regressor |
|
|
138
|
+
|
|
139
|
+
## **Metrics**
|
|
140
|
+
- **metrics computed automatically**:
|
|
141
|
+
|
|
142
|
+
```markdown
|
|
143
|
+
- Mean Squared Error (MSE)
|
|
144
|
+
- Mean Absolute Error (MAE)
|
|
145
|
+
- Root Mean Squared Error (RMSE)
|
|
146
|
+
- R² Score
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
- **Wrapper class for multiple sklearn Classifications models.**:
|
|
152
|
+
|
|
153
|
+
| Model | Description |
|
|
154
|
+
|-------|-------------|
|
|
155
|
+
| logistic | Logistic Regression Classifier |
|
|
156
|
+
| svc | Support Vector Classifier (SVC) |
|
|
157
|
+
| rfc | Random Forest Classifier |
|
|
158
|
+
| gbc | Gradient Boosting Classifier |
|
|
159
|
+
| knc | K-Nearest Neighbors Classifier |
|
|
160
|
+
| dtc | Decision Tree Classifier |
|
|
161
|
+
|
|
162
|
+
## **Metrics**
|
|
163
|
+
- **metrics computed automatically**:
|
|
164
|
+
|
|
165
|
+
```markdown
|
|
166
|
+
- accuracy
|
|
167
|
+
- confusion_matrix
|
|
168
|
+
- classification_report
|
|
169
|
+
```
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# **skwrapper**
|
|
2
|
+
This package helps Data Scientists train their models on a dataset with different models without copy-pasting the same code again and again. It is a scikit-learn wrapper that performs the model training, saving developer time, and it also provides detailed metrics so you can quickly scan which model best fits your dataset.
|
|
3
|
+
|
|
4
|
+
## Features
|
|
5
|
+
- Supports regression and classifications models
|
|
6
|
+
- Computes common regression and classification metrics.
|
|
7
|
+
- Optional display of predicted values.
|
|
8
|
+
- Easy-to-use unified interface for training and evaluation.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
```bash
pip install skwrapper
```
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## **Usage Example**
|
|
18
|
+
```python
|
|
19
|
+
## Import class from Library
|
|
20
|
+
import pandas as pd
|
|
21
|
+
import matplotlib.pyplot as plt
|
|
22
|
+
import seaborn as sns
|
|
23
|
+
from skwrapper import sc, sr
|
|
24
|
+
|
|
25
|
+
df = pd.read_csv("Social_Network_Ads.csv")
|
|
26
|
+
|
|
27
|
+
selected_row = df.loc[:, 'Age': 'Purchased']
|
|
28
|
+
|
|
29
|
+
from sklearn.model_selection import train_test_split
|
|
30
|
+
from sklearn.preprocessing import StandardScaler
|
|
31
|
+
# first we have to define X and y where X is the variable or feature input and y is the output target basically
|
|
32
|
+
x = selected_row[['Age', 'EstimatedSalary']]
|
|
33
|
+
y = selected_row['Purchased']
|
|
34
|
+
|
|
35
|
+
## Split the Data
|
|
36
|
+
X_train, x_test, y_train, y_test = train_test_split( x, y, train_size=0.8, random_state=48 )
|
|
37
|
+
|
|
38
|
+
X_train.shape, x_test.shape
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# doing standardization
|
|
42
|
+
scaler = StandardScaler()
|
|
43
|
+
|
|
44
|
+
#fit the scaler to the train set, it will learn the parameter
|
|
45
|
+
scaler.fit(X_train) ## learn mean and std from the train dataset
|
|
46
|
+
X_train_scaled = scaler.transform(X_train) ## Apply sacling
|
|
47
|
+
X_test_scaler = scaler.transform(x_test) ## Apply same scaling on X_test as well
|
|
48
|
+
|
|
49
|
+
#convert the numpy 2D arry to pd dataframes with column names on it as numpy array dont have column name after scaling
|
|
50
|
+
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
|
|
51
|
+
X_test_scaler_df = pd.DataFrame(X_test_scaler, columns=x_test.columns)
|
|
52
|
+
|
|
53
|
+
print(X_train_scaled_df.describe())
|
|
54
|
+
print(X_train.describe())
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Initialize class
|
|
58
|
+
sc = sc() ## Supervised Classifications class
|
|
59
|
+
sr = sr() ## Supervised Regression class
|
|
60
|
+
|
|
61
|
+
# Train and evaluate Models
|
|
62
|
+
|
|
|
63
|
+
## Single Model Execution for sr(Supervised Classification Models)
|
|
64
|
+
sc.perform(
|
|
65
|
+
case=["logistic"],
|
|
66
|
+
xy_train=[X_train_scaled_df, y_train],
|
|
67
|
+
xy_test=[X_test_scaler_df, y_test],
|
|
68
|
+
|
|
69
|
+
## Optional Parameter
|
|
70
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
## Multiple Model Execution for sr(Supervised Regression Models)
|
|
74
|
+
result = sc.perform(
|
|
75
|
+
case=["logistic", "svc", "knc"],
|
|
76
|
+
xy_train=[X_train_scaled_df, y_train],
|
|
77
|
+
xy_test=[X_test_scaler_df, y_test],
|
|
78
|
+
|
|
79
|
+
## Optional Parameter
|
|
80
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
#----------------and for Supervised Regression Models-------------------#
|
|
84
|
+
## Single Model Execution:
|
|
85
|
+
sr.perform(
|
|
86
|
+
case=["linearR"],
|
|
87
|
+
xy_train=[X_train, y_train],
|
|
88
|
+
xy_test=[x_test, y_test],
|
|
89
|
+
|
|
90
|
+
#Optional Parameter
|
|
91
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
## Multiple Model Execution:
|
|
95
|
+
result = sr.perform(
|
|
96
|
+
case=["linearR", "svr", "knr"],
|
|
97
|
+
xy_train=[X_train, y_train],
|
|
98
|
+
xy_test=[x_test, y_test]
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
print(result) ## This will print all the metrics for all defiend models in **case**
|
|
102
|
+
|
|
103
|
+
# Access specific model metrics or predection value
|
|
104
|
+
print("MSE for Linear Regression:", result["linearR"]["mse"])
|
|
105
|
+
print("predicted_value for SVR:", result["svr"]["predicted_value"])
|
|
106
|
+
|
|
107
|
+
## You Can Plot the predicted Values
|
|
108
|
+
sns.scatterplot(result['svr']['predicted_value'])
|
|
109
|
+
plt.show()
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## **Supported Models**
|
|
115
|
+
- **Wrapper class for multiple sklearn regression models.**:
|
|
116
|
+
|
|
117
|
+
| Model | Description |
|
|
118
|
+
|-------|-------------|
|
|
119
|
+
| linearR | Linear Regression |
|
|
120
|
+
| ridge | Ridge Regression |
|
|
121
|
+
| lasso | Lasso Regression |
|
|
122
|
+
| svr | Support Vector Regression |
|
|
123
|
+
| knr | K-Nearest Neighbors Regressor |
|
|
124
|
+
| gbr | Gradient Boosting Regressor |
|
|
125
|
+
| rfr | Random Forest Regressor |
|
|
126
|
+
| dtr | Decision Tree Regressor |
|
|
127
|
+
|
|
128
|
+
## **Metrics**
|
|
129
|
+
- **metrics computed automatically**:
|
|
130
|
+
|
|
131
|
+
```markdown
|
|
132
|
+
- Mean Squared Error (MSE)
|
|
133
|
+
- Mean Absolute Error (MAE)
|
|
134
|
+
- Root Mean Squared Error (RMSE)
|
|
135
|
+
- R² Score
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
- **Wrapper class for multiple sklearn Classifications models.**:
|
|
141
|
+
|
|
142
|
+
| Model | Description |
|
|
143
|
+
|-------|-------------|
|
|
144
|
+
| logistic | Logistic Regression Classifier |
|
|
145
|
+
| svc | Support Vector Classifier (SVC) |
|
|
146
|
+
| rfc | Random Forest Classifier |
|
|
147
|
+
| gbc | Gradient Boosting Classifier |
|
|
148
|
+
| knc | K-Nearest Neighbors Classifier |
|
|
149
|
+
| dtc | Decision Tree Classifier |
|
|
150
|
+
|
|
151
|
+
## **Metrics**
|
|
152
|
+
- **metrics computed automatically**:
|
|
153
|
+
|
|
154
|
+
```markdown
|
|
155
|
+
- accuracy
|
|
156
|
+
- confusion_matrix
|
|
157
|
+
- classification_report
|
|
158
|
+
```
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "skwrapper"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "This package helps the Data Scientist to train there model on dataset with different models without copy pasting the code again and again, this package is a Sklearn wrapper which does performs the model training. this saves time of developer and it also helps with detail metrics and for quick scan which model best fits for dataset"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Anuj Rajesh Tiwari" }
|
|
14
|
+
]
|
|
15
|
+
dependencies = [
|
|
16
|
+
"scikit-learn>=1.3"
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[tool.setuptools]
|
|
20
|
+
package-dir = {"" = "src"}
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
where = ["src"]
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
from sklearn.linear_model import LogisticRegression
|
|
2
|
+
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
|
3
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
4
|
+
from sklearn.neighbors import KNeighborsClassifier
|
|
5
|
+
from sklearn.svm import SVC
|
|
6
|
+
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
|
7
|
+
|
|
8
|
+
def logistic(xy_train, xy_test, show_pred: bool, **kwargs):
    """Train and evaluate a sklearn ``LogisticRegression`` classifier.

    Parameters:
        xy_train: ``(x_train, y_train)`` pair of training features and labels.
        xy_test: ``(x_test, y_test)`` pair of test features and labels.
        show_pred: when True, the predicted values are printed in addition
            to the evaluation metrics.
        **kwargs: optional LogisticRegression hyper-parameters
            (C, solver, max_iter, fit_intercept, class_weight, random_state).

    Returns:
        dict with keys "predicted_value", "accuracy", "confusion_matrix".

    Raises:
        ValueError: if either data pair is missing.
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform Logistic Regression")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model hyper-parameters (overridable via kwargs; max_iter raised to 1000
    # and random_state pinned to 42 for reproducible results).
    C: float = kwargs.get("C", 1.0)
    solver: str = kwargs.get("solver", "lbfgs")
    max_iter: int = kwargs.get("max_iter", 1000)
    fit_intercept: bool = kwargs.get("fit_intercept", True)
    class_weight = kwargs.get("class_weight", None)
    random_state = kwargs.get("random_state", 42)
    print("<------------------------Running LogisticRegression---------------------------------------->")

    model = LogisticRegression(C=C, solver=solver, max_iter=max_iter,
                               fit_intercept=fit_intercept,
                               class_weight=class_weight,
                               random_state=random_state)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # Only print the raw predictions when the caller asked for them; the
    # original looped over the metrics dict with a `continue` just to gate
    # this single print.
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    print(f"accuracy score for LogisticRegression: {accuracy :.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
50
|
+
|
|
51
|
+
###################################################################################################################################
|
|
52
|
+
|
|
53
|
+
def randomForestClassifer(xy_train, xy_test, show_pred: bool, **kwargs):
    """Train and evaluate a sklearn ``RandomForestClassifier``.

    Parameters:
        xy_train: ``(x_train, y_train)`` pair of training features and labels.
        xy_test: ``(x_test, y_test)`` pair of test features and labels.
        show_pred: when True, the predicted values are printed in addition
            to the evaluation metrics.
        **kwargs: optional RandomForestClassifier hyper-parameters
            (n_estimators, max_depth, criterion, class_weight, random_state,
            n_jobs).

    Returns:
        dict with keys "predicted_value", "accuracy", "confusion_matrix".

    Raises:
        ValueError: if either data pair is missing.
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform RandomForestClassifer")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model hyper-parameters (overridable via kwargs; n_jobs=-1 uses all
    # cores, random_state pinned to 42 for reproducible results).
    n_estimators: int = kwargs.get("n_estimators", 100)
    max_depth = kwargs.get("max_depth", None)  # None lets trees grow fully
    criterion: str = kwargs.get("criterion", "gini")
    class_weight = kwargs.get("class_weight", None)
    random_state = kwargs.get("random_state", 42)
    n_jobs: int = kwargs.get("n_jobs", -1)

    print("<-------------------------Running RandomForestClassifer------------------------------------->")

    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, criterion=criterion,
                                   class_weight=class_weight, random_state=random_state, n_jobs=n_jobs)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUGFIX: the original printed predictions when `show_pred != True`,
    # i.e. exactly when the caller asked NOT to see them.
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUGFIX: label the correct model (was "LogisticRegression").
    print(f"accuracy score for RandomForestClassifier: {accuracy :.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
92
|
+
|
|
93
|
+
######################################################################################################################################
|
|
94
|
+
|
|
95
|
+
def svc(xy_train, xy_test, show_pred: bool, **kwargs):
    """Train and evaluate a sklearn ``SVC`` (support vector classifier).

    Parameters:
        xy_train: ``(x_train, y_train)`` pair of training features and labels.
        xy_test: ``(x_test, y_test)`` pair of test features and labels.
        show_pred: when True, the predicted values are printed in addition
            to the evaluation metrics.
        **kwargs: optional SVC hyper-parameters
            (kernel, C, random_state, probability).

    Returns:
        dict with keys "predicted_value", "accuracy", "confusion_matrix".

    Raises:
        ValueError: if either data pair is missing.
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform SupportVectorClassifer")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model hyper-parameters (overridable via kwargs; kernel defaults to
    # 'linear' rather than sklearn's 'rbf').
    kernel: str = kwargs.get("kernel", 'linear')
    C: float = kwargs.get("C", 1.0)  # annotation fixed: default is a float
    random_state: int = kwargs.get("random_state", 0)
    probability: bool = kwargs.get("probability", False)

    print("<-------------------------Running SupportVectorClassifer------------------------------------->")

    model = SVC(kernel=kernel, C=C, random_state=random_state, probability=probability)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUGFIX: the original printed predictions when `show_pred != True`,
    # i.e. exactly when the caller asked NOT to see them.
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUGFIX: label the correct model (was "LogisticRegression").
    print(f"accuracy score for SupportVectorClassifer: {accuracy :.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
131
|
+
|
|
132
|
+
######################################################################################################################################
|
|
133
|
+
|
|
134
|
+
def GBC(xy_train, xy_test, show_pred: bool, **kwargs):
    """Train and evaluate a sklearn ``GradientBoostingClassifier``.

    Parameters:
        xy_train: ``(x_train, y_train)`` pair of training features and labels.
        xy_test: ``(x_test, y_test)`` pair of test features and labels.
        show_pred: when True, the predicted values are printed in addition
            to the evaluation metrics.
        **kwargs: optional GradientBoostingClassifier hyper-parameters
            (n_estimators, learning_rate, max_depth, subsample,
            min_samples_split, min_samples_leaf, random_state).

    Returns:
        dict with keys "predicted_value", "accuracy", "confusion_matrix".

    Raises:
        ValueError: if either data pair is missing.
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform GradientBoostingClassifier")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model hyper-parameters (overridable via kwargs; sklearn defaults).
    n_estimators: int = kwargs.get("n_estimators", 100)
    learning_rate: float = kwargs.get("learning_rate", 0.1)
    max_depth: int = kwargs.get("max_depth", 3)
    subsample: float = kwargs.get("subsample", 1.0)
    min_samples_split = kwargs.get("min_samples_split", 2)
    min_samples_leaf = kwargs.get("min_samples_leaf", 1)
    random_state = kwargs.get("random_state", None)  # annotation dropped: default is None

    print("<-------------------------Running GradientBoostingClassifier------------------------------------->")

    model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate,
                                       max_depth=max_depth, subsample=subsample,
                                       min_samples_split=min_samples_split,
                                       min_samples_leaf=min_samples_leaf,
                                       random_state=random_state)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUGFIX: the original printed predictions when `show_pred != True`,
    # i.e. exactly when the caller asked NOT to see them.
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUGFIX: label the correct model (was "LogisticRegression").
    print(f"accuracy score for GradientBoostingClassifier: {accuracy :.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
175
|
+
|
|
176
|
+
######################################################################################################################################
|
|
177
|
+
|
|
178
|
+
def KNC(xy_train, xy_test, show_pred: bool, **kwargs):
    """Train and evaluate a sklearn ``KNeighborsClassifier``.

    Parameters:
        xy_train: ``(x_train, y_train)`` pair of training features and labels.
        xy_test: ``(x_test, y_test)`` pair of test features and labels.
        show_pred: when True, the predicted values are printed in addition
            to the evaluation metrics.
        **kwargs: optional KNeighborsClassifier hyper-parameters
            (n_neighbors, weights, algorithm, leaf_size, p, metric, n_jobs).

    Returns:
        dict with keys "predicted_value", "accuracy", "confusion_matrix".

    Raises:
        ValueError: if either data pair is missing.
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform KNeighborsClassifier")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model hyper-parameters (overridable via kwargs; sklearn defaults).
    n_neighbors: int = kwargs.get("n_neighbors", 5)
    weights: str = kwargs.get("weights", 'uniform')
    algorithm: str = kwargs.get("algorithm", 'auto')  # annotation fixed: value is a string
    leaf_size: int = kwargs.get("leaf_size", 30)
    p: int = kwargs.get("p", 2)  # p=2 makes 'minkowski' the Euclidean distance
    metric = kwargs.get("metric", 'minkowski')
    n_jobs = kwargs.get("n_jobs", None)  # annotation dropped: default is None

    print("<-------------------------Running KNeighborsClassifier------------------------------------->")

    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights,
                                 algorithm=algorithm, leaf_size=leaf_size, p=p,
                                 metric=metric, n_jobs=n_jobs)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUGFIX: the original printed predictions when `show_pred != True`,
    # i.e. exactly when the caller asked NOT to see them.
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUGFIX: label the correct model (was "LogisticRegression").
    print(f"accuracy score for KNeighborsClassifier: {accuracy :.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
219
|
+
|
|
220
|
+
######################################################################################################################################
|
|
221
|
+
|
|
222
|
+
def DTC(xy_train, xy_test, show_pred: bool, **kwargs):
    """Train and evaluate a sklearn ``DecisionTreeClassifier``.

    Parameters:
        xy_train: ``(x_train, y_train)`` pair of training features and labels.
        xy_test: ``(x_test, y_test)`` pair of test features and labels.
        show_pred: when True, the predicted values are printed in addition
            to the evaluation metrics.
        **kwargs: optional DecisionTreeClassifier hyper-parameters
            (criterion, splitter, max_depth, min_samples_split,
            min_samples_leaf, max_features, random_state).

    Returns:
        dict with keys "predicted_value", "accuracy", "confusion_matrix".

    Raises:
        ValueError: if either data pair is missing.
    """
    if xy_test is None or xy_train is None:
        # BUGFIX: the original error message named "KNeighborsClassifier".
        raise ValueError("xy_train and xy_test is required to perform DecisionTreeClassifier")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model hyper-parameters (overridable via kwargs; sklearn defaults).
    criterion: str = kwargs.get("criterion", "gini")
    splitter: str = kwargs.get("splitter", "best")
    max_depth = kwargs.get("max_depth", None)  # None lets the tree grow fully
    min_samples_split = kwargs.get("min_samples_split", 2)
    min_samples_leaf = kwargs.get("min_samples_leaf", 1)
    max_features = kwargs.get("max_features", None)
    random_state = kwargs.get("random_state", None)  # annotation dropped: default is None

    print("<-------------------------Running DecisionTreeClassifier------------------------------------->")

    model = DecisionTreeClassifier(criterion=criterion, splitter=splitter, max_depth=max_depth,
                                   min_samples_split=min_samples_split,
                                   min_samples_leaf=min_samples_leaf,
                                   max_features=max_features,
                                   random_state=random_state)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUGFIX: the original printed predictions when `show_pred != True`,
    # i.e. exactly when the caller asked NOT to see them.
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    print(f"accuracy score for DecisionTreeClassifier: {accuracy :.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
263
|
+
|
|
264
|
+
|