skwrapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skwrapper/Supervised/classificationModels.py +264 -0
- skwrapper/Supervised/regressionModels.py +363 -0
- skwrapper/__init__.py +4 -0
- skwrapper/core.py +236 -0
- skwrapper-0.1.0.dist-info/METADATA +174 -0
- skwrapper-0.1.0.dist-info/RECORD +10 -0
- skwrapper-0.1.0.dist-info/WHEEL +5 -0
- skwrapper-0.1.0.dist-info/licenses/LICENCE.txt +21 -0
- skwrapper-0.1.0.dist-info/top_level.txt +2 -0
- tests/test_basic.py +73 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
from sklearn.linear_model import LogisticRegression
|
|
2
|
+
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
|
3
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
4
|
+
from sklearn.neighbors import KNeighborsClassifier
|
|
5
|
+
from sklearn.svm import SVC
|
|
6
|
+
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
|
7
|
+
|
|
8
|
+
def logistic(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a LogisticRegression model and report classification metrics.

    Parameters
    ----------
    xy_train : tuple
        (x_train, y_train) training data.
    xy_test : tuple
        (x_test, y_test) evaluation data.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: C (1.0), solver ("lbfgs"), max_iter (1000),
        fit_intercept (True), class_weight (None), random_state (42).

    Returns
    -------
    dict
        Keys: "predicted_value", "accuracy", "confusion_matrix".
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform Logistic Regression")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters pulled from kwargs with explicit defaults.
    params = {
        "C": kwargs.get("C", 1.0),
        "solver": kwargs.get("solver", "lbfgs"),
        "max_iter": kwargs.get("max_iter", 1000),
        "fit_intercept": kwargs.get("fit_intercept", True),
        "class_weight": kwargs.get("class_weight", None),
        "random_state": kwargs.get("random_state", 42),
    }
    print("<------------------------Running LogisticRegression---------------------------------------->")

    model = LogisticRegression(**params)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # Predictions are echoed only on explicit request.
    if show_pred == True:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    print(f"accuracy score for LogisticRegression: {accuracy:.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
50
|
+
|
|
51
|
+
###################################################################################################################################
|
|
52
|
+
|
|
53
|
+
def randomForestClassifer(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a RandomForestClassifier and report classification metrics.

    Parameters
    ----------
    xy_train : tuple
        (x_train, y_train) training data.
    xy_test : tuple
        (x_test, y_test) evaluation data.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: n_estimators (100), max_depth (None),
        criterion ("gini"), class_weight (None), random_state (42),
        n_jobs (-1).

    Returns
    -------
    dict
        Keys: "predicted_value", "accuracy", "confusion_matrix".
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform RandomForestClassifer")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model parameters
    n_estimators: int = kwargs.get("n_estimators", 100)
    max_depth = kwargs.get("max_depth", None)
    criterion: str = kwargs.get("criterion", "gini")
    class_weight = kwargs.get("class_weight", None)
    random_state = kwargs.get("random_state", 42)
    n_jobs: int = kwargs.get("n_jobs", -1)

    print("<-------------------------Running RandomForestClassifer------------------------------------->")

    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                   criterion=criterion, class_weight=class_weight,
                                   random_state=random_state, n_jobs=n_jobs)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUG FIX: the original loop lacked `continue`, so predictions were
    # printed exactly when show_pred was False (inverted flag).
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUG FIX: the label previously said "LogisticRegression" (copy-paste).
    print(f"accuracy score for RandomForestClassifier: {accuracy:.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
92
|
+
|
|
93
|
+
######################################################################################################################################
|
|
94
|
+
|
|
95
|
+
def svc(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a support-vector classifier (SVC) and report metrics.

    Parameters
    ----------
    xy_train : tuple
        (x_train, y_train) training data.
    xy_test : tuple
        (x_test, y_test) evaluation data.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: kernel ("linear"), C (1.0), random_state (0),
        probability (False).

    Returns
    -------
    dict
        Keys: "predicted_value", "accuracy", "confusion_matrix".
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform SupportVectorClassifer")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model parameters
    kernel: str = kwargs.get("kernel", 'linear')
    C = kwargs.get("C", 1.0)
    random_state: int = kwargs.get("random_state", 0)
    probability: bool = kwargs.get("probability", False)

    print("<-------------------------Running SupportVectorClassifer------------------------------------->")

    model = SVC(kernel=kernel, C=C, random_state=random_state, probability=probability)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUG FIX: the original loop lacked `continue`, so predictions were
    # printed exactly when show_pred was False (inverted flag).
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUG FIX: the label previously said "LogisticRegression" (copy-paste).
    print(f"accuracy score for SVC: {accuracy:.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
131
|
+
|
|
132
|
+
######################################################################################################################################
|
|
133
|
+
|
|
134
|
+
def GBC(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a GradientBoostingClassifier and report classification metrics.

    Parameters
    ----------
    xy_train : tuple
        (x_train, y_train) training data.
    xy_test : tuple
        (x_test, y_test) evaluation data.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: n_estimators (100), learning_rate (0.1),
        max_depth (3), subsample (1.0), min_samples_split (2),
        min_samples_leaf (1), random_state (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "accuracy", "confusion_matrix".
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform GradientBoostingClassifier")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model parameters
    n_estimators: int = kwargs.get("n_estimators", 100)
    learning_rate: float = kwargs.get("learning_rate", 0.1)
    max_depth: int = kwargs.get("max_depth", 3)
    subsample: float = kwargs.get("subsample", 1.0)
    min_samples_split = kwargs.get("min_samples_split", 2)
    min_samples_leaf = kwargs.get("min_samples_leaf", 1)
    random_state = kwargs.get("random_state", None)

    print("<-------------------------Running GradientBoostingClassifier------------------------------------->")

    model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate,
                                       max_depth=max_depth, subsample=subsample,
                                       min_samples_split=min_samples_split,
                                       min_samples_leaf=min_samples_leaf,
                                       random_state=random_state)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUG FIX: the original loop lacked `continue`, so predictions were
    # printed exactly when show_pred was False (inverted flag).
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUG FIX: the label previously said "LogisticRegression" (copy-paste).
    print(f"accuracy score for GradientBoostingClassifier: {accuracy:.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
175
|
+
|
|
176
|
+
######################################################################################################################################
|
|
177
|
+
|
|
178
|
+
def KNC(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a KNeighborsClassifier and report classification metrics.

    Parameters
    ----------
    xy_train : tuple
        (x_train, y_train) training data.
    xy_test : tuple
        (x_test, y_test) evaluation data.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: n_neighbors (5), weights ("uniform"),
        algorithm ("auto"), leaf_size (30), p (2), metric ("minkowski"),
        n_jobs (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "accuracy", "confusion_matrix".
    """
    if xy_test is None or xy_train is None:
        raise ValueError("xy_train and xy_test is required to perform KNeighborsClassifier")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model parameters
    n_neighbors: int = kwargs.get("n_neighbors", 5)
    weights: str = kwargs.get("weights", 'uniform')
    algorithm: str = kwargs.get("algorithm", 'auto')
    leaf_size: int = kwargs.get("leaf_size", 30)
    p: int = kwargs.get("p", 2)
    metric = kwargs.get("metric", 'minkowski')
    n_jobs = kwargs.get("n_jobs", None)

    print("<-------------------------Running KNeighborsClassifier------------------------------------->")

    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights,
                                 algorithm=algorithm, leaf_size=leaf_size, p=p,
                                 metric=metric, n_jobs=n_jobs)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUG FIX: the original loop lacked `continue`, so predictions were
    # printed exactly when show_pred was False (inverted flag).
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    # BUG FIX: the label previously said "LogisticRegression" (copy-paste).
    print(f"accuracy score for KNeighborsClassifier: {accuracy:.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
219
|
+
|
|
220
|
+
######################################################################################################################################
|
|
221
|
+
|
|
222
|
+
def DTC(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a DecisionTreeClassifier and report classification metrics.

    Parameters
    ----------
    xy_train : tuple
        (x_train, y_train) training data.
    xy_test : tuple
        (x_test, y_test) evaluation data.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: criterion ("gini"), splitter ("best"),
        max_depth (None), min_samples_split (2), min_samples_leaf (1),
        max_features (None), random_state (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "accuracy", "confusion_matrix".
    """
    if xy_test is None or xy_train is None:
        # BUG FIX: the message previously named "KNeighborsClassifier".
        raise ValueError("xy_train and xy_test is required to perform DecisionTreeClassifier")
    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Model parameters
    criterion: str = kwargs.get("criterion", "gini")
    splitter: str = kwargs.get("splitter", "best")
    max_depth = kwargs.get("max_depth", None)
    min_samples_split = kwargs.get("min_samples_split", 2)
    min_samples_leaf = kwargs.get("min_samples_leaf", 1)
    max_features = kwargs.get("max_features", None)
    random_state = kwargs.get("random_state", None)

    print("<-------------------------Running DecisionTreeClassifier------------------------------------->")

    model = DecisionTreeClassifier(criterion=criterion, splitter=splitter, max_depth=max_depth,
                                   min_samples_split=min_samples_split,
                                   min_samples_leaf=min_samples_leaf,
                                   max_features=max_features,
                                   random_state=random_state)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "predicted_value": y_pred,
        "accuracy": accuracy,
        "confusion_matrix": cm,
    }

    # BUG FIX: the original loop lacked `continue`, so predictions were
    # printed exactly when show_pred was False (inverted flag).
    if show_pred:
        print(f"predicted_value:\n {y_pred}")
    print("-----------------metrics-----------------")
    print(f"accuracy score for DecisionTreeClassifier: {accuracy:.4f}")
    print(f"Confusion_matrix:\n {cm}")
    print(classification_report(y_test, y_pred))

    return metrics
|
|
263
|
+
|
|
264
|
+
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
from sklearn.linear_model import LinearRegression, Lasso, Ridge
|
|
2
|
+
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
|
|
3
|
+
from sklearn.neighbors import KNeighborsRegressor
|
|
4
|
+
from sklearn.tree import DecisionTreeRegressor
|
|
5
|
+
from sklearn.svm import SVR
|
|
6
|
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, root_mean_squared_error
|
|
7
|
+
|
|
8
|
+
def linerR(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a LinearRegression model and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: fit_intercept (True), copy_x (True),
        positive (False).

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters (note: kwarg key is "copy_x", lowercase).
    params = {
        "fit_intercept": kwargs.get("fit_intercept", True),
        "copy_X": kwargs.get("copy_x", True),
        "positive": kwargs.get("positive", False),
    }

    print("<------------------------Running LinearRegression---------------------------------------->")

    model = LinearRegression(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
47
|
+
|
|
48
|
+
###################################################################################################################################
|
|
49
|
+
|
|
50
|
+
def RFR(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a RandomForestRegressor and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters (abbreviated keys): n_esti (100), max_d (None),
        min_samples_split (2), min_samples_leaf (1), random_state (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters (note abbreviated kwarg keys "n_esti" / "max_d").
    params = {
        "n_estimators": kwargs.get("n_esti", 100),
        "max_depth": kwargs.get("max_d", None),
        "min_samples_split": kwargs.get("min_samples_split", 2),
        "min_samples_leaf": kwargs.get("min_samples_leaf", 1),
        "random_state": kwargs.get("random_state", None),
    }

    print("<------------------------Running RandomForestRegressor---------------------------------------->")

    model = RandomForestRegressor(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
92
|
+
|
|
93
|
+
###################################################################################################################################
|
|
94
|
+
|
|
95
|
+
def GBR(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a GradientBoostingRegressor and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters (abbreviated keys): n_esti (100), l_r (0.1),
        max_d (3), subsample (1.0), min_samples_split (2),
        min_samples_leaf (1), loss ("squared_error"), random_state (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters (note abbreviated kwarg keys "n_esti"/"l_r"/"max_d").
    params = {
        "n_estimators": kwargs.get("n_esti", 100),
        "learning_rate": kwargs.get("l_r", 0.1),
        "max_depth": kwargs.get("max_d", 3),
        "subsample": kwargs.get("subsample", 1.0),
        "min_samples_split": kwargs.get("min_samples_split", 2),
        "min_samples_leaf": kwargs.get("min_samples_leaf", 1),
        "loss": kwargs.get("loss", "squared_error"),
        "random_state": kwargs.get("random_state", None),
    }

    print("<------------------------Running GradientBoostingRegressor---------------------------------------->")

    model = GradientBoostingRegressor(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
140
|
+
|
|
141
|
+
###################################################################################################################################
|
|
142
|
+
|
|
143
|
+
def KNR(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a KNeighborsRegressor and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: n_neighbors (5), weights ("uniform"),
        algorithm ("auto"), leaf_size (30), p (2), metric ("minkowski"),
        n_jobs (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters with sklearn's documented defaults.
    params = {
        "n_neighbors": kwargs.get("n_neighbors", 5),
        "weights": kwargs.get("weights", 'uniform'),
        "algorithm": kwargs.get("algorithm", 'auto'),
        "leaf_size": kwargs.get("leaf_size", 30),
        "p": kwargs.get("p", 2),
        "metric": kwargs.get("metric", 'minkowski'),
        "n_jobs": kwargs.get("n_jobs", None),
    }

    print("<------------------------Running KNeighborsRegressor---------------------------------------->")

    model = KNeighborsRegressor(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
187
|
+
|
|
188
|
+
###################################################################################################################################
|
|
189
|
+
|
|
190
|
+
def svr(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a support-vector regressor (SVR) and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: kernel ("rbf"), C (1.0), epsilon (0.1),
        gamma ("scale").

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters with sklearn's documented defaults.
    params = {
        "kernel": kwargs.get("kernel", "rbf"),
        "C": kwargs.get("C", 1.0),
        "epsilon": kwargs.get("epsilon", 0.1),
        "gamma": kwargs.get("gamma", "scale"),
    }

    print("<------------------------Running SupportVectorRegression.---------------------------------------->")

    model = SVR(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
229
|
+
|
|
230
|
+
###################################################################################################################################
|
|
231
|
+
|
|
232
|
+
def lasso(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a Lasso (L1-regularized) regressor and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: alpha (1.0), fit_intercept (True),
        max_iter (1000).

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters with sklearn's documented defaults.
    params = {
        "alpha": kwargs.get("alpha", 1.0),
        "fit_intercept": kwargs.get("fit_intercept", True),
        "max_iter": kwargs.get("max_iter", 1000),
    }

    print("<------------------------Running Lasso Regression.---------------------------------------->")

    model = Lasso(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
271
|
+
|
|
272
|
+
###################################################################################################################################
|
|
273
|
+
|
|
274
|
+
def ridge(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a Ridge (L2-regularized) regressor and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: alpha (1.0), fit_intercept (True),
        solver ("auto").

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters with sklearn's documented defaults.
    params = {
        "alpha": kwargs.get("alpha", 1.0),
        "fit_intercept": kwargs.get("fit_intercept", True),
        "solver": kwargs.get("solver", "auto"),
    }

    print("<------------------------Running Ridge Regression.---------------------------------------->")

    model = Ridge(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
313
|
+
|
|
314
|
+
###################################################################################################################################
|
|
315
|
+
|
|
316
|
+
def DTR(xy_train, xy_test, show_pred: bool, **kwargs):
    """Fit a DecisionTreeRegressor and report regression metrics.

    Parameters
    ----------
    xy_train, xy_test : sequence of length 2
        (x, y) pairs for training and evaluation.
    show_pred : bool
        When True, the predicted values are printed before the metrics.
    **kwargs
        Hyper-parameters: criterion ("squared_error"), splitter ("best"),
        max_depth (None), min_samples_split (2), min_samples_leaf (1),
        max_features (None), random_state (None).

    Returns
    -------
    dict
        Keys: "predicted_value", "mse", "mae", "rmse", "r2".
    """
    if xy_train is None or xy_test is None:
        raise ValueError("xy_train & xy_test is needed")
    if len(xy_train) != 2 or len(xy_test) != 2:
        raise ValueError("xy_train and xy_test require 2 items x_train, y_train, x_test, y_test")

    x_train, y_train = xy_train
    x_test, y_test = xy_test

    # Hyper-parameters with sklearn's documented defaults.
    params = {
        "criterion": kwargs.get("criterion", "squared_error"),
        "splitter": kwargs.get("splitter", "best"),
        "max_depth": kwargs.get("max_depth", None),
        "min_samples_split": kwargs.get("min_samples_split", 2),
        "min_samples_leaf": kwargs.get("min_samples_leaf", 1),
        "max_features": kwargs.get("max_features", None),
        "random_state": kwargs.get("random_state", None),
    }

    print("<------------------------Running DecisionTreeRegressor.---------------------------------------->")

    model = DecisionTreeRegressor(**params)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    metrics = {
        "predicted_value": pred,
        "mse": mean_squared_error(y_test, pred),
        "mae": mean_absolute_error(y_test, pred),
        "rmse": root_mean_squared_error(y_test, pred),
        "r2": r2_score(y_test, pred),
    }

    for name, value in metrics.items():
        if name == "predicted_value":
            # Predictions (and the banner) appear only on request.
            if show_pred == True:
                print(f"{name}:\n {value}")
                print("-----------------metrics-----------------")
        else:
            print(f"{name.upper()}: {value:.3f}")

    return metrics
|
|
361
|
+
|
|
362
|
+
###################################################################################################################################
|
|
363
|
+
|
skwrapper/__init__.py
ADDED
skwrapper/core.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from .Supervised.classificationModels import logistic, svc, randomForestClassifer, GBC, KNC, DTC
|
|
2
|
+
from .Supervised.regressionModels import linerR, RFR, GBR, svr, KNR, lasso, ridge, DTR
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
class supervised_classification:
    """
    Wrapper class for multiple sklearn classification models.
    See module docstring for usage examples.

    Supported Models
    ----------------
    "logistic": Logistic Classification,
    "svc": Support Vector Classification,
    "rfc": Random-Forest Classifier,
    "gbc": Gradient-Boosting Classifier,
    "knc": K-Nearest Neighbors Classifier,
    "dtc": Decision Tree Classifier
    """

    def __init__(self):
        # Each key is a model name; the value is the function that trains and
        # evaluates that model (imported from Supervised.classificationModels).
        self.operations = {
            "logistic": logistic,
            "svc": svc,
            "rfc": randomForestClassifer,
            "gbc": GBC,
            "knc": KNC,
            "dtc": DTC,
        }

    def perform(self, case=None, xy_train=None, xy_test=None, show_pred: bool = False, **kwargs):
        """
        Run the selected classification models and return their evaluation metrics.

        Parameters
        ----------
        case : list
            List of model names to execute, e.g. ["logistic", "svc", "rfc"].
            Use ``help(sc)`` to see the supported model names.
        xy_train : list
            Training dataset in the format [X_train, y_train].
        xy_test : list
            Testing dataset in the format [X_test, y_test].
        show_pred : bool, default=False
            If True, predicted values are printed in addition to the metrics.
        **kwargs : dict
            Model-specific parameters forwarded to the underlying
            classification functions.

        Returns
        -------
        dict
            One entry per executed model, each containing at least
            ``predicted_value``, ``accuracy`` and ``confusion_matrix``.

        Raises
        ------
        ValueError
            If any required argument is missing, a dataset does not contain
            exactly two items, or a requested model name is not supported.
        """
        if case is None or xy_train is None or xy_test is None:
            # Fixed typo in the original message ("xy_tarin ... is required").
            raise ValueError("case, xy_train and xy_test are required")
        if len(xy_train) != 2 or len(xy_test) != 2:
            raise ValueError("xy_train and xy_test must contain exactly 2 items")

        result = {}
        for operation in case:
            if operation not in self.operations:
                raise ValueError(f"Model '{operation}' is not supported.")
            try:
                result[operation] = self.operations[operation](xy_train, xy_test, show_pred, **kwargs)
            except (ValueError, IndexError) as e:
                # A failing model is logged and skipped so the remaining
                # requested models still run.
                logger.error("Operation '%s' failed: %s", operation, e)
        return result
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class supervised_regression:
    """
    Wrapper class for multiple sklearn regression models.
    See module docstring for usage examples.

    Supported Models
    ----------------
    linearR : Linear Regression
    ridge : Ridge Regression
    lasso : Lasso Regression
    svr : Support Vector Regression
    knr : K-Nearest Neighbors Regressor
    gbr : Gradient-Boosting Regressor
    rfr : Random-Forest Regressor
    dtr : Decision Tree Regressor
    """

    def __init__(self):
        # Each key is a model name, and the value is the corresponding function.
        self.operations = {
            "linearR": linerR,
            "rfr": RFR,
            "gbr": GBR,
            "svr": svr,
            "knr": KNR,
            "lasso": lasso,
            "ridge": ridge,
            "dtr": DTR,
        }
        # Backward-compatible alias for the original, misspelled attribute
        # name so external code that referenced it keeps working.
        self.operationss = self.operations

    def perform(self, case=None, xy_train=None, xy_test=None, show_pred: bool = False, **kwargs):
        """
        Run the selected regression models and return their evaluation metrics.

        Parameters
        ----------
        case : list
            List of model names to execute, e.g. ["linearR", "svr"].
            Use ``help(sr)`` to see the supported model names.
        xy_train : list
            Training dataset in the format [X_train, y_train].
        xy_test : list
            Testing dataset in the format [X_test, y_test].
        show_pred : bool, default=False
            If True, predicted values are printed in addition to the metrics.
        **kwargs : dict
            Model-specific parameters forwarded to the underlying
            regression functions.

        Returns
        -------
        dict
            One entry per executed model, each containing
            ``predicted_value``, ``mse``, ``mae``, ``rmse`` and ``r2``.

        Raises
        ------
        ValueError
            If any required argument is missing, a dataset does not contain
            exactly two items, or a requested model name is not supported.
        """
        if case is None or xy_train is None or xy_test is None:
            raise ValueError("case, xy_train, and xy_test are required")
        if len(xy_train) != 2 or len(xy_test) != 2:
            raise ValueError("xy_train and xy_test must contain exactly 2 items")

        result = {}
        for operation in case:
            if operation not in self.operations:
                raise ValueError(f"Model '{operation}' is not supported.")
            try:
                result[operation] = self.operations[operation](xy_train, xy_test, show_pred, **kwargs)
            except (ValueError, IndexError) as e:
                # A failing model is logged and skipped so the remaining
                # requested models still run.
                logger.error("Operation '%s' failed: %s", operation, e)
        return result
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skwrapper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: This package helps data scientists train models on a dataset with several different algorithms without copy-pasting the same code again and again. It is a scikit-learn wrapper that performs the model training, saving developer time, and it reports detailed metrics so you can quickly scan which model best fits the dataset
|
|
5
|
+
Author: Anuj Rajesh Tiwari
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENCE.txt
|
|
9
|
+
Requires-Dist: scikit-learn>=1.3
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# **skwrapper**
|
|
13
|
+
This package helps data scientists train models on a dataset with several different algorithms without copy-pasting the same code again and again. It is a scikit-learn wrapper that performs the model training, saving developer time, and it reports detailed metrics so you can quickly scan which model best fits the dataset.
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
- Supports regression and classification models
|
|
17
|
+
- Computes common regression and classification metrics.
|
|
18
|
+
- Optional display of predicted values.
|
|
19
|
+
- Easy-to-use unified interface for training and evaluation.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
```bash
pip install skwrapper
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## **Usage Example**
|
|
29
|
+
```python
|
|
30
|
+
## Import class from Library
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import matplotlib.pyplot as plt
|
|
33
|
+
import seaborn as sns
|
|
34
|
+
from skwrapper import sc, sr
|
|
35
|
+
|
|
36
|
+
df = pd.read_csv("Social_Network_Ads.csv")
|
|
37
|
+
|
|
38
|
+
selected_row = df.loc[:, 'Age': 'Purchased']
|
|
39
|
+
|
|
40
|
+
from sklearn.model_selection import train_test_split
|
|
41
|
+
from sklearn.preprocessing import StandardScaler
|
|
42
|
+
# first we have to define X and y where X is the variable or feature input and y is the output target basically
|
|
43
|
+
x = selected_row[['Age', 'EstimatedSalary']]
|
|
44
|
+
y = selected_row['Purchased']
|
|
45
|
+
|
|
46
|
+
## Split the Data
|
|
47
|
+
X_train, x_test, Y_train, y_test = train_test_split( x, y, train_size=0.8, random_state=48 )
|
|
48
|
+
|
|
49
|
+
X_train.shape, x_test.shape
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# doing standardization
|
|
53
|
+
scaler = StandardScaler()
|
|
54
|
+
|
|
55
|
+
#fit the scaler to the train set, it will learn the parameter
|
|
56
|
+
scaler.fit(X_train) ## learn mean and std from the train dataset
|
|
57
|
+
X_train_scaled = scaler.transform(X_train) ## Apply sacling
|
|
58
|
+
X_test_scaler = scaler.transform(x_test) ## Apply same scaling on X_test as well
|
|
59
|
+
|
|
60
|
+
#convert the numpy 2D arry to pd dataframes with column names on it as numpy array dont have column name after scaling
|
|
61
|
+
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
|
|
62
|
+
X_test_scaler_df = pd.DataFrame(X_test_scaler, columns=x_test.columns)
|
|
63
|
+
|
|
64
|
+
print(X_train_scaled_df.describe())
|
|
65
|
+
print(X_train.describe())
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Initialize class
|
|
69
|
+
sc = sc() ## Supervised Classifications class
|
|
70
|
+
sr = sr() ## Supervised Regression class
|
|
71
|
+
|
|
72
|
+
# Train and evaluate Models
|
|
73
|
+
|
|
|
74
|
+
## Single Model Execution for sc (Supervised Classification Models)
|
|
75
|
+
sc.perform(
|
|
76
|
+
case=["logistic"],
|
|
77
|
+
xy_train=[X_train_scaled_df, y_train],
|
|
78
|
+
xy_test=[X_test_scaler_df, y_test],
|
|
79
|
+
|
|
80
|
+
## Optional Parameter
|
|
81
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
## Multiple Model Execution for sc (Supervised Classification Models)
|
|
85
|
+
result = sc.perform(
|
|
86
|
+
    case=["logistic", "svc", "knc"],
|
|
87
|
+
xy_train=[X_train_scaled_df, y_train],
|
|
88
|
+
xy_test=[X_test_scaler_df, y_test],
|
|
89
|
+
|
|
90
|
+
## Optional Parameter
|
|
91
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
#----------------and for Supervised Regression Models-------------------#
|
|
95
|
+
## Single Model Execution:
|
|
96
|
+
sr.perform(
|
|
97
|
+
case=["linearR"],
|
|
98
|
+
xy_train=[X_train, y_train],
|
|
99
|
+
xy_test=[X_test, y_test],
|
|
100
|
+
|
|
101
|
+
#Optional Parameter
|
|
102
|
+
show_pred=True # If True, predicted_values will be printed. If False, only evaluation metrics will be displayed.
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
## Multiple Model Execution:
|
|
106
|
+
result = sr.perform(
|
|
107
|
+
case=["linearR", "svr", "knr"],
|
|
108
|
+
xy_train=[X_train, y_train],
|
|
109
|
+
xy_test=[X_test, y_test]
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
print(result) ## This will print all the metrics for all defined models in **case**
|
|
113
|
+
|
|
114
|
+
# Access specific model metrics or prediction values
|
|
115
|
+
print("MSE for Linear Regression:", result["linearR"]["mse"])
|
|
116
|
+
print("predicted_value for SVR:", result["svr"]["predicted_value"])
|
|
117
|
+
|
|
118
|
+
## You Can Plot the predicted Values
|
|
119
|
+
sns.scatterplot(result['svr']['predicted_value'])
|
|
120
|
+
plt.show()
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## **Supported Models**
|
|
126
|
+
- **Wrapper class for multiple sklearn regression models.**:
|
|
127
|
+
|
|
128
|
+
| Model | Description |
|
|
129
|
+
|-------|-------------|
|
|
130
|
+
| linearR | Linear Regression |
|
|
131
|
+
| ridge | Ridge Regression |
|
|
132
|
+
| lasso | Lasso Regression |
|
|
133
|
+
| svr | Support Vector Regression |
|
|
134
|
+
| knr | K-Nearest Neighbors Regressor |
|
|
135
|
+
| gbr | Gradient Boosting Regressor |
|
|
136
|
+
| rfr | Random Forest Regressor |
|
|
137
|
+
| dtr | Decision Tree Regressor |
|
|
138
|
+
|
|
139
|
+
## **Metrics**
|
|
140
|
+
- **metrics computed automatically**:
|
|
141
|
+
|
|
142
|
+
```markdown
|
|
143
|
+
- Mean Squared Error (MSE)
|
|
144
|
+
- Mean Absolute Error (MAE)
|
|
145
|
+
- Root Mean Squared Error (RMSE)
|
|
146
|
+
- R² Score
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
- **Wrapper class for multiple sklearn Classifications models.**:
|
|
152
|
+
|
|
153
|
+
| Model | Description |
|
|
154
|
+
|-------|-------------|
|
|
155
|
+
| logistic | Logistic Regression Classifier |
|
|
156
|
+
| svc | Support Vector Classifier (SVC) |
|
|
157
|
+
| rfc | Random Forest Classifier |
|
|
158
|
+
| gbc | Gradient Boosting Classifier |
|
|
159
|
+
| knc | K-Nearest Neighbors Classifier |
|
|
160
|
+
| dtc | Decision Tree Classifier |
|
|
161
|
+
|
|
162
|
+
## **Metrics**
|
|
163
|
+
- **metrics computed automatically**:
|
|
164
|
+
|
|
165
|
+
```markdown
|
|
166
|
+
- accuracy
|
|
167
|
+
- confusion_matrix
|
|
168
|
+
- classification_report
|
|
169
|
+
```
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
skwrapper/__init__.py,sha256=8Q6ZimekxULZN6fGse29hNXXfUvASaq7nVHP73s6Lpg,108
|
|
2
|
+
skwrapper/core.py,sha256=HCPVm6c3bdW3YqQgNHJLxW6HMOlUPwiGdzbJt2AMpBs,7120
|
|
3
|
+
skwrapper/Supervised/classificationModels.py,sha256=Nerahd2qyH2L8A6eAPaXB7xefKYDgbwuHQFruNtYMaQ,10947
|
|
4
|
+
skwrapper/Supervised/regressionModels.py,sha256=KJCjWbjQLFA-SXwpJd0CatY0CObq5bl0NZLc8DHdBCA,14278
|
|
5
|
+
skwrapper-0.1.0.dist-info/licenses/LICENCE.txt,sha256=Y5d-uZ3T8fXs2sAmlkI0twS423qMdIEExcY9-PoQeG8,1096
|
|
6
|
+
tests/test_basic.py,sha256=_Q1U0ezyFUQBJ5yP4zM-tRjT9IwjbutTi4jFNtQYvME,2367
|
|
7
|
+
skwrapper-0.1.0.dist-info/METADATA,sha256=2eZ0Q7KGFnlLjnJSIoeCmGU3GmfCVXO5sHCyjolMYC8,5486
|
|
8
|
+
skwrapper-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
skwrapper-0.1.0.dist-info/top_level.txt,sha256=d_zPYcMdQmcehsnJczoZ1ohT7_690DU9DUrkqKs8V10,16
|
|
10
|
+
skwrapper-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Anuj Rajesh Tiwari
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tests/test_basic.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Manual smoke-test / demo for skwrapper.

NOTE(review): this script runs on import, reads a locally cached Kaggle
dataset, and opens matplotlib windows — it is a demo rather than an
automated pytest test.
"""
import os
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from skwrapper import sc, sr

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Uncomment to download the dataset with kagglehub:
# import kagglehub
# path = kagglehub.dataset_download("raipiyush558/social-network-ad")
# print(path)

# Locate the kagglehub cache under the current user's home directory instead
# of a hard-coded absolute Windows path, so the script is portable.
data_dir = Path.home() / ".cache" / "kagglehub" / "datasets" / "raipiyush558" / "social-network-ad" / "versions" / "1"
os.listdir(data_dir)  # raises early if the dataset has not been downloaded

df = pd.read_csv(data_dir / "Social_Network_Ads.csv")
# print(df.head(5))
# print(df.tail(5))

maxSal = df["EstimatedSalary"].max()
minSal = df['EstimatedSalary'].min()
print(f"max = {maxSal}, min = {minSal}")

selected_row = df.loc[:, 'Age': 'Purchased']
# print(selected_row)

# X holds the input features and y the target labels.
x = selected_row[['Age', 'EstimatedSalary']]
y = selected_row['Purchased']
# print(x)
# print(y)

# Split the data: 80% train / 20% test.
X_train, x_test, Y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=48)

X_train.shape, x_test.shape

# Standardize the features: fit on the training set only, then apply the
# same transformation to the test set to avoid data leakage.
scaler = StandardScaler()
scaler.fit(X_train)  # learn mean and std from the train dataset
X_train_scaled = scaler.transform(X_train)  # apply scaling
X_test_scaler = scaler.transform(x_test)  # apply the same scaling to X_test

# scaler.mean_

# Rebuild DataFrames with column names, because scaling returns plain
# numpy 2D arrays that have no column labels.
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaler_df = pd.DataFrame(X_test_scaler, columns=x_test.columns)
print(X_train_scaled_df.describe())
print(X_train.describe())

sc = sc()
sr = sr()

# print(help(sr))
# print(help(sr.perform))
result = sr.perform(
    case=["linearR", "svr"],
    xy_train=[X_train_scaled_df, Y_train],
    xy_test=[X_test_scaler_df, y_test],
)

sns.scatterplot(result['svr']['predicted_value'])
# print(result['linearR']["mse"])

plt.show()

# print(df)
|