mlexam-src 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mlexam_src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .main import GeneratoreCodice
|
mlexam_src/main.py
ADDED
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
class GeneratoreCodice:
    """Interactive cheat-sheet that hands out pre-written ML code snippets.

    The snippets are stored as plain strings, keyed by a short lowercase
    model name, and can be either returned (``dammi_codice``) or printed
    (``copia_appunti``).
    """

    def __init__(self):
        # Archive of ready-to-paste snippets, keyed by lowercase model name.
        # Each snippet starts at column 0 so that the printed text can be
        # pasted directly into a script or notebook cell.
        self._archivio_codice = {
            "linear": """
# ==========================================
# REGRESSIONE LINEARE FROM SCRATCH
# ==========================================
import numpy as np

class LinearRegressionFromScratch:
    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        # Ciclo di ottimizzazione (Gradient Descent)
        for _ in range(self.n_iters):
            y_predicted = np.dot(X, self.weights) + self.bias
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db


class LinearRegression:
    def __init__(self, learning_rate=1e-2, n_steps=200, n_features=1, lmd=0.01, seed=123):
        self.seed = seed
        np.random.seed(self.seed)

        self.learning_rate = learning_rate
        self.n_steps = n_steps
        self.theta = np.random.rand(n_features)
        self.lmd = lmd

        self.lmd_ = np.zeros(n_features)
        self.lmd_ = np.full(n_features, lmd)
        self.lmd_[0] = 0

    def fit_fbgd(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(0, self.n_steps):
            preds = np.dot(X_train, self.theta)
            error = preds - y_train
            gradient = 1/m * np.dot(X_train.T, error)
            self.theta = self.theta - self.learning_rate * gradient

            theta_history[step, :] = self.theta.T
            cost_history[step] = 1/(2*m) * np.dot(error, error.T)

        return cost_history, theta_history

    def fit_regularized_fbgd(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(0, self.n_steps):
            preds = np.dot(X_train, self.theta)
            error = preds - y_train
            self.theta = self.theta - self.learning_rate * 1/m * (np.dot(X_train.T, error) + self.lmd_ * self.theta)

            cost_history[step] = 1/(2*m) * (np.dot(error, error.T) + self.lmd * np.dot(self.theta[1:].T, self.theta[1:]))
            theta_history[step, :] = self.theta.T

        return cost_history, theta_history

    def fit_sgd(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(self.n_steps):
            random_index = np.random.randint(m)
            x_i = X_train[random_index]
            y_i = y_train[random_index]

            pred = np.dot(x_i, self.theta)
            error = pred - y_i
            self.theta = self.theta - self.learning_rate * x_i.T * error

            pred = np.dot(X_train, self.theta)
            error_train = pred - y_train
            cost_history[step] = 1/(2*m) * np.dot(error_train, error_train.T)
            theta_history[step, :] = self.theta.T

        return cost_history, theta_history

    def fit_sgd_v2(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for epoch in range(self.n_steps):
            for i in range(m):
                prediction = np.dot(X_train[i], self.theta)
                error = prediction - y_train[i]
                self.theta = self.theta - self.learning_rate * X_train[i].T * error
                theta_history[epoch, :] = self.theta.T

            pred = np.dot(X_train, self.theta)
            error_train = pred - y_train
            cost_history[epoch] = 1/(2*m) * np.dot(error_train, error_train.T)
            theta_history[epoch, :] = self.theta.T

        return cost_history, theta_history

    def fit_mbgd(self, X_train, y_train, batch_size=4):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(self.n_steps):
            indices = np.random.choice(m, batch_size)
            x_b = X_train[indices]
            y_b = y_train[indices]

            pred_b = np.dot(x_b, self.theta)
            error_b = pred_b - y_b

            self.theta = self.theta - self.learning_rate * (1 / batch_size) * np.dot(x_b.T, error_b)

            pred_train = np.dot(X_train, self.theta)
            error_train = pred_train - y_train
            cost_history[step] = 1 / (2 * m) * np.dot(error_train, error_train.T)
            theta_history[step, :] = self.theta.T

        return cost_history, theta_history

    def fit_mbgd_v2(self, X_train, y_train, batch_size=4):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for epoch in range(self.n_steps):
            for i in range(0, m, batch_size):
                x_b = X_train[i:i+batch_size]
                y_b = y_train[i:i+batch_size]

                pred_b = np.dot(x_b, self.theta)
                error_b = pred_b - y_b
                self.theta = self.theta - self.learning_rate * (1/len(x_b)) * np.dot(x_b.T, error_b)

            pred_train = np.dot(X_train, self.theta)
            error_train = pred_train - y_train
            cost_history[epoch] = (1/(2*m)* np.dot(error_train, error_train.T))
            theta_history[epoch, :] = self.theta.T

        return cost_history, theta_history

    def predict(self, X_test):
        return np.dot(X_test, self.theta)

# ---adatta codice
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

houses = pd.read_csv('./datasets/houses_portaland_simple.csv')
display(houses[:10])

print(houses.describe())

houses.drop('Bedroom', axis=1, inplace=True)
print(houses.head())

houses = houses.sample(frac=1, random_state=123).reset_index(drop=True)

plt.plot(houses.Size, houses.Price, 'r.')
plt.show()

print(houses.corr())

houses = houses.values
mean = houses.mean(axis=0)
std = houses.std(axis=0)
houses = (houses - mean) / std

x = houses[:, 0]
y = houses[:, 1]
plt.plot(x, y, 'r.')
plt.show()

x = np.c_[np.ones(x.shape[0]), x]
print(x[:5])

linear = LinearRegression(n_features=x.shape[1], n_steps=1000, learning_rate=0.01)

lineX = np.linspace(x[:, 1].min(), x[:, 1].max(), 100)
liney = [linear.theta[0] + linear.theta[1]*xx for xx in lineX]
plt.plot(x[:, 1], y, 'r.', label='Training data')
plt.plot(lineX, liney, 'b--', label='Current hypothesis')
plt.legend()
plt.show()

cost_history, theta_history = linear.fit_fbgd(x, y)

print(f'''Thetas: {*linear.theta,}''')
print(f'''Final train cost: {cost_history[-1]:.3f}''')

print(theta_history)
print("theta_0 (after training): ", theta_history[-1, 0])
print("theta_1 (after training): ", theta_history[-1, 1])

lineX = np.linspace(x[:, 1].min(), x[:, 1].max(), 100)
liney = [theta_history[-1, 0] + theta_history[-1, 1]*xx for xx in lineX]
plt.plot(x[:, 1], y, 'r.', label='Training data')
plt.plot(lineX, liney, 'b--', label='Final hypothesis')
plt.legend()
plt.show()

plt.plot(cost_history, 'g--')
plt.show()

theta0_vals = np.linspace(-2, 2, 100)
theta1_vals = np.linspace(-2, 3, 100)
J_vals = np.zeros((theta0_vals.size, theta1_vals.size))

for t0, theta0 in enumerate(theta0_vals):
    for t1, theta1 in enumerate(theta1_vals):
        thetaT = np.array([[theta0], [theta1]])
        h = x.dot(thetaT.flatten())
        j = (h - y)
        J = j.dot(j) / 2 / (len(x))
        J_vals[t0, t1] = J

J_vals = J_vals.T
A, B = np.meshgrid(theta0_vals, theta1_vals)
cp = plt.contourf(A, B, J_vals)
plt.colorbar(cp)
plt.plot(theta_history.T[0], theta_history.T[1], 'r--')
plt.show()
""",

            "multiple": """
# ==========================================
# MULTIPLE LINEAR REGRESSION FROM SCRATCH
# ==========================================
import numpy as np

class LinearRegression:
    def __init__(self, learning_rate=1e-2, n_steps=200, n_features=1, lmd=0.01, seed=123):
        self.seed = seed
        np.random.seed(self.seed)

        self.learning_rate = learning_rate
        self.n_steps = n_steps
        self.theta = np.random.rand(n_features)
        self.lmd = lmd

        self.lmd_ = np.zeros(n_features)
        self.lmd_ = np.full(n_features, lmd)
        self.lmd_[0] = 0

    def fit_fbgd(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(0, self.n_steps):
            preds = np.dot(X_train, self.theta)
            error = preds - y_train
            gradient = 1/m * np.dot(X_train.T, error)
            self.theta = self.theta - self.learning_rate * gradient

            theta_history[step, :] = self.theta.T
            cost_history[step] = 1/(2*m) * np.dot(error, error.T)

        return cost_history, theta_history

    def fit_regularized_fbgd(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(0, self.n_steps):
            preds = np.dot(X_train, self.theta)
            error = preds - y_train
            self.theta = self.theta - self.learning_rate * 1/m * (np.dot(X_train.T, error) + self.lmd_ * self.theta)

            cost_history[step] = 1/(2*m) * (np.dot(error, error.T) + self.lmd * np.dot(self.theta[1:].T, self.theta[1:]))
            theta_history[step, :] = self.theta.T

        return cost_history, theta_history

    def fit_sgd(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(self.n_steps):
            random_index = np.random.randint(m)
            x_i = X_train[random_index]
            y_i = y_train[random_index]

            pred = np.dot(x_i, self.theta)
            error = pred - y_i
            self.theta = self.theta - self.learning_rate * x_i.T * error

            pred = np.dot(X_train, self.theta)
            error_train = pred - y_train
            cost_history[step] = 1/(2*m) * np.dot(error_train, error_train.T)
            theta_history[step, :] = self.theta.T

        return cost_history, theta_history

    def fit_sgd_v2(self, X_train, y_train):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for epoch in range(self.n_steps):
            for i in range(m):
                prediction = np.dot(X_train[i], self.theta)
                error = prediction - y_train[i]
                self.theta = self.theta - self.learning_rate * X_train[i].T * error
                theta_history[epoch, :] = self.theta.T

            pred = np.dot(X_train, self.theta)
            error_train = pred - y_train
            cost_history[epoch] = 1/(2*m) * np.dot(error_train, error_train.T)
            theta_history[epoch, :] = self.theta.T

        return cost_history, theta_history

    def fit_mbgd(self, X_train, y_train, batch_size=4):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for step in range(self.n_steps):
            indices = np.random.choice(m, batch_size)
            x_b = X_train[indices]
            y_b = y_train[indices]

            pred_b = np.dot(x_b, self.theta)
            error_b = pred_b - y_b

            self.theta = self.theta - self.learning_rate * (1 / batch_size) * np.dot(x_b.T, error_b)

            pred_train = np.dot(X_train, self.theta)
            error_train = pred_train - y_train
            cost_history[step] = 1 / (2 * m) * np.dot(error_train, error_train.T)
            theta_history[step, :] = self.theta.T

        return cost_history, theta_history

    def fit_mbgd_v2(self, X_train, y_train, batch_size=4):
        m = len(X_train)
        cost_history = np.zeros(self.n_steps)
        theta_history = np.zeros((self.n_steps, self.theta.shape[0]))

        for epoch in range(self.n_steps):
            for i in range(0, m, batch_size):
                x_b = X_train[i:i+batch_size]
                y_b = y_train[i:i+batch_size]

                pred_b = np.dot(x_b, self.theta)
                error_b = pred_b - y_b
                self.theta = self.theta - self.learning_rate * (1/len(x_b)) * np.dot(x_b.T, error_b)

            pred_train = np.dot(X_train, self.theta)
            error_train = pred_train - y_train
            cost_history[epoch] = (1/(2*m)* np.dot(error_train, error_train.T))
            theta_history[epoch, :] = self.theta.T

        return cost_history, theta_history

    def predict(self, X_test):
        return np.dot(X_test, self.theta)


# Regression Metric
class RegressionMetrics:
    def __init__(self, model):
        self.model = model

    def compute_performance (self, y_test, y_pred):
        mae = (self.mean_absolute_error(y_test, y_pred))
        mape = self.mean_absolute_percentage_error(y_test, y_pred)
        mpe = self.mean_percentage_error(y_test, y_pred)
        mse = self.mean_squared_error(y_test, y_pred)
        rmse = self.root_mean_squared_error(y_test, y_pred)
        r2 = self.r_2(y_test, y_pred)
        return {'mae':mae,'mape': mape,'mpe': mpe,
                'mse': mse,'rmse': rmse,'r2': r2}

    def mean_absolute_error (self, y_test, y_pred):
        output_errors= np.abs(y_pred - y_test)
        return np.average(output_errors)

    def mean_squared_error (self, y_test, y_pred):
        output_errors= (y_pred - y_test)**2
        return np.average(output_errors)

    def root_mean_squared_error (self, y_test, y_pred):
        return np.sqrt(self.mean_squared_error(y_test, y_pred))

    def mean_absolute_percentage_error (self, y_test, y_pred):
        output_errors = np.abs((y_pred - y_test)/y_test)
        return np.average(output_errors)*100

    def mean_percentage_error (self, y_test, y_pred):
        output_errors = (y_pred - y_test)/y_test
        return np.average(output_errors)*100

    def r_2(self, y_test, y_pred):
        sst = np.sum ((y_test - y_test.mean())**2)
        ssr = np.sum((y_pred-y_test)**2)
        r2 = 1-(ssr/sst)
        return r2


# INIZIO
houses = pd.read_csv('./datasets/houses.csv')

display(houses)


print(houses.describe())

houses = houses.sample(frac=1, random_state=123).reset_index(drop=True)

x = houses[['GrLivArea', 'LotArea', 'GarageArea', 'FullBath']].values
y = houses['SalePrice'].values

train_index = round(len(x) * 0.8)

X_train = x[:train_index]
y_train = y[:train_index]

X_test = x[train_index:]
y_test = y[train_index:]

mean = X_train.mean(axis=0)
std = X_train.std(axis=0)

X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]

print(len(X_train))

linear = LinearRegression(n_features=X_train.shape[1], n_steps=100, learning_rate=0.05)

cost_history, theta_history = linear.fit_fbgd(X_train, y_train)

print(f'''Thetas: {*linear.theta,}''')
print(f'''Final train cost: {cost_history[-1]:.3f}''')

plt.plot(cost_history, 'g--')
plt.show()

reg_metrics = RegressionMetrics(linear)
y_pred = linear.predict(X_test)

print(reg_metrics.compute_performance(y_test, y_pred))

linear_true_sgd = LinearRegression(n_features=X_train.shape[1], n_steps=100, learning_rate=0.1)

cost_history_true_sgd, theta_history_true_sgd = linear_true_sgd.fit_sgd(X_train, y_train)

print(f'''Thetas: {*linear_true_sgd.theta,})''')
print(f''' Final Train cost: {cost_history_true_sgd[-1]: 3f}''')

plt.plot(cost_history_true_sgd, 'g--')
plt.show()

print(RegressionMetrics(linear_true_sgd).compute_performance(y_test, linear_true_sgd.predict(X_test)))

linear_true_mbgd = LinearRegression(n_features=X_train.shape[1], n_steps=100, learning_rate=0.01)

cost_history_true_mbgd, theta_history_true_mbgd = linear_true_mbgd.fit_mbgd(X_train, y_train, batch_size=16)

print(f'''Thetas: {*linear_true_mbgd.theta,})''')
print(f''' Final Train cost: {cost_history_true_mbgd[-1]: 3f}''')

plt.plot(cost_history_true_mbgd, 'g--')
plt.show()

print(RegressionMetrics(linear_true_mbgd).compute_performance(y_test, linear_true_mbgd.predict(X_test)))
"""}

    def dammi_codice(self, tipo_modello):
        """Return the code snippet stored under ``tipo_modello``.

        The lookup ignores letter case and surrounding whitespace. Any
        non-string argument is converted with ``str()`` first, so a bad
        argument yields the warning below instead of an ``AttributeError``.

        Args:
            tipo_modello: Name of the requested snippet (e.g. ``"linear"``).

        Returns:
            The snippet text, or a one-line ``# Errore: ...`` comment listing
            the available names when the requested one is not in the archive.
        """
        # Normalise the key so that "LINEAR" or " linear " still match.
        chiave = str(tipo_modello).strip().lower()

        codice = self._archivio_codice.get(chiave)
        if codice is not None:
            return codice

        opzioni = ", ".join(self._archivio_codice)
        return f"# Errore: Modello '{tipo_modello}' non trovato. Scegli tra: {opzioni}"

    def copia_appunti(self, tipo_modello):
        """Print the requested code snippet straight to standard output."""
        print(self.dammi_codice(tipo_modello))
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mlexam_src
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: I miei appunti di ML from scratch
|
|
5
|
+
Classifier: Programming Language :: Python :: 3
|
|
6
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
7
|
+
Classifier: Operating System :: OS Independent
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
Requires-Dist: numpy>=1.20.0
|
|
11
|
+
|
|
12
|
+
# ML Exam Library 🚀
|
|
13
|
+
|
|
14
|
+
Una libreria Python creata per memorizzare, consultare e recuperare rapidamente codici e algoritmi di Machine Learning implementati **from scratch**.
|
|
15
|
+
|
|
16
|
+
Questa libreria funge da *cheat-sheet* interattivo: permette di stampare direttamente nel terminale il codice sorgente interamente commentato di diversi modelli, pronto per essere copiato e incollato nei tuoi progetti o script d'esame.
|
|
17
|
+
|
|
18
|
+
## 📦 Installazione
|
|
19
|
+
|
|
20
|
+
Una volta pubblicata, puoi installare la libreria localmente tramite `pip`:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install mlexam_src
|
|
24
|
+
|
|
25
|
+
```

```python
from mlexam_src import GeneratoreCodice
|
|
26
|
+
|
|
27
|
+
# Inizializza il generatore
|
|
28
|
+
cheat_sheet = GeneratoreCodice()
|
|
29
|
+
|
|
30
|
+
# 1. Stampa il codice della Regressione Lineare
|
|
31
|
+
cheat_sheet.copia_appunti("linear")
|
|
32
|
+
|
|
33
|
+
# 2. Stampa il codice della Regressione Lineare Multipla
|
|
34
|
+
cheat_sheet.copia_appunti("multiple")
```
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
mlexam_src/__init__.py,sha256=QAiJf1ZtMoWmTKCeq1N98WCKlfuEzoV6hmLq-Y7Ayy4,34
|
|
2
|
+
mlexam_src/main.py,sha256=I1DDQc0kr4FyHVWvp9dLfAjC5NISAbYGyfw5qB2pTag,18030
|
|
3
|
+
mlexam_src-0.1.0.dist-info/METADATA,sha256=sajzmc129kV7XhV7dhJLyd9tP3u5DzqS_B-nr7C39qg,1191
|
|
4
|
+
mlexam_src-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
5
|
+
mlexam_src-0.1.0.dist-info/top_level.txt,sha256=IDg0aAzNc_qKdXQtwmYgPdjfWTsx6IClMrAtnJK8-8Q,11
|
|
6
|
+
mlexam_src-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mlexam_src
|