optiml 1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiml/__init__.py +0 -0
- optiml/ml/__init__.py +0 -0
- optiml/ml/neural_network/__init__.py +3 -0
- optiml/ml/neural_network/_base.py +475 -0
- optiml/ml/neural_network/activations.py +79 -0
- optiml/ml/neural_network/initializers.py +66 -0
- optiml/ml/neural_network/layers.py +183 -0
- optiml/ml/neural_network/losses.py +178 -0
- optiml/ml/neural_network/regularizers.py +87 -0
- optiml/ml/svm/__init__.py +3 -0
- optiml/ml/svm/_base.py +1442 -0
- optiml/ml/svm/kernels.py +208 -0
- optiml/ml/svm/losses.py +284 -0
- optiml/ml/svm/smo.py +797 -0
- optiml/ml/tests/__init__.py +0 -0
- optiml/ml/tests/_datasets.py +49 -0
- optiml/ml/tests/_utils.py +28 -0
- optiml/ml/tests/test_initializers.py +33 -0
- optiml/ml/tests/test_neural_network.py +86 -0
- optiml/ml/tests/test_svc.py +245 -0
- optiml/ml/tests/test_svr.py +256 -0
- optiml/ml/utils.py +252 -0
- optiml/opti/__init__.py +4 -0
- optiml/opti/_base.py +309 -0
- optiml/opti/constrained/__init__.py +9 -0
- optiml/opti/constrained/_base.py +404 -0
- optiml/opti/constrained/active_set.py +228 -0
- optiml/opti/constrained/frank_wolfe.py +158 -0
- optiml/opti/constrained/interior_point.py +282 -0
- optiml/opti/constrained/projected_gradient.py +138 -0
- optiml/opti/constrained/tests/__init__.py +0 -0
- optiml/opti/constrained/tests/test_active_set.py +16 -0
- optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
- optiml/opti/constrained/tests/test_interior_point.py +16 -0
- optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
- optiml/opti/constrained/tests/test_lower_bound.py +29 -0
- optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
- optiml/opti/unconstrained/__init__.py +6 -0
- optiml/opti/unconstrained/_base.py +63 -0
- optiml/opti/unconstrained/line_search/__init__.py +10 -0
- optiml/opti/unconstrained/line_search/_base.py +106 -0
- optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
- optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
- optiml/opti/unconstrained/line_search/line_search.py +248 -0
- optiml/opti/unconstrained/line_search/newton.py +198 -0
- optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
- optiml/opti/unconstrained/proximal_bundle.py +219 -0
- optiml/opti/unconstrained/stochastic/__init__.py +12 -0
- optiml/opti/unconstrained/stochastic/_base.py +246 -0
- optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
- optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
- optiml/opti/unconstrained/stochastic/adam.py +179 -0
- optiml/opti/unconstrained/stochastic/adamax.py +178 -0
- optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
- optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
- optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
- optiml/opti/unconstrained/stochastic/schedules.py +89 -0
- optiml/opti/unconstrained/tests/__init__.py +0 -0
- optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
- optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
- optiml/opti/unconstrained/tests/test_adam.py +42 -0
- optiml/opti/unconstrained/tests/test_adamax.py +41 -0
- optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
- optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
- optiml/opti/unconstrained/tests/test_functions.py +34 -0
- optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
- optiml/opti/unconstrained/tests/test_newton.py +20 -0
- optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
- optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
- optiml/opti/unconstrained/tests/test_verbose.py +25 -0
- optiml/opti/utils.py +353 -0
- optiml-1.7.dist-info/METADATA +203 -0
- optiml-1.7.dist-info/RECORD +76 -0
- optiml-1.7.dist-info/WHEEL +5 -0
- optiml-1.7.dist-info/licenses/LICENSE +21 -0
- optiml-1.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pytest
|
|
3
|
+
from sklearn.model_selection import train_test_split
|
|
4
|
+
from sklearn.preprocessing import StandardScaler
|
|
5
|
+
|
|
6
|
+
from optiml.ml.svm import SVR
|
|
7
|
+
from optiml.ml.tests._datasets import load_boston
|
|
8
|
+
from optiml.ml.svm.kernels import linear
|
|
9
|
+
from optiml.ml.svm.losses import epsilon_insensitive, squared_epsilon_insensitive
|
|
10
|
+
from optiml.opti.constrained import ProjectedGradient, ActiveSet, InteriorPoint, FrankWolfe
|
|
11
|
+
from optiml.opti.unconstrained import ProximalBundle
|
|
12
|
+
from optiml.opti.unconstrained.line_search import SteepestGradientDescent, ConjugateGradient, Newton, BFGS, LBFGS
|
|
13
|
+
from optiml.opti.unconstrained.stochastic import (StochasticGradientDescent, Adam, AMSGrad,
|
|
14
|
+
AdaMax, AdaGrad, AdaDelta, RMSProp)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_solve_primal_l1_svr_with_line_search_optimizers():
    """Fit the primal epsilon-insensitive SVR with every line search optimizer.

    Each optimizer is trained on the same standardized 75/25 split of the data
    returned by ``load_boston`` and must score at least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    # optimizers are checked in a fixed order, so the first failing one is the one reported
    for optimizer in (SteepestGradientDescent, ConjugateGradient, Newton, BFGS, LBFGS):
        model = SVR(loss=epsilon_insensitive, optimizer=optimizer)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_solve_primal_l1_svr_with_stochastic_optimizers():
    """Fit the primal epsilon-insensitive SVR with every stochastic optimizer.

    Each configuration is trained on the same standardized 75/25 split of the
    data returned by ``load_boston`` and must score at least 0.67 on the
    held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    # (optimizer, extra constructor arguments) pairs, checked in this order
    configurations = (
        (StochasticGradientDescent, {}),
        (Adam, {}),
        (AMSGrad, {}),
        (AdaMax, {}),
        (AdaGrad, {'learning_rate': 1.}),
        (AdaDelta, {'learning_rate': 1., 'max_iter': 3000}),
        (RMSProp, {}),
    )
    for optimizer, extra_params in configurations:
        model = SVR(loss=epsilon_insensitive, optimizer=optimizer, **extra_params)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_solve_primal_l1_svr_with_proximal_bundle():
    """Fit the primal epsilon-insensitive SVR with the proximal bundle method.

    The model is trained on a standardized 75/25 split of the data returned by
    ``load_boston`` and must score at least 0.64 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    model = SVR(loss=epsilon_insensitive, optimizer=ProximalBundle)
    model.fit(X_train, y_train)
    held_out_score = model.score(X_test, y_test)
    assert held_out_score >= 0.64
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_solve_dual_l1_svr_with_smo():
    """Check that SMO and cvxopt agree on the dual epsilon-insensitive SVR.

    Both models use a linear kernel and are trained on the same standardized
    75/25 split of the data returned by ``load_boston``; their held-out
    predictions and scores must be close to each other.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    # NOTE(review): the SMO model relies on the default `reg_intercept` while the
    # reference sets it to False - confirm both solve the same formulation
    smo = SVR(loss=epsilon_insensitive, kernel=linear, dual=True, optimizer='smo').fit(X_train, y_train)
    # SMO must reach essentially the same solution as the reference QP solver (cvxopt)
    ref = SVR(loss=epsilon_insensitive, kernel=linear, reg_intercept=False,
              dual=True, optimizer='cvxopt').fit(X_train, y_train)

    smo_predictions = smo.predict(X_test)
    ref_predictions = ref.predict(X_test)
    assert np.allclose(smo_predictions, ref_predictions, atol=1e-1)

    score_gap = smo.score(X_test, y_test) - ref.score(X_test, y_test)
    assert abs(score_gap) <= 1e-2
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_solve_dual_l1_svr_with_cvxopt():
    """Fit the dual epsilon-insensitive SVR with cvxopt, with and without `reg_intercept`.

    Both variants use a linear kernel, are trained on the same standardized
    75/25 split of the data returned by ``load_boston`` and must score at
    least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    for reg_intercept in (True, False):
        model = SVR(loss=epsilon_insensitive, kernel=linear, reg_intercept=reg_intercept,
                    dual=True, optimizer='cvxopt')
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def test_solve_dual_l1_svr_with_reg_intercept_with_bcqp_optimizers():
    """Fit the dual epsilon-insensitive SVR (`reg_intercept=True`) with each constrained optimizer.

    Every optimizer uses a linear kernel, is trained on the same standardized
    75/25 split of the data returned by ``load_boston`` and must score at
    least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    # checked in a fixed order, so the first failing optimizer is the one reported
    for optimizer in (ProjectedGradient, ActiveSet, InteriorPoint, FrankWolfe):
        model = SVR(loss=epsilon_insensitive, kernel=linear, reg_intercept=True, dual=True, optimizer=optimizer)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_solve_dual_l1_svr_with_proximal_bundle():
    """Fit the dual epsilon-insensitive SVR with the proximal bundle method.

    The model is fitted with and without `reg_intercept`, using a linear kernel
    and at most 150 iterations, on the same standardized 75/25 split of the
    data returned by ``load_boston``; each fit must score at least 0.67 on the
    held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    for reg_intercept in (True, False):
        model = SVR(loss=epsilon_insensitive, kernel=linear, reg_intercept=reg_intercept,
                    dual=True, optimizer=ProximalBundle, max_iter=150)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_solve_dual_l1_svr_with_AdaGrad():
    """Fit the dual epsilon-insensitive SVR with AdaGrad, with and without `reg_intercept`.

    Both variants use a linear kernel and a learning rate of 1, are trained on
    the same standardized 75/25 split of the data returned by ``load_boston``
    and must score at least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    for reg_intercept in (True, False):
        model = SVR(loss=epsilon_insensitive, kernel=linear, reg_intercept=reg_intercept,
                    dual=True, optimizer=AdaGrad, learning_rate=1.)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_solve_primal_l2_svr_with_line_search_optimizers():
    """Fit the primal squared epsilon-insensitive SVR with every line search optimizer.

    Each optimizer is trained on the same standardized 75/25 split of the data
    returned by ``load_boston`` and must score at least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    # optimizers are checked in a fixed order, so the first failing one is the one reported
    for optimizer in (SteepestGradientDescent, ConjugateGradient, Newton, BFGS, LBFGS):
        model = SVR(loss=squared_epsilon_insensitive, optimizer=optimizer)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def test_solve_primal_l2_svr_with_stochastic_optimizers():
    """Fit the primal squared epsilon-insensitive SVR with every stochastic optimizer.

    Each configuration is trained on the same standardized 75/25 split of the
    data returned by ``load_boston`` and must score at least 0.67 on the
    held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    # (optimizer, extra constructor arguments) pairs, checked in this order
    configurations = (
        (StochasticGradientDescent, {}),
        (Adam, {}),
        (AMSGrad, {}),
        (AdaMax, {'max_iter': 3000}),
        (AdaGrad, {'learning_rate': 1.}),
        (AdaDelta, {'learning_rate': 1., 'max_iter': 5000}),
        (RMSProp, {}),
    )
    for optimizer, extra_params in configurations:
        model = SVR(loss=squared_epsilon_insensitive, optimizer=optimizer, **extra_params)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def test_solve_dual_l2_svr_with_cvxopt():
    """Fit the dual squared epsilon-insensitive SVR with cvxopt, with and without `reg_intercept`.

    Both variants use a linear kernel, are trained on the same standardized
    75/25 split of the data returned by ``load_boston`` and must score at
    least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    for reg_intercept in (True, False):
        model = SVR(loss=squared_epsilon_insensitive, kernel=linear, reg_intercept=reg_intercept,
                    dual=True, optimizer='cvxopt')
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def test_solve_dual_l2_svr_with_AdaGrad():
    """Fit the dual squared epsilon-insensitive SVR with AdaGrad, with and without `reg_intercept`.

    Both variants use a linear kernel and a learning rate of 1, are trained on
    the same standardized 75/25 split of the data returned by ``load_boston``
    and must score at least 0.67 on the held-out part.
    """
    features, targets = load_boston(return_X_y=True)
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.75, random_state=123456)

    for reg_intercept in (True, False):
        model = SVR(loss=squared_epsilon_insensitive, kernel=linear, reg_intercept=reg_intercept,
                    dual=True, optimizer=AdaGrad, learning_rate=1.)
        model.fit(X_train, y_train)
        assert model.score(X_test, y_test) >= 0.67
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
# Hand control to pytest when this module is executed directly as a script.
if __name__ == "__main__":
    pytest.main()
|
optiml/ml/utils.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import matplotlib.pyplot as plt
|
|
2
|
+
import numpy as np
|
|
3
|
+
from matplotlib.lines import Line2D
|
|
4
|
+
from sklearn.base import ClassifierMixin, RegressorMixin
|
|
5
|
+
from sklearn.model_selection import learning_curve, validation_curve
|
|
6
|
+
from sklearn.svm import LinearSVC as SKLinearSVC
|
|
7
|
+
from sklearn.svm import LinearSVR as SKLinearSVR
|
|
8
|
+
from sklearn.svm import SVC as SKLSVC
|
|
9
|
+
from sklearn.svm import SVR as SKLSVR
|
|
10
|
+
from sklearn.utils.multiclass import unique_labels
|
|
11
|
+
|
|
12
|
+
from .svm import SVM, SVC, SVR
|
|
13
|
+
from .svm.kernels import Kernel
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def moving_average(interval, window_size):
    """Smooth a 1-D sequence with a centred, uniformly weighted moving average.

    Parameters
    ----------
    interval : array_like
        One-dimensional sequence of values to smooth.
    window_size : int or float
        Width of the averaging window. A non-integer value is truncated
        towards zero and the truncated width is used for the weights too,
        so the weights always sum to one.

    Returns
    -------
    numpy.ndarray
        The smoothed values. ``np.convolve`` is called in ``'same'`` mode, so
        the result has the length of `interval` when the window is not longer
        than it. Near the two ends the window hangs over the data, hence those
        values are averaged against implicit zeros.

    Raises
    ------
    ValueError
        If `window_size` truncates to less than one sample.
    """
    width = int(window_size)
    if width < 1:
        raise ValueError(f'window_size must be at least 1, got {window_size}')
    # normalize by the actual number of taps, not by the untruncated argument
    window = np.ones(width) / width
    return np.convolve(interval, window, 'same')
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# data generators
|
|
22
|
+
|
|
23
|
+
def generate_linearly_separable_data(size=100, random_state=None):
    """Draw two 2-D Gaussian point clouds labelled +1 and -1.

    Parameters
    ----------
    size : int
        Number of samples drawn for each of the two classes.
    random_state : int or None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    tuple of numpy.ndarray
        The stacked samples, shape ``(2 * size, 2)``, and their labels: ones
        for the first `size` rows and minus ones for the remaining rows.
    """
    rng = np.random.RandomState(random_state)
    shared_cov = np.array([[0.8, 0.6], [0.6, 0.8]])
    # the positive class is drawn first; the draw order fixes the generated values for a given seed
    positives = rng.multivariate_normal(np.array([0, 2]), shared_cov, size)
    negatives = rng.multivariate_normal(np.array([2, 0]), shared_cov, size)
    points = np.vstack((positives, negatives))
    labels = np.hstack((np.ones(len(positives)), -np.ones(len(negatives))))
    return points, labels
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def generate_linearly_separable_overlap_data(size=100, random_state=None):
    """Draw two wider 2-D Gaussian point clouds labelled +1 and -1.

    The class means are ``[0, 2]`` and ``[2, 0]`` and both classes share the
    covariance ``[[1.5, 1.0], [1.0, 1.5]]``.

    Parameters
    ----------
    size : int
        Number of samples drawn for each of the two classes.
    random_state : int or None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    tuple of numpy.ndarray
        The stacked samples, shape ``(2 * size, 2)``, and their labels: ones
        for the first `size` rows and minus ones for the remaining rows.
    """
    rng = np.random.RandomState(random_state)
    shared_cov = np.array([[1.5, 1.0], [1.0, 1.5]])
    # the positive class is drawn first; the draw order fixes the generated values for a given seed
    positives = rng.multivariate_normal(np.array([0, 2]), shared_cov, size)
    negatives = rng.multivariate_normal(np.array([2, 0]), shared_cov, size)
    points = np.vstack((positives, negatives))
    labels = np.hstack((np.ones(len(positives)), -np.ones(len(negatives))))
    return points, labels
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def generate_nonlinearly_separable_data(size=100, random_state=None):
    """Draw two classes, each made of two 2-D Gaussian clusters.

    The +1 class is centred on ``[-1, 2]`` and ``[4, -4]``, the -1 class on
    ``[1, -1]`` and ``[-4, 4]``; all clusters share one covariance matrix.

    Parameters
    ----------
    size : int
        Number of samples drawn for each of the four clusters.
    random_state : int or None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    tuple of numpy.ndarray
        The stacked samples, shape ``(4 * size, 2)``, and their labels: ones
        for the first ``2 * size`` rows and minus ones for the remaining rows.
    """
    rng = np.random.RandomState(random_state)
    shared_cov = [[1.0, 0.8], [0.8, 1.0]]

    def draw_cluster(centre):
        return rng.multivariate_normal(centre, shared_cov, size)

    # draw order (both positive clusters, then both negative ones) fixes the values for a given seed
    positives = np.vstack((draw_cluster([-1, 2]), draw_cluster([4, -4])))
    negatives = np.vstack((draw_cluster([1, -1]), draw_cluster([-4, 4])))
    points = np.vstack((positives, negatives))
    labels = np.hstack((np.ones(len(positives)), -np.ones(len(negatives))))
    return points, labels
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def generate_nonlinearly_regression_data(size=100, random_state=None):
    """Sample a noisy sine curve at sorted random points of ``[0, 2*pi)``.

    Parameters
    ----------
    size : int
        Number of samples.
    random_state : int or None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    tuple of numpy.ndarray
        The inputs as a column, shape ``(size, 1)``, and the targets
        ``sin(x)`` plus uniform noise in ``(-0.125, 0.125]``, shape ``(size,)``.
    """
    rng = np.random.RandomState(random_state)
    # inputs are drawn before the noise; the draw order fixes the values for a given seed
    inputs = np.sort(2 * np.pi * rng.uniform(size=size))
    noise = 0.25 * (0.5 - rng.uniform(size=size))
    targets = np.sin(inputs) + noise
    return inputs.reshape(-1, 1), targets
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def generate_centred_and_normalized_regression_data(size=100, random_state=None):
    """Build a two-feature regression problem with centred targets and unit-norm columns.

    Parameters
    ----------
    size : int
        Number of samples, taken on an even grid of ``[0, 1]``.
    random_state : int or None
        Seed handed to ``np.random.RandomState``.

    Returns
    -------
    tuple of numpy.ndarray
        The design matrix with columns ``2x`` and ``x^2``, each scaled to unit
        Euclidean norm, shape ``(size, 2)``, and the zero-mean targets
        ``sin(1.5 * pi * x)`` plus uniform noise, shape ``(size,)``.
    """
    rng = np.random.RandomState(random_state)
    grid = np.linspace(0, 1, size)
    # sine curve perturbed by uniform noise in [0, 1)
    targets = np.sin(grid * 1.5 * np.pi) + rng.uniform(size=size)
    # centre the targets so that no intercept has to be fitted
    targets = targets - targets.mean()
    # design matrix with columns 2x and x^2
    design = np.vstack((2 * grid, grid ** 2)).T
    # unit-norm columns make the problem easier to visualize
    column_norms = np.linalg.norm(design, axis=0)
    return design / column_norms, targets
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# plot functions
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def plot_svm_hyperplane(svm, X, y):
    """Draw a fitted SVM together with its training data on a new axes.

    For a classifier the samples of the first two labels are plotted with the
    decision boundary, the two margins and the support vectors; for a
    regressor the training data is plotted with the fitted curve, the epsilon
    tube and the support vectors.

    Parameters
    ----------
    svm : estimator
        Fitted model. Depending on its type the function reads `dual`,
        `kernel`, `support_` and `epsilon`, and calls `decision_function` or
        `predict`.
    X : numpy.ndarray
        Training inputs. For classifiers columns 0 and 1 are plotted against
        each other; for regressors `X` is plotted against `y`.
    y : numpy.ndarray
        Training targets (class labels or regression values).
    """
    ax = plt.axes(facecolor='#E6E6E6')  # gray background
    plt.grid(color='w', linestyle='solid')  # draw solid white grid lines
    ax.set_axisbelow(True)
    # hide top and right ticks
    ax.xaxis.tick_bottom()
    ax.yaxis.tick_left()
    # hide axis spines
    for spine in ax.spines.values():
        spine.set_visible(False)

    is_classifier = isinstance(svm, ClassifierMixin)
    is_regressor = isinstance(svm, RegressorMixin)

    # axis labels and limits
    if is_classifier:
        labels = unique_labels(y)
        X1, X2 = X[y == labels[0]], X[y == labels[1]]
        # limits come from each plotted feature, so that every drawn sample is visible;
        # taking them from the two class subsets would mix up classes and features
        plotted = np.vstack((X1, X2))
        x1_min, x1_max = plotted[:, 0].min(), plotted[:, 0].max()
        x2_min, x2_max = plotted[:, 1].min(), plotted[:, 1].max()
        plt.xlabel('$x_1$', fontsize=9)
        plt.ylabel('$x_2$', fontsize=9)
        ax.set(xlim=(x1_min, x1_max), ylim=(x2_min, x2_max))
    elif is_regressor:
        plt.xlabel('$X$', fontsize=9)
        plt.ylabel('$y$', fontsize=9)

    # title: primal and linear models have no kernel to mention
    if (isinstance(svm, SVM) and not svm.dual) or isinstance(svm, (SKLinearSVC, SKLinearSVR)):
        kernel = ''
    elif isinstance(svm.kernel, str):
        kernel = 'using ' + svm.kernel + ' kernel'
    elif isinstance(svm.kernel, Kernel):
        kernel = 'using ' + svm.kernel.__class__.__name__
    else:
        kernel = 'using ' + svm.kernel.__name__
    plt.title(f'{"" if isinstance(svm, SVM) else "sklearn"} {svm.__class__.__name__} {kernel}', fontsize=9)

    def point_handle(marker, color):
        # legend proxy for a scatter of markers
        return Line2D([0], [0], linestyle='none', marker=marker, color=color,
                      markerfacecolor=color, markersize=9)

    def line_handle(linestyle):
        # legend proxy for a black line without visible markers
        return Line2D([0], [0], linestyle=linestyle, marker='.', color='black',
                      markerfacecolor='darkorange', markersize=0)

    # set the legend
    if is_classifier:
        plt.legend([point_handle('x', 'lightblue'),
                    point_handle('o', 'darkorange'),
                    line_handle('-'),
                    line_handle('--'),
                    point_handle('.', 'navy')],
                   ['negative -1', 'positive +1', 'decision boundary', 'margin', 'support vectors'],
                   fontsize='7', shadow=True).get_frame().set_facecolor('white')
    elif is_regressor:
        plt.legend([point_handle('o', 'darkorange'),
                    line_handle('-'),
                    line_handle('--'),
                    point_handle('.', 'navy')],
                   ['training data', 'decision boundary', r'$\epsilon$-insensitive tube', 'support vectors'],
                   fontsize='7', shadow=True).get_frame().set_facecolor('white')

    # plot training data
    if is_classifier:
        plt.plot(X1[:, 0], X1[:, 1], marker='x', markersize=5, color='lightblue', linestyle='none')
        plt.plot(X2[:, 0], X2[:, 1], marker='o', markersize=4, color='darkorange', linestyle='none')
    else:
        plt.plot(X, y, marker='o', markersize=4, color='darkorange', linestyle='none')

    # plot support vectors
    if is_classifier:
        if isinstance(svm, SVC) and svm.dual or isinstance(svm, SKLSVC):
            plt.scatter(X[svm.support_][:, 0], X[svm.support_][:, 1], s=60, color='navy')
        elif isinstance(svm, SVC) and not svm.dual or isinstance(svm, SKLinearSVC):
            # no `support_` is read here: use the samples lying on or inside the margin
            support_ = np.argwhere(np.abs(svm.decision_function(X)) <= 1).ravel()
            plt.scatter(X[support_][:, 0], X[support_][:, 1], s=60, color='navy')
    elif is_regressor:
        if isinstance(svm, SVR) and svm.dual or isinstance(svm, SKLSVR):
            plt.scatter(X[svm.support_], y[svm.support_], s=60, color='navy')
        elif isinstance(svm, SVR) and not svm.dual or isinstance(svm, SKLinearSVR):
            # no `support_` is read here: use the samples lying on or outside the epsilon tube
            support_ = np.argwhere(np.abs(y - svm.predict(X)) >= svm.epsilon).ravel()
            plt.scatter(X[support_], y[support_], s=60, color='navy')

    # plot boundaries
    if is_classifier:
        # evaluate the decision function on a 50x50 grid spanning the visible area
        grid_x1, grid_x2 = np.meshgrid(np.linspace(x1_min, x1_max, 50), np.linspace(x2_min, x2_max, 50))
        grid_points = np.column_stack((np.ravel(grid_x1), np.ravel(grid_x2)))
        Z = svm.decision_function(grid_points).reshape(grid_x1.shape)
        plt.contour(grid_x1, grid_x2, Z, [0.0], colors='k', linewidths=1, origin='lower')
        # the margins are the level sets where the decision function equals -1 and +1
        plt.contour(grid_x1, grid_x2, Z + 1, [0.0], colors='grey', linestyles='--', linewidths=1, origin='lower')
        plt.contour(grid_x1, grid_x2, Z - 1, [0.0], colors='grey', linestyles='--', linewidths=1, origin='lower')
    elif is_regressor:
        _X = np.linspace(X.min(), X.max(), 1000).reshape(-1, 1)
        Z = svm.predict(_X)
        ax.plot(_X, Z, color='k', linewidth=1)
        ax.plot(_X, Z + svm.epsilon, color='grey', linestyle='--', linewidth=1)
        ax.plot(_X, Z - svm.epsilon, color='grey', linestyle='--', linewidth=1)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def plot_validation_curve(estimator, X, y, param_name, param_range, scorer, cv=5):
    """Plot training and cross-validation scores against one hyper-parameter.

    The scores come from ``sklearn.model_selection.validation_curve``; for
    each value in `param_range` the mean over the folds is drawn as a line
    and one standard deviation around it as a shaded band.

    Parameters
    ----------
    estimator : estimator
        Model passed to ``validation_curve``.
    X, y : array_like
        Data passed to ``validation_curve``.
    param_name : str
        Name of the varied parameter; also used as the x-axis label.
    param_range : array_like
        Values of the parameter to evaluate.
    scorer : str or callable
        Passed to ``validation_curve`` as `scoring`.
    cv : int or cross-validation generator
        Passed to ``validation_curve`` as `cv`.
    """
    train_scores, test_scores = validation_curve(estimator, X, y, param_name=param_name, param_range=param_range,
                                                 cv=cv, scoring=scorer, n_jobs=-1)

    plt.title('validation curve')
    plt.xlabel(param_name)
    plt.ylabel('score')

    curves = ((train_scores, 'training score', 'navy'),
              (test_scores, 'cross-validation score', 'darkorange'))
    for fold_scores, label, color in curves:
        # axis 1 runs over the folds
        mean_score = np.mean(fold_scores, axis=1)
        std_score = np.std(fold_scores, axis=1)
        plt.plot(param_range, mean_score, label=label, color=color, marker='.', lw=2)
        plt.fill_between(param_range, mean_score - std_score,
                         mean_score + std_score, alpha=0.2, color=color)

    plt.legend().get_frame().set_facecolor('white')
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def plot_learning_curve(estimator, X, y, scorer, cv=5, train_sizes=np.linspace(.1, 1.0, 5),
                        shuffle=False, random_state=None):
    """Plot training and cross-validation scores against the training set size.

    The scores come from ``sklearn.model_selection.learning_curve``; for each
    training set size the mean over the folds is drawn as a line and one
    standard deviation around it as a shaded band.

    Parameters
    ----------
    estimator : estimator
        Model passed to ``learning_curve``.
    X, y : array_like
        Data passed to ``learning_curve``.
    scorer : str or callable
        Passed to ``learning_curve`` as `scoring`.
    cv : int or cross-validation generator
        Passed to ``learning_curve`` as `cv`.
    train_sizes : array_like
        Training set sizes passed to ``learning_curve``.
    shuffle : bool
        Passed to ``learning_curve`` as `shuffle`.
    random_state : int or None
        Passed to ``learning_curve`` as `random_state`.
    """
    train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, train_sizes=train_sizes, cv=cv,
                                                            scoring=scorer, n_jobs=-1, shuffle=shuffle,
                                                            random_state=random_state)

    plt.title('learning curve')
    plt.xlabel('training set size')
    plt.ylabel('score')

    curves = ((train_scores, 'train score', 'navy'),
              (test_scores, 'cross-validation score', 'darkorange'))
    for fold_scores, label, color in curves:
        # axis 1 runs over the folds
        mean_score = np.mean(fold_scores, axis=1)
        std_score = np.std(fold_scores, axis=1)
        plt.plot(train_sizes, mean_score, label=label, color=color, marker='.', lw=2)
        plt.fill_between(train_sizes, mean_score + std_score,
                         mean_score - std_score, color=color, alpha=0.2)

    plt.legend().get_frame().set_facecolor('white')
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def plot_model_loss(train_loss_history, val_loss_history=None):
    """Plot the training loss, and optionally the validation loss, per epoch.

    A new figure is created with ``plt.subplots``. The validation curve and
    its legend entry are drawn only when a non-empty validation history is
    given, so the legend never lists a curve that is not on the plot.

    Parameters
    ----------
    train_loss_history : sequence of float
        Training loss values, plotted against their index.
    val_loss_history : sequence of float, optional
        Validation loss values, plotted against their index.
    """
    _, axes = plt.subplots()
    axes.plot(train_loss_history, color='navy', lw=2)
    legend_labels = ['training']
    # `len` instead of truthiness, so that numpy arrays are accepted too
    if val_loss_history is not None and len(val_loss_history) > 0:
        axes.plot(val_loss_history, color='darkorange', lw=2)
        legend_labels.append('validation')
    axes.set_title('model loss')
    axes.set_xlabel('epoch')
    axes.set_ylabel('loss')
    axes.legend(legend_labels).get_frame().set_facecolor('white')
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def plot_model_accuracy(train_score_history, val_score_history=None):
    """Plot the training accuracy, and optionally the validation accuracy, per epoch.

    A new figure is created with ``plt.subplots``. The validation curve and
    its legend entry are drawn only when a non-empty validation history is
    given, so the legend never lists a curve that is not on the plot.

    Parameters
    ----------
    train_score_history : sequence of float
        Training score values, plotted against their index.
    val_score_history : sequence of float, optional
        Validation score values, plotted against their index.
    """
    _, axes = plt.subplots()
    axes.plot(train_score_history, color='navy', lw=2)
    legend_labels = ['training']
    # `len` instead of truthiness, so that numpy arrays are accepted too
    if val_score_history is not None and len(val_score_history) > 0:
        axes.plot(val_score_history, color='darkorange', lw=2)
        legend_labels.append('validation')
    axes.set_title('model accuracy')
    axes.set_xlabel('epoch')
    axes.set_ylabel('accuracy')
    axes.legend(legend_labels).get_frame().set_facecolor('white')
|
optiml/opti/__init__.py
ADDED