optiml 1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiml/__init__.py +0 -0
- optiml/ml/__init__.py +0 -0
- optiml/ml/neural_network/__init__.py +3 -0
- optiml/ml/neural_network/_base.py +475 -0
- optiml/ml/neural_network/activations.py +79 -0
- optiml/ml/neural_network/initializers.py +66 -0
- optiml/ml/neural_network/layers.py +183 -0
- optiml/ml/neural_network/losses.py +178 -0
- optiml/ml/neural_network/regularizers.py +87 -0
- optiml/ml/svm/__init__.py +3 -0
- optiml/ml/svm/_base.py +1442 -0
- optiml/ml/svm/kernels.py +208 -0
- optiml/ml/svm/losses.py +284 -0
- optiml/ml/svm/smo.py +797 -0
- optiml/ml/tests/__init__.py +0 -0
- optiml/ml/tests/_datasets.py +49 -0
- optiml/ml/tests/_utils.py +28 -0
- optiml/ml/tests/test_initializers.py +33 -0
- optiml/ml/tests/test_neural_network.py +86 -0
- optiml/ml/tests/test_svc.py +245 -0
- optiml/ml/tests/test_svr.py +256 -0
- optiml/ml/utils.py +252 -0
- optiml/opti/__init__.py +4 -0
- optiml/opti/_base.py +309 -0
- optiml/opti/constrained/__init__.py +9 -0
- optiml/opti/constrained/_base.py +404 -0
- optiml/opti/constrained/active_set.py +228 -0
- optiml/opti/constrained/frank_wolfe.py +158 -0
- optiml/opti/constrained/interior_point.py +282 -0
- optiml/opti/constrained/projected_gradient.py +138 -0
- optiml/opti/constrained/tests/__init__.py +0 -0
- optiml/opti/constrained/tests/test_active_set.py +16 -0
- optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
- optiml/opti/constrained/tests/test_interior_point.py +16 -0
- optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
- optiml/opti/constrained/tests/test_lower_bound.py +29 -0
- optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
- optiml/opti/unconstrained/__init__.py +6 -0
- optiml/opti/unconstrained/_base.py +63 -0
- optiml/opti/unconstrained/line_search/__init__.py +10 -0
- optiml/opti/unconstrained/line_search/_base.py +106 -0
- optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
- optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
- optiml/opti/unconstrained/line_search/line_search.py +248 -0
- optiml/opti/unconstrained/line_search/newton.py +198 -0
- optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
- optiml/opti/unconstrained/proximal_bundle.py +219 -0
- optiml/opti/unconstrained/stochastic/__init__.py +12 -0
- optiml/opti/unconstrained/stochastic/_base.py +246 -0
- optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
- optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
- optiml/opti/unconstrained/stochastic/adam.py +179 -0
- optiml/opti/unconstrained/stochastic/adamax.py +178 -0
- optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
- optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
- optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
- optiml/opti/unconstrained/stochastic/schedules.py +89 -0
- optiml/opti/unconstrained/tests/__init__.py +0 -0
- optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
- optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
- optiml/opti/unconstrained/tests/test_adam.py +42 -0
- optiml/opti/unconstrained/tests/test_adamax.py +41 -0
- optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
- optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
- optiml/opti/unconstrained/tests/test_functions.py +34 -0
- optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
- optiml/opti/unconstrained/tests/test_newton.py +20 -0
- optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
- optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
- optiml/opti/unconstrained/tests/test_verbose.py +25 -0
- optiml/opti/utils.py +353 -0
- optiml-1.7.dist-info/METADATA +203 -0
- optiml-1.7.dist-info/RECORD +76 -0
- optiml-1.7.dist-info/WHEEL +5 -0
- optiml-1.7.dist-info/licenses/LICENSE +21 -0
- optiml-1.7.dist-info/top_level.txt +1 -0
optiml/ml/svm/_base.py
ADDED
|
@@ -0,0 +1,1442 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import warnings
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from io import StringIO
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from qpsolvers import solve_qp
|
|
8
|
+
from sklearn.base import ClassifierMixin, BaseEstimator, RegressorMixin
|
|
9
|
+
from sklearn.exceptions import ConvergenceWarning
|
|
10
|
+
from sklearn.model_selection import train_test_split
|
|
11
|
+
from sklearn.preprocessing import LabelBinarizer
|
|
12
|
+
from wurlitzer import pipes, STDOUT
|
|
13
|
+
|
|
14
|
+
from .kernels import gaussian, Kernel, LinearKernel
|
|
15
|
+
from .losses import (squared_hinge, squared_epsilon_insensitive,
|
|
16
|
+
Hinge, SquaredHinge, EpsilonInsensitive, SquaredEpsilonInsensitive)
|
|
17
|
+
from .smo import SMO, SMOClassifier, SMORegression
|
|
18
|
+
from ...opti import Optimizer
|
|
19
|
+
from ...opti import Quadratic
|
|
20
|
+
from ...opti.constrained import BoxConstrainedQuadraticOptimizer, AugmentedLagrangianQuadratic
|
|
21
|
+
from ...opti.unconstrained import ProximalBundle
|
|
22
|
+
from ...opti.unconstrained.line_search import LineSearchOptimizer
|
|
23
|
+
from ...opti.unconstrained.stochastic import StochasticOptimizer, StochasticMomentumOptimizer, StochasticGradientDescent
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SVM(BaseEstimator, ABC):
    """
    Base abstract class for all SVM-type estimators.

    Parameters
    ----------

    loss : `SVMLoss` instance, default=None
        Specifies the loss function.

    kernel : `Kernel` instance like {linear, poly, gaussian, sigmoid}, default=gaussian
        Specifies the kernel type to be used in the algorithm.

    C : float, default=1
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.

    rho : int, default=1
        Rho parameter for the augmented term of the Lagrangian method.
        Must be strictly positive.

    mu : float, default=1
        Mu parameter for the proximal bundle method.
        Only used when ``optimizer`` is `ProximalBundle`.
        Must be strictly positive.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to False, no intercept will be used in calculations
        (i.e., data is expected to be already centered).

    intercept_scaling : float, default=1
        When ``fit_intercept`` is True, instance vector x becomes
        [x, intercept_scaling], i.e., a "synthetic" feature with constant
        value equal to ``intercept_scaling`` is appended to the instance vector.
        The intercept becomes intercept_scaling * synthetic feature weight.
        Note: the synthetic feature weight is subject to L1/L2 regularization
        as all other features. To lessen the effect of regularization on synthetic
        feature weight (and therefore on the intercept) ``intercept_scaling`` has
        to be increased.

    reg_intercept : bool, default=False
        Whether to include the intercept in the regularization term.

    dual : bool, default=False
        Select the algorithm to either solve the dual or primal optimization problem.
        Prefer ``dual=False`` when n_samples > n_features and the instance
        vector is linearly separable in the given space or, if not, consider
        the possibility to apply a non-linear transformation of the instance vector
        using a low-rank kernel matrix approximation, i.e., Nystrom, before training.
        See more at:
        - https://scikit-learn.org/stable/modules/classes.html#module-sklearn.kernel_approximation
        - https://cdn.rawgit.com/mstrazar/mklaren/master/docs/build/html/projection.html

    optimizer : LineSearchOptimizer or StochasticOptimizer subclass, default=StochasticGradientDescent
        The solver for optimization. It can be a subclass of the `LineSearchOptimizer`
        which can converge faster and perform better for small datasets, e.g., the
        `BFGS` quasi-Newton method or, alternatively, a subclass of the `StochasticOptimizer`
        e.g., the `StochasticGradientDescent` or `Adam`, which works well on relatively
        large datasets (with thousands of training samples or more) in terms of both
        training time and validation score.
        When ``dual`` is True, a solver name given as a string or a subclass
        of `SMO` is accepted as well.

    master_solver : string, default='clarabel'
        Master solver for the proximal bundle method for the CVXPY interface.
        Only used when ``optimizer`` is `ProximalBundle`.

    learning_rate : 'auto' or double, default='auto'
        The initial learning rate used for weight update. It controls the
        step-size in updating the weights. Only used when ``optimizer`` is a
        subclass of `StochasticOptimizer`.
        If 'auto', 1/L is used where L is the Lipschitz constant.

    momentum_type : {'none', 'polyak', 'nesterov'}, default='none'
        Momentum type used for weight update. Only used when ``optimizer`` is
        a subclass of `StochasticOptimizer`.

    momentum : float, default=0.9
        Momentum for weight update. Should be between 0 and 1. Only used when
        ``optimizer`` is a subclass of `StochasticOptimizer`.

    max_iter : int, default=1000
        Maximum number of iterations. The solver iterates until convergence
        (determined by ``tol``) or this number of iterations. If the optimizer
        is a subclass of `StochasticOptimizer`, this value determines the number
        of epochs (how many times each data point will be used), not the number
        of gradient steps.

    max_f_eval : int, default=15000
        Only used when ``optimizer`` is a subclass of `LineSearchOptimizer`.
        Maximum number of loss function calls. The solver iterates until
        convergence (determined by ``tol``), number of iterations reaches
        ``max_iter``, or this number of loss function calls. Note that number
        of loss function calls will be greater than or equal to the number
        of iterations.

    tol : float, default=1e-4
        Tolerance for stopping criterion.

    batch_size : int, default=None
        Size of mini batches for stochastic optimizers.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.

    shuffle : bool, default=True
        Whether to shuffle samples for batch sampling in each iteration. Only
        used when the ``optimizer`` is a subclass of `StochasticOptimizer`.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generation for train-test split if
        ``early_stopping`` is True and shuffling the data for batch sampling when
        an instance of `StochasticOptimizer` class is used as ``optimizer`` value.
        Pass an int for reproducible output across multiple function calls.

    early_stopping : bool, default=False
        Whether to use early stopping to terminate training. If set to True
        and ``validation_split`` is greater than 0, it will automatically set
        aside ``validation_split``% of training data as validation and terminate
        training when validation score is not improving by at least ``tol`` for
        ``patience`` consecutive epochs, otherwise terminate training when train
        loss does not improve by more than ``tol`` for ``patience`` consecutive
        passes over the training set.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.

    validation_split : float, default=0.
        The proportion of training data to set aside as validation set for
        early stopping. Must be between 0 and 1.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`
        and ``early_stopping`` is True.

    patience : int, default=5
        Maximum number of epochs to not meet ``tol`` improvement.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.

    verbose : bool or int, default=False
        Controls the verbosity of progress messages to stdout. Use a boolean value
        to switch on/off or an int value to show progress each ``verbose`` time
        optimization steps.

    master_verbose : bool or int, default=False
        Controls the verbosity of the CVXPY interface.
        Only used when ``optimizer`` is `ProximalBundle`.

    Attributes
    ----------

    coef_ : ndarray of shape (n_features,)
        Weights assigned to the features (coefficients in the primal problem).
        This is only available in the case of a linear kernel.

    dual_coef_ : ndarray of shape (n_SV,)
        Coefficients of the support vector in the decision function.

    intercept_ : float
        Constants in decision function.

    support_ : ndarray of shape (n_SV,)
        Indices of support vectors.

    support_vectors_ : ndarray of shape (n_SV, n_features)
        Support vectors.
    """

    def __init__(self,
                 loss=None,
                 kernel=gaussian,
                 C=1,
                 rho=1,
                 mu=1,
                 fit_intercept=True,
                 intercept_scaling=1,
                 reg_intercept=False,
                 dual=False,
                 optimizer=StochasticGradientDescent,
                 master_solver='clarabel',
                 learning_rate='auto',
                 momentum_type='none',
                 momentum=0.9,
                 max_iter=1000,
                 max_f_eval=15000,
                 tol=1e-4,
                 batch_size=None,
                 shuffle=True,
                 random_state=None,
                 early_stopping=False,
                 validation_split=0.,
                 patience=5,
                 verbose=False,
                 master_verbose=False):
        """
        Validate the hyper-parameters, store them and initialise the
        (empty) fitted attributes and training histories.

        Raises
        ------
        TypeError
            If ``kernel`` is not a `Kernel` instance or ``optimizer`` is not
            an allowed optimization method for the selected formulation.
        ValueError
            If ``C``, ``rho``, ``mu`` or ``tol`` is not strictly positive, or
            if ``fit_intercept``, ``reg_intercept`` or ``dual`` is not a bool.
        """
        self.loss = loss
        if not isinstance(kernel, Kernel):
            raise TypeError(f'{kernel} is not an allowed kernel function')
        self.kernel = kernel
        if not C > 0:
            raise ValueError('C must be > 0')
        self.C = C
        if not rho > 0:
            raise ValueError('rho must be > 0')
        self.rho = rho
        if not mu > 0:
            raise ValueError('mu must be > 0')
        self.mu = mu
        if not isinstance(fit_intercept, bool):
            raise ValueError('fit_intercept must be a boolean value')
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        if not isinstance(reg_intercept, bool):
            raise ValueError('reg_intercept must be a boolean value')
        self.reg_intercept = reg_intercept
        if not isinstance(dual, bool):
            raise ValueError('dual must be a boolean value')
        self.dual = dual

        # The dual problem accepts a solver name (str), an SMO subclass or an
        # Optimizer subclass; the primal problem only an Optimizer subclass.
        # `issubclass` itself raises on non-class arguments, so guard it with
        # an explicit class check to always report the meaningful message.
        is_class = isinstance(optimizer, type)
        if self.dual:
            allowed = (isinstance(optimizer, str) or
                       (is_class and issubclass(optimizer, (SMO, Optimizer))))
        else:
            allowed = is_class and issubclass(optimizer, Optimizer)
        if not allowed:
            raise TypeError(f'{optimizer} is not an allowed optimization method')
        self.optimizer = optimizer

        self.master_solver = master_solver
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.max_f_eval = max_f_eval
        self.momentum_type = momentum_type
        self.momentum = momentum
        if not tol > 0:
            raise ValueError('tol must be > 0')
        self.tol = tol
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.random_state = random_state
        self.early_stopping = early_stopping
        self.validation_split = validation_split
        self.patience = patience
        self.verbose = verbose
        self.master_verbose = master_verbose

        # primal weights exist only for the primal problem or a linear kernel
        if not self.dual or isinstance(self.kernel, LinearKernel):
            self.coef_ = np.zeros(0)
        self.intercept_ = 0.
        self.support_ = np.zeros(0)
        self.support_vectors_ = np.zeros(0)
        if self.dual:
            self.alphas_ = np.zeros(0)
            self.dual_coef_ = np.zeros(0)
        if not isinstance(optimizer, str):
            self.train_loss_history = []
        # in the primal case the optimizer is guaranteed to be a class here
        if not self.dual and issubclass(self.optimizer, StochasticOptimizer):
            self.train_score_history = []
            self._no_improvement_count = 0
            self._avg_epoch_loss = 0
            if self.validation_split:
                self.val_loss_history = []
                self.val_score_history = []
                self.best_val_score = -np.inf
            else:
                self.best_loss = np.inf

    def fit(self, X, y):
        """Train the model on ``X`` and ``y``; must be implemented by subclasses."""
        raise NotImplementedError

    def decision_function(self, X):
        """
        Evaluate the decision function for the samples in ``X``.

        With the dual formulation and a non-linear kernel the value is
        ``dual_coef_ . kernel(support_vectors_, X) + intercept_``, otherwise
        it is ``X . coef_ + intercept_``.
        """
        if self.dual and not isinstance(self.kernel, LinearKernel):
            return np.dot(self.dual_coef_, self.kernel(self.support_vectors_, X)) + self.intercept_
        return np.dot(X, self.coef_) + self.intercept_

    def _store_train_info(self, opt):
        """
        Optimizer callback: append the current objective value to
        ``train_loss_history`` (``opt.primal_f_x`` when the optimizer
        reports a Lagrangian dual, ``opt.f_x`` otherwise).
        """
        if opt.is_lagrangian_dual():
            self.train_loss_history.append(opt.primal_f_x)
        else:
            self.train_loss_history.append(opt.f_x)

    def _unpack(self, packed_coef_inter):
        """
        Split the optimization variable into ``coef_`` and ``intercept_``.

        When ``fit_intercept`` is True the last entry of the vector is the
        intercept; otherwise the whole vector is ``coef_``.
        """
        if self.fit_intercept:
            self.coef_, self.intercept_ = packed_coef_inter[:-1], packed_coef_inter[-1]
        else:
            self.coef_ = packed_coef_inter

    def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
        """
        Stochastic optimizer callback: refresh ``coef_``/``intercept_`` from
        ``opt.x`` and accumulate the batch loss weighted by the batch size.

        At the end of an epoch (``opt.is_batch_end()``) the accumulated loss
        is averaged over ``opt.f.X.shape[0]`` samples and stored in
        ``train_loss_history``; if ``validation_split`` is set, the loss on
        ``(X_val, y_val)`` is stored in ``val_loss_history`` as well.
        """
        # NOTE(review): the nesting of this method was reconstructed from a
        # flattened listing - confirm against the upstream file.
        self._unpack(opt.x)
        self._avg_epoch_loss += opt.f_x * X_batch.shape[0]
        if opt.is_batch_end():
            self._avg_epoch_loss /= opt.f.X.shape[0]  # n_samples
            self.train_loss_history.append(self._avg_epoch_loss)
            if opt.is_verbose() and opt.epoch != opt.iter:
                print('\tavg_loss: {: 1.4e}'.format(self._avg_epoch_loss), end='')
            self._avg_epoch_loss = 0.
            if self.validation_split:
                val_loss = self.loss(opt.x, X_val, y_val)
                self.val_loss_history.append(val_loss)
                if opt.is_verbose():
                    print('\tval_loss: {: 1.4e}'.format(val_loss), end='')

    def _update_no_improvement_count(self, opt):
        """
        Update the early-stopping counter and stop training when exhausted.

        If ``validation_split`` is set, the last validation score is
        monitored and the best weights seen so far are remembered; otherwise
        the last training loss is monitored. After ``patience`` consecutive
        epochs without an improvement of at least ``tol`` the best weights
        (validation case only) are restored and `StopIteration` is raised.

        Raises
        ------
        StopIteration
            When the no-improvement counter reaches ``patience``.
        """
        if self.early_stopping:

            if self.validation_split:  # monitor val_score

                if self.val_score_history[-1] < self.best_val_score + self.tol:
                    self._no_improvement_count += 1
                else:
                    self._no_improvement_count = 0
                if self.val_score_history[-1] > self.best_val_score:
                    self.best_val_score = self.val_score_history[-1]
                    self._best_coef = self.coef_.copy()
                    if self.fit_intercept:
                        # the intercept is part of the optimization variable
                        # and has to be restored together with the weights
                        self._best_intercept = self.intercept_

            else:  # monitor train_loss

                if self.train_loss_history[-1] > self.best_loss - self.tol:
                    self._no_improvement_count += 1
                else:
                    self._no_improvement_count = 0
                if self.train_loss_history[-1] < self.best_loss:
                    self.best_loss = self.train_loss_history[-1]

            if self._no_improvement_count >= self.patience:

                if self.validation_split:
                    # rebuild the packed vector in the layout `_unpack`
                    # expects, so that `opt.x` keeps its original length
                    if self.fit_intercept:
                        opt.x = np.append(self._best_coef, self._best_intercept)
                    else:
                        opt.x = self._best_coef
                    # keep the estimator in sync with the restored optimizer state
                    self._unpack(opt.x)

                if self.verbose:
                    if self.validation_split:
                        print(f'\ntraining stopped since validation score did not improve more than '
                              f'tol={self.tol} for {self.patience} consecutive epochs')
                    else:
                        print('\ntraining stopped since training loss did not improve more than '
                              f'tol={self.tol} for {self.patience} consecutive epochs')

                raise StopIteration
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class SVC(ClassifierMixin, SVM):
|
|
355
|
+
"""
|
|
356
|
+
C-Support Vector Classification.
|
|
357
|
+
|
|
358
|
+
Parameters
|
|
359
|
+
----------
|
|
360
|
+
|
|
361
|
+
loss : `SVMLoss` instance like {hinge, squared_hinge}, default='squared_hinge'
|
|
362
|
+
Specifies the loss function. The hinge loss is the L1 loss, while the
|
|
363
|
+
squared hinge loss is the L2 loss.
|
|
364
|
+
"""
|
|
365
|
+
|
|
366
|
+
def __init__(self,
             loss=squared_hinge,
             kernel=gaussian,
             C=1,
             rho=1,
             mu=1,
             fit_intercept=True,
             intercept_scaling=1,
             reg_intercept=False,
             dual=False,
             optimizer=StochasticGradientDescent,
             master_solver='clarabel',
             learning_rate='auto',
             momentum_type='none',
             momentum=0.9,
             max_iter=1000,
             max_f_eval=15000,
             tol=1e-4,
             batch_size=None,
             shuffle=True,
             random_state=None,
             early_stopping=False,
             validation_split=0.,
             patience=5,
             verbose=False,
             master_verbose=False):
    """
    Forward every hyper-parameter to the base class, then check that
    ``loss`` is a classification loss and create the label binarizer.

    Raises
    ------
    TypeError
        If ``loss._loss_type`` is not ``'classifier'`` (in addition to
        whatever the base class constructor raises).
    """
    # the base class validates and stores all the shared hyper-parameters
    super().__init__(
        loss=loss,
        kernel=kernel,
        C=C,
        rho=rho,
        mu=mu,
        fit_intercept=fit_intercept,
        intercept_scaling=intercept_scaling,
        reg_intercept=reg_intercept,
        dual=dual,
        optimizer=optimizer,
        master_solver=master_solver,
        learning_rate=learning_rate,
        momentum_type=momentum_type,
        momentum=momentum,
        max_iter=max_iter,
        max_f_eval=max_f_eval,
        tol=tol,
        batch_size=batch_size,
        shuffle=shuffle,
        random_state=random_state,
        early_stopping=early_stopping,
        validation_split=validation_split,
        patience=patience,
        verbose=verbose,
        master_verbose=master_verbose,
    )
    if loss._loss_type != 'classifier':
        raise TypeError(f'{loss} is not an allowed SVC loss function')
    # binarizer configured with -1 as the negative label
    self.lb = LabelBinarizer(neg_label=-1)
|
|
420
|
+
|
|
421
|
+
def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
    """
    Stochastic optimizer callback: record the losses through the base
    class, then, at the end of each epoch, record the accuracy on the
    current batch (and on the validation set when ``validation_split``
    is set) and update the early-stopping counter.
    """
    # NOTE(review): the nesting of this method was reconstructed from a
    # flattened listing - confirm against the upstream file.
    super(SVC, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
    if not opt.is_batch_end():
        return

    # with `fit_intercept` the last column is dropped before scoring
    train_inputs = X_batch[:, :-1] if self.fit_intercept else X_batch
    acc = self.score(train_inputs, y_batch)
    self.train_score_history.append(acc)
    if opt.is_verbose():
        print('\tacc: {:1.4f}'.format(acc), end='')

    if self.validation_split:
        val_inputs = X_val[:, :-1] if self.fit_intercept else X_val
        val_acc = self.score(val_inputs, y_val)
        self.val_score_history.append(val_acc)
        if opt.is_verbose():
            print('\tval_acc: {:1.4f}'.format(val_acc), end='')

    self._update_no_improvement_count(opt)
|
|
434
|
+
|
|
435
|
+
def fit(self, X, y):
|
|
436
|
+
self.lb.fit(y)
|
|
437
|
+
if len(self.lb.classes_) > 2:
|
|
438
|
+
raise ValueError('use OneVsOneClassifier or OneVsRestClassifier from sklearn.multiclass '
|
|
439
|
+
'to train a model over more than two labels')
|
|
440
|
+
y = self.lb.transform(y).ravel()
|
|
441
|
+
|
|
442
|
+
if not self.dual:
|
|
443
|
+
|
|
444
|
+
if issubclass(self.optimizer, LineSearchOptimizer):
|
|
445
|
+
|
|
446
|
+
if self.fit_intercept:
|
|
447
|
+
X_biased = np.c_[X, np.full_like(y, self.intercept_scaling)]
|
|
448
|
+
else:
|
|
449
|
+
X_biased = X
|
|
450
|
+
|
|
451
|
+
self.loss = self.loss(self, X_biased, y)
|
|
452
|
+
self.optimizer = self.optimizer(f=self.loss,
|
|
453
|
+
max_iter=self.max_iter,
|
|
454
|
+
max_f_eval=self.max_f_eval,
|
|
455
|
+
random_state=self.random_state,
|
|
456
|
+
callback=self._store_train_info,
|
|
457
|
+
verbose=self.verbose).minimize()
|
|
458
|
+
|
|
459
|
+
if self.optimizer.status == 'stopped':
|
|
460
|
+
if self.optimizer.iter >= self.max_iter:
|
|
461
|
+
warnings.warn('max_iter reached but the optimization has not converged yet', ConvergenceWarning)
|
|
462
|
+
elif self.optimizer.f_eval >= self.max_f_eval:
|
|
463
|
+
warnings.warn('max_f_eval reached but the optimization has not converged yet',
|
|
464
|
+
ConvergenceWarning)
|
|
465
|
+
|
|
466
|
+
self._unpack(self.optimizer.x)
|
|
467
|
+
|
|
468
|
+
elif issubclass(self.optimizer, ProximalBundle):
|
|
469
|
+
|
|
470
|
+
if self.fit_intercept:
|
|
471
|
+
X_biased = np.c_[X, np.full_like(y, self.intercept_scaling)]
|
|
472
|
+
else:
|
|
473
|
+
X_biased = X
|
|
474
|
+
|
|
475
|
+
self.loss = self.loss(self, X_biased, y)
|
|
476
|
+
self.optimizer = self.optimizer(f=self.loss,
|
|
477
|
+
mu=self.mu,
|
|
478
|
+
max_iter=self.max_iter,
|
|
479
|
+
master_solver=self.master_solver,
|
|
480
|
+
master_verbose=self.master_verbose,
|
|
481
|
+
random_state=self.random_state,
|
|
482
|
+
callback=self._store_train_info,
|
|
483
|
+
verbose=self.verbose).minimize()
|
|
484
|
+
|
|
485
|
+
if self.optimizer.status == 'error':
|
|
486
|
+
warnings.warn('failure while computing direction for the master problem', ConvergenceWarning)
|
|
487
|
+
|
|
488
|
+
self._unpack(self.optimizer.x)
|
|
489
|
+
|
|
490
|
+
elif issubclass(self.optimizer, StochasticOptimizer):
|
|
491
|
+
|
|
492
|
+
if self.validation_split:
|
|
493
|
+
X, X_val, y, y_val = train_test_split(X, y,
|
|
494
|
+
test_size=self.validation_split,
|
|
495
|
+
random_state=self.random_state)
|
|
496
|
+
|
|
497
|
+
if self.fit_intercept:
|
|
498
|
+
X_val_biased = np.c_[X_val, np.full_like(y_val, self.intercept_scaling)]
|
|
499
|
+
else:
|
|
500
|
+
X_val_biased = X_val
|
|
501
|
+
|
|
502
|
+
else:
|
|
503
|
+
X_val_biased = None
|
|
504
|
+
y_val = None
|
|
505
|
+
|
|
506
|
+
if self.fit_intercept:
|
|
507
|
+
X_biased = np.c_[X, np.full_like(y, self.intercept_scaling)]
|
|
508
|
+
else:
|
|
509
|
+
X_biased = X
|
|
510
|
+
|
|
511
|
+
self.loss = self.loss(self, X_biased, y)
|
|
512
|
+
|
|
513
|
+
if issubclass(self.optimizer, StochasticMomentumOptimizer):
|
|
514
|
+
|
|
515
|
+
self.optimizer = self.optimizer(f=self.loss,
|
|
516
|
+
epochs=self.max_iter,
|
|
517
|
+
step_size=(self.loss.step_size if self.learning_rate == 'auto'
|
|
518
|
+
else self.learning_rate),
|
|
519
|
+
momentum_type=self.momentum_type,
|
|
520
|
+
momentum=self.momentum,
|
|
521
|
+
batch_size=self.batch_size,
|
|
522
|
+
callback=self._store_train_val_info,
|
|
523
|
+
callback_args=(X_val_biased, y_val),
|
|
524
|
+
shuffle=self.shuffle,
|
|
525
|
+
random_state=self.random_state,
|
|
526
|
+
verbose=self.verbose).minimize()
|
|
527
|
+
else:
|
|
528
|
+
|
|
529
|
+
self.optimizer = self.optimizer(f=self.loss,
|
|
530
|
+
epochs=self.max_iter,
|
|
531
|
+
step_size=(self.loss.step_size if self.learning_rate == 'auto'
|
|
532
|
+
else self.learning_rate),
|
|
533
|
+
batch_size=self.batch_size,
|
|
534
|
+
callback=self._store_train_val_info,
|
|
535
|
+
callback_args=(X_val_biased, y_val),
|
|
536
|
+
shuffle=self.shuffle,
|
|
537
|
+
random_state=self.random_state,
|
|
538
|
+
verbose=self.verbose).minimize()
|
|
539
|
+
|
|
540
|
+
else:
|
|
541
|
+
|
|
542
|
+
raise TypeError(f'{self.optimizer} is not an allowed optimizer')
|
|
543
|
+
|
|
544
|
+
self.support_ = np.argwhere(np.abs(self.decision_function(X)) <= 1).ravel()
|
|
545
|
+
self.support_vectors_ = X[self.support_]
|
|
546
|
+
|
|
547
|
+
else:
|
|
548
|
+
|
|
549
|
+
n_samples = len(y)
|
|
550
|
+
|
|
551
|
+
# kernel matrix
|
|
552
|
+
K = self.kernel(X)
|
|
553
|
+
|
|
554
|
+
Q = K * np.outer(y, y)
|
|
555
|
+
q = -np.ones(n_samples)
|
|
556
|
+
|
|
557
|
+
if self.loss == Hinge:
|
|
558
|
+
|
|
559
|
+
ub = np.ones(n_samples) * self.C # upper bounds
|
|
560
|
+
|
|
561
|
+
if self.optimizer == 'smo' or self.optimizer == SMO:
|
|
562
|
+
|
|
563
|
+
if not self.reg_intercept:
|
|
564
|
+
|
|
565
|
+
self.obj = Quadratic(Q, q)
|
|
566
|
+
|
|
567
|
+
self.optimizer = SMOClassifier(self.obj, X, y, K, self.kernel, self.C,
|
|
568
|
+
self.tol, self.verbose).minimize()
|
|
569
|
+
self.alphas_ = self.optimizer.alphas
|
|
570
|
+
if isinstance(self.kernel, LinearKernel):
|
|
571
|
+
self.coef_ = self.optimizer.w
|
|
572
|
+
self.intercept_ = self.optimizer.b
|
|
573
|
+
|
|
574
|
+
else:
|
|
575
|
+
|
|
576
|
+
raise NotImplementedError
|
|
577
|
+
|
|
578
|
+
elif isinstance(self.optimizer, str):
|
|
579
|
+
|
|
580
|
+
lb = np.zeros(n_samples) # lower bounds
|
|
581
|
+
|
|
582
|
+
if not self.reg_intercept:
|
|
583
|
+
|
|
584
|
+
self.obj = Quadratic(Q, q)
|
|
585
|
+
|
|
586
|
+
out = StringIO()
|
|
587
|
+
with pipes(stdout=out, stderr=STDOUT):
|
|
588
|
+
self.alphas_ = solve_qp(P=Q,
|
|
589
|
+
q=q,
|
|
590
|
+
A=y.astype(float),
|
|
591
|
+
b=np.zeros(1),
|
|
592
|
+
lb=lb,
|
|
593
|
+
ub=ub,
|
|
594
|
+
solver=self.optimizer,
|
|
595
|
+
verbose=False if self.verbose < 0 else True) # trick for Jupyter
|
|
596
|
+
|
|
597
|
+
else:
|
|
598
|
+
|
|
599
|
+
Q += np.outer(y, y)
|
|
600
|
+
self.obj = Quadratic(Q, q)
|
|
601
|
+
|
|
602
|
+
out = StringIO()
|
|
603
|
+
with pipes(stdout=out, stderr=STDOUT):
|
|
604
|
+
self.alphas_ = solve_qp(P=Q,
|
|
605
|
+
q=q,
|
|
606
|
+
lb=lb,
|
|
607
|
+
ub=ub,
|
|
608
|
+
solver=self.optimizer,
|
|
609
|
+
verbose=False if self.verbose < 0 else True) # trick for Jupyter
|
|
610
|
+
|
|
611
|
+
stdout = out.getvalue()
|
|
612
|
+
if stdout:
|
|
613
|
+
self.iter = int(max(re.findall(r'(\d+):', stdout)))
|
|
614
|
+
if self.verbose:
|
|
615
|
+
print(stdout)
|
|
616
|
+
|
|
617
|
+
else:
|
|
618
|
+
|
|
619
|
+
if issubclass(self.optimizer, BoxConstrainedQuadraticOptimizer):
|
|
620
|
+
|
|
621
|
+
if not self.reg_intercept:
|
|
622
|
+
|
|
623
|
+
# TODO constrained optimizer with A x = 0 and 0 <= x <= ub is not available
|
|
624
|
+
raise NotImplementedError
|
|
625
|
+
|
|
626
|
+
else:
|
|
627
|
+
|
|
628
|
+
Q += np.outer(y, y)
|
|
629
|
+
self.obj = Quadratic(Q, q)
|
|
630
|
+
|
|
631
|
+
self.optimizer = self.optimizer(quad=self.obj,
|
|
632
|
+
ub=ub,
|
|
633
|
+
tol=self.tol,
|
|
634
|
+
max_iter=self.max_iter,
|
|
635
|
+
callback=self._store_train_info,
|
|
636
|
+
verbose=self.verbose).minimize()
|
|
637
|
+
|
|
638
|
+
elif issubclass(self.optimizer, Optimizer):
|
|
639
|
+
|
|
640
|
+
lb = np.zeros(n_samples) # lower bounds
|
|
641
|
+
|
|
642
|
+
if not self.reg_intercept:
|
|
643
|
+
|
|
644
|
+
self.obj = AugmentedLagrangianQuadratic(primal=Quadratic(Q, q),
|
|
645
|
+
A=y,
|
|
646
|
+
b=np.zeros(1),
|
|
647
|
+
lb=lb,
|
|
648
|
+
ub=ub,
|
|
649
|
+
rho=self.rho)
|
|
650
|
+
|
|
651
|
+
else:
|
|
652
|
+
|
|
653
|
+
Q += np.outer(y, y)
|
|
654
|
+
self.obj = AugmentedLagrangianQuadratic(primal=Quadratic(Q, q),
|
|
655
|
+
lb=lb,
|
|
656
|
+
ub=ub,
|
|
657
|
+
rho=self.rho)
|
|
658
|
+
|
|
659
|
+
if issubclass(self.optimizer, LineSearchOptimizer):
|
|
660
|
+
|
|
661
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
662
|
+
tol=self.tol,
|
|
663
|
+
max_iter=self.max_iter,
|
|
664
|
+
max_f_eval=self.max_f_eval,
|
|
665
|
+
random_state=self.random_state,
|
|
666
|
+
callback=self._store_train_info,
|
|
667
|
+
verbose=self.verbose).minimize()
|
|
668
|
+
|
|
669
|
+
if self.optimizer.status == 'stopped':
|
|
670
|
+
if self.optimizer.iter >= self.max_iter:
|
|
671
|
+
warnings.warn('max_iter reached but the optimization has not converged yet',
|
|
672
|
+
ConvergenceWarning)
|
|
673
|
+
elif self.optimizer.f_eval >= self.max_f_eval:
|
|
674
|
+
warnings.warn('max_f_eval reached but the optimization has not converged yet',
|
|
675
|
+
ConvergenceWarning)
|
|
676
|
+
|
|
677
|
+
elif issubclass(self.optimizer, ProximalBundle):
|
|
678
|
+
|
|
679
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
680
|
+
mu=self.mu,
|
|
681
|
+
tol=self.tol,
|
|
682
|
+
max_iter=self.max_iter,
|
|
683
|
+
master_solver=self.master_solver,
|
|
684
|
+
master_verbose=self.master_verbose,
|
|
685
|
+
random_state=self.random_state,
|
|
686
|
+
callback=self._store_train_info,
|
|
687
|
+
verbose=self.verbose).minimize()
|
|
688
|
+
|
|
689
|
+
if self.optimizer.status == 'error':
|
|
690
|
+
warnings.warn('failure while computing direction for the master problem',
|
|
691
|
+
ConvergenceWarning)
|
|
692
|
+
|
|
693
|
+
elif issubclass(self.optimizer, StochasticOptimizer):
|
|
694
|
+
|
|
695
|
+
if issubclass(self.optimizer, StochasticMomentumOptimizer):
|
|
696
|
+
|
|
697
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
698
|
+
tol=self.tol,
|
|
699
|
+
step_size=self.learning_rate,
|
|
700
|
+
epochs=self.max_iter,
|
|
701
|
+
momentum_type=self.momentum_type,
|
|
702
|
+
momentum=self.momentum,
|
|
703
|
+
random_state=self.random_state,
|
|
704
|
+
callback=self._store_train_info,
|
|
705
|
+
verbose=self.verbose).minimize()
|
|
706
|
+
|
|
707
|
+
else:
|
|
708
|
+
|
|
709
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
710
|
+
tol=self.tol,
|
|
711
|
+
step_size=self.learning_rate,
|
|
712
|
+
epochs=self.max_iter,
|
|
713
|
+
random_state=self.random_state,
|
|
714
|
+
callback=self._store_train_info,
|
|
715
|
+
verbose=self.verbose).minimize()
|
|
716
|
+
|
|
717
|
+
if self.optimizer.status == 'stopped':
|
|
718
|
+
warnings.warn('max_iter reached but the optimization has not converged yet',
|
|
719
|
+
ConvergenceWarning)
|
|
720
|
+
|
|
721
|
+
else:
|
|
722
|
+
|
|
723
|
+
raise TypeError(f'{self.optimizer} is not an allowed optimizer')
|
|
724
|
+
|
|
725
|
+
self.alphas_ = self.optimizer.x
|
|
726
|
+
|
|
727
|
+
elif self.loss == SquaredHinge:
|
|
728
|
+
|
|
729
|
+
D = np.diag(np.ones(n_samples) / (2 * self.C))
|
|
730
|
+
Q += D
|
|
731
|
+
|
|
732
|
+
if isinstance(self.optimizer, str):
|
|
733
|
+
|
|
734
|
+
lb = np.zeros(n_samples) # lower bounds
|
|
735
|
+
|
|
736
|
+
if not self.reg_intercept:
|
|
737
|
+
|
|
738
|
+
self.obj = Quadratic(Q, q)
|
|
739
|
+
|
|
740
|
+
out = StringIO()
|
|
741
|
+
with pipes(stdout=out, stderr=STDOUT):
|
|
742
|
+
self.alphas_ = solve_qp(P=Q,
|
|
743
|
+
q=q,
|
|
744
|
+
A=y.astype(float),
|
|
745
|
+
b=np.zeros(1),
|
|
746
|
+
lb=lb,
|
|
747
|
+
solver=self.optimizer,
|
|
748
|
+
verbose=False if self.verbose < 0 else True) # trick for Jupyter
|
|
749
|
+
|
|
750
|
+
else:
|
|
751
|
+
|
|
752
|
+
Q += np.outer(y, y)
|
|
753
|
+
self.obj = Quadratic(Q, q)
|
|
754
|
+
|
|
755
|
+
out = StringIO()
|
|
756
|
+
with pipes(stdout=out, stderr=STDOUT):
|
|
757
|
+
self.alphas_ = solve_qp(P=Q,
|
|
758
|
+
q=q,
|
|
759
|
+
lb=lb,
|
|
760
|
+
solver=self.optimizer,
|
|
761
|
+
verbose=False if self.verbose < 0 else True) # trick for Jupyter
|
|
762
|
+
|
|
763
|
+
stdout = out.getvalue()
|
|
764
|
+
if stdout:
|
|
765
|
+
self.iter = int(max(re.findall(r'(\d+):', stdout)))
|
|
766
|
+
if self.verbose:
|
|
767
|
+
print(stdout)
|
|
768
|
+
|
|
769
|
+
else:
|
|
770
|
+
|
|
771
|
+
if issubclass(self.optimizer, BoxConstrainedQuadraticOptimizer):
|
|
772
|
+
|
|
773
|
+
# TODO bcqp optimizer with 0 <= x <= +inf, i.e., without upper bound, is not available
|
|
774
|
+
raise NotImplementedError
|
|
775
|
+
|
|
776
|
+
elif issubclass(self.optimizer, Optimizer):
|
|
777
|
+
|
|
778
|
+
lb = np.zeros(n_samples) # lower bounds
|
|
779
|
+
|
|
780
|
+
if not self.reg_intercept:
|
|
781
|
+
|
|
782
|
+
self.obj = AugmentedLagrangianQuadratic(primal=Quadratic(Q, q),
|
|
783
|
+
A=y,
|
|
784
|
+
b=np.zeros(1),
|
|
785
|
+
lb=lb,
|
|
786
|
+
rho=self.rho)
|
|
787
|
+
|
|
788
|
+
else:
|
|
789
|
+
|
|
790
|
+
Q += np.outer(y, y)
|
|
791
|
+
self.obj = AugmentedLagrangianQuadratic(primal=Quadratic(Q, q),
|
|
792
|
+
lb=lb,
|
|
793
|
+
rho=self.rho)
|
|
794
|
+
|
|
795
|
+
if issubclass(self.optimizer, LineSearchOptimizer):
|
|
796
|
+
|
|
797
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
798
|
+
tol=self.tol,
|
|
799
|
+
max_iter=self.max_iter,
|
|
800
|
+
max_f_eval=self.max_f_eval,
|
|
801
|
+
random_state=self.random_state,
|
|
802
|
+
callback=self._store_train_info,
|
|
803
|
+
verbose=self.verbose).minimize()
|
|
804
|
+
|
|
805
|
+
if self.optimizer.status == 'stopped':
|
|
806
|
+
if self.optimizer.iter >= self.max_iter:
|
|
807
|
+
warnings.warn('max_iter reached but the optimization has not converged yet',
|
|
808
|
+
ConvergenceWarning)
|
|
809
|
+
elif self.optimizer.f_eval >= self.max_f_eval:
|
|
810
|
+
warnings.warn('max_f_eval reached but the optimization has not converged yet',
|
|
811
|
+
ConvergenceWarning)
|
|
812
|
+
|
|
813
|
+
elif issubclass(self.optimizer, ProximalBundle):
|
|
814
|
+
|
|
815
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
816
|
+
mu=self.mu,
|
|
817
|
+
tol=self.tol,
|
|
818
|
+
max_iter=self.max_iter,
|
|
819
|
+
master_solver=self.master_solver,
|
|
820
|
+
master_verbose=self.master_verbose,
|
|
821
|
+
random_state=self.random_state,
|
|
822
|
+
callback=self._store_train_info,
|
|
823
|
+
verbose=self.verbose).minimize()
|
|
824
|
+
|
|
825
|
+
if self.optimizer.status == 'error':
|
|
826
|
+
warnings.warn('failure while computing direction for the master problem',
|
|
827
|
+
ConvergenceWarning)
|
|
828
|
+
|
|
829
|
+
elif issubclass(self.optimizer, StochasticOptimizer):
|
|
830
|
+
|
|
831
|
+
if issubclass(self.optimizer, StochasticMomentumOptimizer):
|
|
832
|
+
|
|
833
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
834
|
+
tol=self.tol,
|
|
835
|
+
step_size=self.learning_rate,
|
|
836
|
+
epochs=self.max_iter,
|
|
837
|
+
momentum_type=self.momentum_type,
|
|
838
|
+
momentum=self.momentum,
|
|
839
|
+
random_state=self.random_state,
|
|
840
|
+
callback=self._store_train_info,
|
|
841
|
+
verbose=self.verbose).minimize()
|
|
842
|
+
|
|
843
|
+
else:
|
|
844
|
+
|
|
845
|
+
self.optimizer = self.optimizer(f=self.obj,
|
|
846
|
+
tol=self.tol,
|
|
847
|
+
step_size=self.learning_rate,
|
|
848
|
+
epochs=self.max_iter,
|
|
849
|
+
random_state=self.random_state,
|
|
850
|
+
callback=self._store_train_info,
|
|
851
|
+
verbose=self.verbose).minimize()
|
|
852
|
+
|
|
853
|
+
if self.optimizer.status == 'stopped':
|
|
854
|
+
warnings.warn('max_iter reached but the optimization has not converged yet',
|
|
855
|
+
ConvergenceWarning)
|
|
856
|
+
|
|
857
|
+
else:
|
|
858
|
+
|
|
859
|
+
raise TypeError(f'{self.optimizer} is not an allowed optimizer')
|
|
860
|
+
|
|
861
|
+
self.alphas_ = self.optimizer.x
|
|
862
|
+
|
|
863
|
+
else:
|
|
864
|
+
|
|
865
|
+
raise TypeError(f'{self.loss} is not an allowed loss')
|
|
866
|
+
|
|
867
|
+
sv = self.alphas_ > 1e-6
|
|
868
|
+
self.support_ = np.arange(len(self.alphas_))[sv]
|
|
869
|
+
self.support_vectors_, sv_y, alphas = X[sv], y[sv], self.alphas_[sv]
|
|
870
|
+
self.dual_coef_ = alphas * sv_y
|
|
871
|
+
|
|
872
|
+
if self.optimizer != SMOClassifier:
|
|
873
|
+
|
|
874
|
+
if isinstance(self.kernel, LinearKernel):
|
|
875
|
+
self.coef_ = np.dot(self.dual_coef_, self.support_vectors_)
|
|
876
|
+
|
|
877
|
+
for n in range(len(alphas)):
|
|
878
|
+
self.intercept_ += sv_y[n]
|
|
879
|
+
self.intercept_ -= np.sum(self.dual_coef_ * K[self.support_[n], sv])
|
|
880
|
+
self.intercept_ /= len(alphas)
|
|
881
|
+
|
|
882
|
+
return self
|
|
883
|
+
|
|
884
|
+
def predict(self, X):
    """
    Predict the labels of the samples in X.

    The raw scores returned by `decision_function` are mapped back to the
    original label space through `self.lb.inverse_transform`
    (presumably the label binarizer fitted in `fit` -- confirm against
    the part of the class outside this view).
    """
    scores = self.decision_function(X)
    return self.lb.inverse_transform(scores)
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
class SVR(RegressorMixin, SVM):
    """
    Epsilon-Support Vector Regression.

    Parameters
    ----------

    loss : `SVMLoss` instance like {epsilon_insensitive, squared_epsilon_insensitive}, \
            default='squared_epsilon_insensitive'
        Specifies the loss function. The epsilon-insensitive loss is the
        L1 loss, while the squared epsilon-insensitive loss is the L2 loss.

    epsilon : float, default=0.1
        Epsilon parameter in the (squared) epsilon-insensitive loss function.
        It specifies the epsilon-tube within which no penalty is associated
        in the training loss function with points predicted within a distance
        epsilon from the actual value.

    Notes
    -----
    Every other constructor argument is forwarded unchanged to the `SVM`
    base class constructor.
    """

    def __init__(self,
                 loss=squared_epsilon_insensitive,
                 epsilon=0.1,
                 kernel=gaussian,
                 C=1,
                 rho=1,
                 mu=1,
                 fit_intercept=True,
                 intercept_scaling=1,
                 reg_intercept=False,
                 dual=False,
                 optimizer=StochasticGradientDescent,
                 master_solver='clarabel',
                 learning_rate='auto',
                 momentum_type='none',
                 momentum=0.9,
                 max_iter=1000,
                 max_f_eval=15000,
                 tol=1e-4,
                 batch_size=None,
                 shuffle=True,
                 random_state=None,
                 early_stopping=False,
                 validation_split=0.,
                 patience=5,
                 verbose=False,
                 master_verbose=False):
        super(SVR, self).__init__(loss=loss,
                                  kernel=kernel,
                                  C=C,
                                  rho=rho,
                                  mu=mu,
                                  fit_intercept=fit_intercept,
                                  intercept_scaling=intercept_scaling,
                                  reg_intercept=reg_intercept,
                                  dual=dual,
                                  optimizer=optimizer,
                                  master_solver=master_solver,
                                  learning_rate=learning_rate,
                                  momentum_type=momentum_type,
                                  momentum=momentum,
                                  max_iter=max_iter,
                                  max_f_eval=max_f_eval,
                                  tol=tol,
                                  batch_size=batch_size,
                                  shuffle=shuffle,
                                  random_state=random_state,
                                  early_stopping=early_stopping,
                                  validation_split=validation_split,
                                  patience=patience,
                                  verbose=verbose,
                                  master_verbose=master_verbose)
        if not loss._loss_type == 'regressor':
            raise TypeError(f'{loss} is not an allowed SVR loss function')
        # written as `not >=` so that NaN is rejected as well
        if not epsilon >= 0:
            raise ValueError('epsilon must be >= 0')
        self.epsilon = epsilon

    def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
        """
        Optimizer callback: after the base class bookkeeping, record the R^2
        score on the current batch (and on the validation split, if any) at
        the end of every batch.

        The last column of `X_batch`/`X_val` is dropped before scoring when
        `fit_intercept` is set, because `fit` appends a bias column to the
        data it hands to the optimizer.
        """
        super(SVR, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
        if not opt.is_batch_end():
            return

        r2 = self.score(X_batch[:, :-1] if self.fit_intercept else X_batch, y_batch)
        self.train_score_history.append(r2)
        if opt.is_verbose():
            print('\tr2: {: 1.4f}'.format(r2), end='')

        if self.validation_split:
            val_r2 = self.score(X_val[:, :-1] if self.fit_intercept else X_val, y_val)
            self.val_score_history.append(val_r2)
            if opt.is_verbose():
                print('\tval_r2: {: 1.4f}'.format(val_r2), end='')

        # NOTE(review): the nesting level of this call was reconstructed from a
        # whitespace-stripped source; it is assumed to run at every batch end,
        # with or without a validation split -- confirm against upstream.
        self._update_no_improvement_count(opt)

    def fit(self, X, y):
        """
        Fit the regressor on the training data.

        Parameters
        ----------
        X : array-like
            Training samples.
        y : array-like
            Single-target regression values.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If `y` has more than one target column.
        TypeError
            If the configured optimizer or loss is not supported.
        NotImplementedError
            For dual formulations that have no available solver.
        """
        targets = y.shape[1] if y.ndim > 1 else 1
        if targets > 1:
            raise ValueError('use sklearn.multioutput.MultiOutputRegressor '
                             'to train a model over more than one target')

        if not self.dual:
            self._svr_fit_primal(X, y)
        else:
            self._svr_fit_dual(X, y)

        return self

    def predict(self, X):
        """Return the regression values for X, i.e. `decision_function(X)`."""
        return self.decision_function(X)

    # ------------------------------------------------------------------ #
    # private helpers
    # ------------------------------------------------------------------ #

    def _svr_add_bias(self, X, y):
        """Append a constant `intercept_scaling` column to X when `fit_intercept` is set."""
        if self.fit_intercept:
            return np.c_[X, np.full_like(y, self.intercept_scaling)]
        return X

    def _svr_warn_if_budget_exhausted(self):
        """Warn when a line search optimizer stopped on max_iter or max_f_eval."""
        if self.optimizer.status == 'stopped':
            if self.optimizer.iter >= self.max_iter:
                warnings.warn('max_iter reached but the optimization has not converged yet',
                              ConvergenceWarning)
            elif self.optimizer.f_eval >= self.max_f_eval:
                warnings.warn('max_f_eval reached but the optimization has not converged yet',
                              ConvergenceWarning)

    def _svr_warn_if_master_failed(self):
        """Warn when the proximal bundle optimizer reported an error status."""
        if self.optimizer.status == 'error':
            warnings.warn('failure while computing direction for the master problem',
                          ConvergenceWarning)

    def _svr_fit_primal(self, X, y):
        """Solve the primal problem with the configured optimizer family and set the support vectors."""
        if issubclass(self.optimizer, LineSearchOptimizer):

            self.loss = self.loss(self, self._svr_add_bias(X, y), y, self.epsilon)
            self.optimizer = self.optimizer(f=self.loss,
                                            max_iter=self.max_iter,
                                            max_f_eval=self.max_f_eval,
                                            random_state=self.random_state,
                                            callback=self._store_train_info,
                                            verbose=self.verbose).minimize()
            self._svr_warn_if_budget_exhausted()
            self._unpack(self.optimizer.x)

        elif issubclass(self.optimizer, ProximalBundle):

            self.loss = self.loss(self, self._svr_add_bias(X, y), y, self.epsilon)
            self.optimizer = self.optimizer(f=self.loss,
                                            mu=self.mu,
                                            max_iter=self.max_iter,
                                            master_solver=self.master_solver,
                                            master_verbose=self.master_verbose,
                                            random_state=self.random_state,
                                            callback=self._store_train_info,
                                            verbose=self.verbose).minimize()
            self._svr_warn_if_master_failed()
            self._unpack(self.optimizer.x)

        elif issubclass(self.optimizer, StochasticOptimizer):

            if self.validation_split:
                # from here on X and y are the training part only
                X, X_val, y, y_val = train_test_split(X, y,
                                                      test_size=self.validation_split,
                                                      random_state=self.random_state)
                X_val_biased = self._svr_add_bias(X_val, y_val)
            else:
                X_val_biased = None
                y_val = None

            self.loss = self.loss(self, self._svr_add_bias(X, y), y, self.epsilon)

            # momentum settings are only understood by momentum optimizers
            momentum_kwargs = {}
            if issubclass(self.optimizer, StochasticMomentumOptimizer):
                momentum_kwargs = {'momentum_type': self.momentum_type,
                                   'momentum': self.momentum}

            self.optimizer = self.optimizer(f=self.loss,
                                            epochs=self.max_iter,
                                            step_size=(self.loss.step_size if self.learning_rate == 'auto'
                                                       else self.learning_rate),
                                            batch_size=self.batch_size,
                                            callback=self._store_train_val_info,
                                            callback_args=(X_val_biased, y_val),
                                            shuffle=self.shuffle,
                                            random_state=self.random_state,
                                            verbose=self.verbose,
                                            **momentum_kwargs).minimize()

        else:

            raise TypeError(f'{self.optimizer} is not an allowed optimizer')

        # samples lying on or outside the epsilon-tube
        self.support_ = np.argwhere(np.abs(y - self.predict(X)) >= self.epsilon).ravel()
        self.support_vectors_ = X[self.support_]

    def _svr_fit_dual(self, X, y):
        """Build and solve the dual QP, then derive support vectors, coefficients and intercept."""
        n_samples = len(y)

        # kernel matrix
        K = self.kernel(X)

        Q = np.vstack((np.hstack((K, -K)),
                       np.hstack((-K, K))))
        q = np.hstack((-y, y)) + self.epsilon

        if self.loss == EpsilonInsensitive:

            ub = np.ones(2 * n_samples) * self.C  # upper bounds

            if self.optimizer == 'smo' or self.optimizer == SMO:

                if self.reg_intercept:
                    raise NotImplementedError

                self.obj = Quadratic(Q, q)

                self.optimizer = SMORegression(self.obj, X, y, K, self.kernel, self.C,
                                               self.epsilon, self.tol, self.verbose).minimize()
                alphas_p, alphas_n = self.optimizer.alphas_p, self.optimizer.alphas_n
                self.alphas_ = np.concatenate((alphas_p, alphas_n))
                if isinstance(self.kernel, LinearKernel):
                    self.coef_ = self.optimizer.w
                self.intercept_ = self.optimizer.b

            else:

                alphas_p, alphas_n = self._svr_solve_dual(Q, q, n_samples, ub)

        elif self.loss == SquaredEpsilonInsensitive:

            Q += np.diag(np.ones(2 * n_samples) / (2 * self.C))

            # the squared loss leaves the multipliers unbounded from above
            alphas_p, alphas_n = self._svr_solve_dual(Q, q, n_samples, None)

        else:

            raise TypeError(f'{self.loss} is not an allowed loss')

        sv = np.logical_or(alphas_p > 1e-6, alphas_n > 1e-6)
        self.support_ = np.arange(len(alphas_p))[sv]
        self.support_vectors_, sv_y = X[sv], y[sv]
        alphas_p, alphas_n = alphas_p[sv], alphas_n[sv]
        self.dual_coef_ = alphas_p - alphas_n

        # At this point self.optimizer is either a qp solver name or an
        # optimizer *instance*, so the SMO case has to be detected with
        # isinstance: SMO already provided coef_ and intercept_.
        if not isinstance(self.optimizer, SMORegression):

            if isinstance(self.kernel, LinearKernel):
                self.coef_ = np.dot(self.dual_coef_, self.support_vectors_)

            for target, row in zip(sv_y, self.support_):
                self.intercept_ += target
                self.intercept_ -= np.sum(self.dual_coef_ * K[row, sv])
                self.intercept_ -= self.epsilon
            n_sv = len(alphas_p)
            if n_sv:  # no support vectors: nothing to average
                self.intercept_ /= n_sv

    def _svr_solve_dual(self, Q, q, n_samples, ub=None):
        """
        Solve the dual QP with a qp solver (string optimizer) or with one of
        the package optimizers and return the pair (alphas_p, alphas_n).

        `Q` is updated in place when the intercept is regularized. `ub` is
        the vector of upper bounds, or None when the multipliers have no
        upper bound; it is only forwarded to the solvers when given.
        """
        e = np.hstack((np.ones(n_samples), -np.ones(n_samples)))  # equality matrix
        upper = {} if ub is None else {'ub': ub}

        if isinstance(self.optimizer, str):

            lb = np.zeros(2 * n_samples)  # lower bounds

            if not self.reg_intercept:
                equality = {'A': e, 'b': np.zeros(1)}
            else:
                Q += np.outer(e, e)
                equality = {}

            self.obj = Quadratic(Q, q)

            out = StringIO()
            with pipes(stdout=out, stderr=STDOUT):
                self.alphas_ = solve_qp(P=Q,
                                        q=q,
                                        lb=lb,
                                        solver=self.optimizer,
                                        verbose=not (self.verbose < 0),  # trick for Jupyter
                                        **equality,
                                        **upper)

            captured = out.getvalue()
            if captured:
                # compare the counters as integers: as strings '9' > '10'
                counters = re.findall(r'(\d+):', captured)
                if counters:
                    self.iter = max(map(int, counters))
                if self.verbose:
                    print(captured)

        else:

            if issubclass(self.optimizer, BoxConstrainedQuadraticOptimizer):

                if ub is None:
                    # TODO bcqp optimizer with 0 <= x <= +inf, i.e., without upper bound, is not available
                    raise NotImplementedError

                if not self.reg_intercept:
                    # TODO constrained optimizer with A x = 0 and 0 <= x <= ub is not available
                    raise NotImplementedError

                Q += np.outer(e, e)
                self.obj = Quadratic(Q, q)

                self.optimizer = self.optimizer(quad=self.obj,
                                                ub=ub,
                                                tol=self.tol,
                                                max_iter=self.max_iter,
                                                callback=self._store_train_info,
                                                verbose=self.verbose).minimize()

            elif issubclass(self.optimizer, Optimizer):

                lb = np.zeros(2 * n_samples)  # lower bounds

                if not self.reg_intercept:
                    self.obj = AugmentedLagrangianQuadratic(primal=Quadratic(Q, q),
                                                            A=e,
                                                            b=np.zeros(1),
                                                            lb=lb,
                                                            rho=self.rho,
                                                            **upper)
                else:
                    Q += np.outer(e, e)
                    self.obj = AugmentedLagrangianQuadratic(primal=Quadratic(Q, q),
                                                            lb=lb,
                                                            rho=self.rho,
                                                            **upper)

                self._svr_minimize_dual_objective()

            else:

                # NOTE(review): which `if` chain the original TypeError branch
                # belonged to could not be recovered from the source; every
                # unsupported optimizer class is rejected with the same error.
                raise TypeError(f'{self.optimizer} is not an allowed optimizer')

            self.alphas_ = self.optimizer.x

        return np.split(self.alphas_, 2)

    def _svr_minimize_dual_objective(self):
        """Minimize `self.obj` with the configured optimizer family, replacing `self.optimizer` by the run instance."""
        if issubclass(self.optimizer, LineSearchOptimizer):

            self.optimizer = self.optimizer(f=self.obj,
                                            tol=self.tol,
                                            max_iter=self.max_iter,
                                            max_f_eval=self.max_f_eval,
                                            random_state=self.random_state,
                                            callback=self._store_train_info,
                                            verbose=self.verbose).minimize()
            self._svr_warn_if_budget_exhausted()

        elif issubclass(self.optimizer, ProximalBundle):

            self.optimizer = self.optimizer(f=self.obj,
                                            mu=self.mu,
                                            tol=self.tol,
                                            max_iter=self.max_iter,
                                            master_solver=self.master_solver,
                                            master_verbose=self.master_verbose,
                                            random_state=self.random_state,
                                            callback=self._store_train_info,
                                            verbose=self.verbose).minimize()
            self._svr_warn_if_master_failed()

        elif issubclass(self.optimizer, StochasticOptimizer):

            # momentum settings are only understood by momentum optimizers
            momentum_kwargs = {}
            if issubclass(self.optimizer, StochasticMomentumOptimizer):
                momentum_kwargs = {'momentum_type': self.momentum_type,
                                   'momentum': self.momentum}

            self.optimizer = self.optimizer(f=self.obj,
                                            tol=self.tol,
                                            step_size=self.learning_rate,
                                            epochs=self.max_iter,
                                            random_state=self.random_state,
                                            callback=self._store_train_info,
                                            verbose=self.verbose,
                                            **momentum_kwargs).minimize()

            if self.optimizer.status == 'stopped':
                warnings.warn('max_iter reached but the optimization has not converged yet',
                              ConvergenceWarning)

        else:

            raise TypeError(f'{self.optimizer} is not an allowed optimizer')
|