optiml 1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. optiml/__init__.py +0 -0
  2. optiml/ml/__init__.py +0 -0
  3. optiml/ml/neural_network/__init__.py +3 -0
  4. optiml/ml/neural_network/_base.py +475 -0
  5. optiml/ml/neural_network/activations.py +79 -0
  6. optiml/ml/neural_network/initializers.py +66 -0
  7. optiml/ml/neural_network/layers.py +183 -0
  8. optiml/ml/neural_network/losses.py +178 -0
  9. optiml/ml/neural_network/regularizers.py +87 -0
  10. optiml/ml/svm/__init__.py +3 -0
  11. optiml/ml/svm/_base.py +1442 -0
  12. optiml/ml/svm/kernels.py +208 -0
  13. optiml/ml/svm/losses.py +284 -0
  14. optiml/ml/svm/smo.py +797 -0
  15. optiml/ml/tests/__init__.py +0 -0
  16. optiml/ml/tests/_datasets.py +49 -0
  17. optiml/ml/tests/_utils.py +28 -0
  18. optiml/ml/tests/test_initializers.py +33 -0
  19. optiml/ml/tests/test_neural_network.py +86 -0
  20. optiml/ml/tests/test_svc.py +245 -0
  21. optiml/ml/tests/test_svr.py +256 -0
  22. optiml/ml/utils.py +252 -0
  23. optiml/opti/__init__.py +4 -0
  24. optiml/opti/_base.py +309 -0
  25. optiml/opti/constrained/__init__.py +9 -0
  26. optiml/opti/constrained/_base.py +404 -0
  27. optiml/opti/constrained/active_set.py +228 -0
  28. optiml/opti/constrained/frank_wolfe.py +158 -0
  29. optiml/opti/constrained/interior_point.py +282 -0
  30. optiml/opti/constrained/projected_gradient.py +138 -0
  31. optiml/opti/constrained/tests/__init__.py +0 -0
  32. optiml/opti/constrained/tests/test_active_set.py +16 -0
  33. optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
  34. optiml/opti/constrained/tests/test_interior_point.py +16 -0
  35. optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
  36. optiml/opti/constrained/tests/test_lower_bound.py +29 -0
  37. optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
  38. optiml/opti/unconstrained/__init__.py +6 -0
  39. optiml/opti/unconstrained/_base.py +63 -0
  40. optiml/opti/unconstrained/line_search/__init__.py +10 -0
  41. optiml/opti/unconstrained/line_search/_base.py +106 -0
  42. optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
  43. optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
  44. optiml/opti/unconstrained/line_search/line_search.py +248 -0
  45. optiml/opti/unconstrained/line_search/newton.py +198 -0
  46. optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
  47. optiml/opti/unconstrained/proximal_bundle.py +219 -0
  48. optiml/opti/unconstrained/stochastic/__init__.py +12 -0
  49. optiml/opti/unconstrained/stochastic/_base.py +246 -0
  50. optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
  51. optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
  52. optiml/opti/unconstrained/stochastic/adam.py +179 -0
  53. optiml/opti/unconstrained/stochastic/adamax.py +178 -0
  54. optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
  55. optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
  56. optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
  57. optiml/opti/unconstrained/stochastic/schedules.py +89 -0
  58. optiml/opti/unconstrained/tests/__init__.py +0 -0
  59. optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
  60. optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
  61. optiml/opti/unconstrained/tests/test_adam.py +42 -0
  62. optiml/opti/unconstrained/tests/test_adamax.py +41 -0
  63. optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
  64. optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
  65. optiml/opti/unconstrained/tests/test_functions.py +34 -0
  66. optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
  67. optiml/opti/unconstrained/tests/test_newton.py +20 -0
  68. optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
  69. optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
  70. optiml/opti/unconstrained/tests/test_verbose.py +25 -0
  71. optiml/opti/utils.py +353 -0
  72. optiml-1.7.dist-info/METADATA +203 -0
  73. optiml-1.7.dist-info/RECORD +76 -0
  74. optiml-1.7.dist-info/WHEEL +5 -0
  75. optiml-1.7.dist-info/licenses/LICENSE +21 -0
  76. optiml-1.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,208 @@
1
+ from abc import ABC
2
+
3
+ import numpy as np
4
+ from sklearn.base import BaseEstimator
5
+ from sklearn.metrics.pairwise import check_pairwise_arrays, euclidean_distances, manhattan_distances
6
+ from sklearn.utils.extmath import safe_sparse_dot
7
+
8
+
9
class Kernel(BaseEstimator, ABC):
    """
    Common interface shared by all the kernel functions.

    A kernel is a callable object that, given two sets of samples, returns
    the matrix of their pairwise similarities (i.e., the Gram matrix). This
    base class only fixes the call signature: every concrete kernel is
    expected to override ``__call__``.
    """

    def __call__(self, X, Y=None):
        """
        Evaluate the kernel between the rows of X and the rows of Y.

        Parameters
        ----------

        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the kernel function.

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Right argument of the kernel function. When it is None the
            kernel is evaluated between X and itself.

        Returns
        -------

        K : ndarray of shape (n_samples_X, n_samples_Y)
            The kernel (Gram) matrix.

        Raises
        ------

        NotImplementedError
            Always, since the base class does not provide an implementation.
        """
        raise NotImplementedError
38
+
39
+
40
class LinearKernel(Kernel):
    r"""
    Linear kernel, i.e., the plain inner product between the samples:

    .. math::

        K(X, Y) = \langle X, Y \rangle
    """

    def __call__(self, X, Y=None):
        """
        Return the matrix of the inner products between the rows of X and
        the rows of Y (or of X itself when Y is None).
        """
        # validate the two inputs (Y defaults to X when it is None)
        left, right = check_pairwise_arrays(X, Y)
        gram = safe_sparse_dot(left, right.T, dense_output=True)
        return gram
52
+
53
+
54
class PolyKernel(Kernel):
    r"""
    Compute the polynomial kernel between X and Y:

    .. math::

        K(X, Y) = (\gamma \langle X, Y \rangle + coef_0)^{degree}

    Parameters
    ----------

    degree : int, default=3
        Degree of the polynomial kernel function. Must be > 0.

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for kernel function.

        - if `gamma='scale'` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma, falling back
          to 1.0 when the variance of X is zero,
        - if `gamma='auto'`, uses 1 / n_features,
        - if a float is passed it must be > 0 and it is used as is.

        In the first two cases gamma is computed from the ``X`` given
        at call time.

    coef0 : float, default=0.0
        Independent term in kernel function.

    Raises
    ------

    ValueError
        If ``degree`` is not > 0, if ``gamma`` is a string other than
        'scale' or 'auto', or if ``gamma`` is a number that is not > 0.
    """

    def __init__(self, degree=3, gamma='scale', coef0=0.):
        if not degree > 0:
            raise ValueError('degree must be > 0')
        self.degree = degree
        if isinstance(gamma, str):
            if gamma not in ('scale', 'auto'):
                raise ValueError(f'unknown gamma type {gamma}')
        elif not gamma > 0:
            raise ValueError('gamma must be > 0')
        self.gamma = gamma
        self.coef0 = coef0

    def _resolve_gamma(self, X):
        """
        Return the numeric value of gamma to use with the (already
        validated) left argument ``X`` of the kernel.
        """
        if self.gamma == 'scale':
            # local import to keep the block self-contained
            from scipy.sparse import issparse

            if issparse(X):
                # sparse matrices do not provide var(): use E[x^2] - E[x]^2
                X_var = X.multiply(X).mean() - X.mean() ** 2
            else:
                X_var = X.var()
            # a zero variance (e.g., a single sample or constant data) would
            # lead to a division by zero, i.e., to an infinite gamma and to
            # inf/nan entries in the kernel matrix
            return 1. / (X.shape[1] * X_var) if X_var != 0 else 1.
        if self.gamma == 'auto':
            return 1. / X.shape[1]
        return self.gamma

    def __call__(self, X, Y=None):
        """
        Return the polynomial kernel matrix between the rows of X and the
        rows of Y (or of X itself when Y is None).
        """
        X, Y = check_pairwise_arrays(X, Y)
        gamma = self._resolve_gamma(X)
        return (gamma * safe_sparse_dot(X, Y.T, dense_output=True) + self.coef0) ** self.degree
96
+
97
+
98
class GaussianKernel(Kernel):
    r"""
    Compute the gaussian RBF kernel between X and Y:

    .. math::

        K(X, Y) = e^{-\gamma \lVert X - Y \rVert_2^2}

    Parameters
    ----------

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for kernel function.

        - if `gamma='scale'` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma, falling back
          to 1.0 when the variance of X is zero,
        - if `gamma='auto'`, uses 1 / n_features,
        - if a float is passed it must be > 0 and it is used as is.

        In the first two cases gamma is computed from the ``X`` given
        at call time.

    Raises
    ------

    ValueError
        If ``gamma`` is a string other than 'scale' or 'auto', or if it
        is a number that is not > 0.
    """

    def __init__(self, gamma='scale'):
        if isinstance(gamma, str):
            if gamma not in ('scale', 'auto'):
                raise ValueError(f'unknown gamma type {gamma}')
        elif not gamma > 0:
            raise ValueError('gamma must be > 0')
        self.gamma = gamma

    def _resolve_gamma(self, X):
        """
        Return the numeric value of gamma to use with the (already
        validated) left argument ``X`` of the kernel.
        """
        if self.gamma == 'scale':
            # local import to keep the block self-contained
            from scipy.sparse import issparse

            if issparse(X):
                # sparse matrices do not provide var(): use E[x^2] - E[x]^2
                X_var = X.multiply(X).mean() - X.mean() ** 2
            else:
                X_var = X.var()
            # a zero variance (e.g., a single sample or constant data) would
            # lead to a division by zero, i.e., to an infinite gamma and to
            # nan entries in the kernel matrix (-inf * 0 on the diagonal)
            return 1. / (X.shape[1] * X_var) if X_var != 0 else 1.
        if self.gamma == 'auto':
            return 1. / X.shape[1]
        return self.gamma

    def __call__(self, X, Y=None):
        """
        Return the gaussian RBF kernel matrix between the rows of X and the
        rows of Y (or of X itself when Y is None).
        """
        X, Y = check_pairwise_arrays(X, Y)
        gamma = self._resolve_gamma(X)
        return np.exp(-gamma * euclidean_distances(X, Y, squared=True))
130
+
131
+
132
class LaplacianKernel(Kernel):
    r"""
    Compute the laplacian RBF kernel between X and Y:

    .. math::

        K(X, Y) = e^{-\gamma \lVert X - Y \rVert_1}

    Parameters
    ----------

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for kernel function.

        - if `gamma='scale'` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma, falling back
          to 1.0 when the variance of X is zero,
        - if `gamma='auto'`, uses 1 / n_features,
        - if a float is passed it must be > 0 and it is used as is.

        In the first two cases gamma is computed from the ``X`` given
        at call time.

    Raises
    ------

    ValueError
        If ``gamma`` is a string other than 'scale' or 'auto', or if it
        is a number that is not > 0.
    """

    def __init__(self, gamma='scale'):
        if isinstance(gamma, str):
            if gamma not in ('scale', 'auto'):
                raise ValueError(f'unknown gamma type {gamma}')
        elif not gamma > 0:
            raise ValueError('gamma must be > 0')
        self.gamma = gamma

    def _resolve_gamma(self, X):
        """
        Return the numeric value of gamma to use with the (already
        validated) left argument ``X`` of the kernel.
        """
        if self.gamma == 'scale':
            # local import to keep the block self-contained
            from scipy.sparse import issparse

            if issparse(X):
                # sparse matrices do not provide var(): use E[x^2] - E[x]^2
                X_var = X.multiply(X).mean() - X.mean() ** 2
            else:
                X_var = X.var()
            # a zero variance (e.g., a single sample or constant data) would
            # lead to a division by zero, i.e., to an infinite gamma and to
            # nan entries in the kernel matrix (-inf * 0 on the diagonal)
            return 1. / (X.shape[1] * X_var) if X_var != 0 else 1.
        if self.gamma == 'auto':
            return 1. / X.shape[1]
        return self.gamma

    def __call__(self, X, Y=None):
        """
        Return the laplacian RBF kernel matrix between the rows of X and the
        rows of Y (or of X itself when Y is None).
        """
        X, Y = check_pairwise_arrays(X, Y)
        gamma = self._resolve_gamma(X)
        return np.exp(-gamma * manhattan_distances(X, Y))
164
+
165
+
166
class SigmoidKernel(Kernel):
    r"""
    Compute the sigmoid kernel between X and Y:

    .. math::

        K(X, Y) = \tanh(\gamma \langle X, Y \rangle + coef_0)

    Parameters
    ----------

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for kernel function.

        - if `gamma='scale'` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma, falling back
          to 1.0 when the variance of X is zero,
        - if `gamma='auto'`, uses 1 / n_features,
        - if a float is passed it must be > 0 and it is used as is.

        In the first two cases gamma is computed from the ``X`` given
        at call time.

    coef0 : float, default=0.0
        Independent term in kernel function.

    Raises
    ------

    ValueError
        If ``gamma`` is a string other than 'scale' or 'auto', or if it
        is a number that is not > 0.
    """

    def __init__(self, gamma='scale', coef0=0.):
        if isinstance(gamma, str):
            if gamma not in ('scale', 'auto'):
                raise ValueError(f'unknown gamma type {gamma}')
        elif not gamma > 0:
            raise ValueError('gamma must be > 0')
        self.gamma = gamma
        self.coef0 = coef0

    def _resolve_gamma(self, X):
        """
        Return the numeric value of gamma to use with the (already
        validated) left argument ``X`` of the kernel.
        """
        if self.gamma == 'scale':
            # local import to keep the block self-contained
            from scipy.sparse import issparse

            if issparse(X):
                # sparse matrices do not provide var(): use E[x^2] - E[x]^2
                X_var = X.multiply(X).mean() - X.mean() ** 2
            else:
                X_var = X.var()
            # a zero variance (e.g., a single sample or constant data) would
            # lead to a division by zero, i.e., to an infinite gamma and to
            # nan entries in the kernel matrix wherever the inner product is 0
            return 1. / (X.shape[1] * X_var) if X_var != 0 else 1.
        if self.gamma == 'auto':
            return 1. / X.shape[1]
        return self.gamma

    def __call__(self, X, Y=None):
        """
        Return the sigmoid kernel matrix between the rows of X and the rows
        of Y (or of X itself when Y is None).
        """
        X, Y = check_pairwise_arrays(X, Y)
        gamma = self._resolve_gamma(X)
        return np.tanh(gamma * safe_sparse_dot(X, Y.T, dense_output=True) + self.coef0)
202
+
203
+
204
# Ready-to-use kernel objects, each one built with the default
# hyper-parameters of its class.
linear = LinearKernel()
poly = PolyKernel()
gaussian = GaussianKernel()
laplacian = LaplacianKernel()
sigmoid = SigmoidKernel()
@@ -0,0 +1,284 @@
1
+ from abc import ABC
2
+
3
+ import autograd.numpy as np
4
+ import cvxpy as cp
5
+
6
+ from ...opti import OptimizationFunction
7
+
8
+
9
class SVMLoss(OptimizationFunction, ABC):
    """
    Common base of all the SVM loss functions. It implements the primal
    objective, i.e., the regularization term plus the loss term averaged
    over the samples:

        1/(2n) ||theta||^2 + C/n sum(loss(X theta, y))

    together with its jacobian and its exact minimizer.

    Subclasses must implement ``loss``, ``_cvxpy_loss``, ``loss_jacobian``
    and ``step_size``.
    """

    def __init__(self, svm, X, y):
        """
        Parameters
        ----------

        svm : `SVM` instance
            The SVM estimator this loss is attached to. Its ``C``
            hyper-parameter weights the loss term of the objective.

        X : ndarray of shape (n_samples, n_features)
            Training data over which the loss is evaluated.

        y : ndarray of shape (n_samples,)
            Target values associated with ``X``.
        """
        n_features = X.shape[1]
        super().__init__(n_features)
        self.svm = svm
        self.X = X
        self.y = y

    def args(self):
        """
        Return the training data and the targets as the tuple ``(X, y)``.
        """
        return self.X, self.y

    def x_star(self):
        """
        Return the minimizer of the primal objective over the whole
        training set. It is computed only once and then cached in the
        ``x_opt`` attribute.

        Returns
        -------

        x_opt : ndarray of shape (n_features,)
            The optimal value of the optimization variable.

        Raises
        ------

        ValueError
            If none of the attempted solvers ends with an optimal (or
            optimal-inaccurate) status.
        """
        if hasattr(self, 'x_opt'):
            return self.x_opt

        # The minimizer is obtained from the *same* primal objective that the
        # optimizers minimize, i.e., 1/(2n) ||theta||^2 + C/n sum(loss), which
        # is solved directly as a convex program by a conic solver instead of
        # being recovered (less accurately) from the dual, so that
        # f_star() = function(x_star()) is a solver-certified optimum.
        n_samples = self.X.shape[0]
        theta = cp.Variable(self.X.shape[1])
        regularization = 1 / (2 * n_samples) * cp.sum_squares(theta)
        empirical_loss = self.svm.C / n_samples * self._cvxpy_loss(theta)
        problem = cp.Problem(cp.Minimize(regularization + empirical_loss))

        accepted = (cp.OPTIMAL, cp.OPTIMAL_INACCURATE)
        # try the solvers in order and stop at the first one which succeeds
        for solver in (cp.CLARABEL, cp.ECOS, cp.OSQP, cp.SCS):
            try:
                problem.solve(solver=solver)
            except (cp.error.SolverError, cp.error.DCPError, KeyError):
                continue  # this solver cannot be used, try the next one
            if problem.status in accepted:
                break

        if problem.status not in accepted:
            raise ValueError(f'could not compute the optimal solution x_star '
                             f'(solver status: {problem.status})')

        self.x_opt = np.asarray(theta.value, dtype=float)
        return self.x_opt

    def f_star(self):
        """
        Return the value of the primal objective at ``x_star()``.
        """
        return self.function(self.x_star())

    def _cvxpy_loss(self, theta):
        """
        Build the cvxpy expression of the loss term, summed over the training
        samples, as a function of the optimization variable. It is used by
        ``x_star`` to state the convex primal program.

        Parameters
        ----------

        theta : cvxpy variable of shape (n_features,)
            The optimization variable of the primal program.

        Returns
        -------

        expr : cvxpy expression
            The expression of sum(loss(X theta, y)).
        """
        raise NotImplementedError

    def function(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the primal objective at ``packed_coef_inter``.

        Parameters
        ----------

        packed_coef_inter : ndarray of shape (n_features,)
            The point where the objective is evaluated.

        X_batch : ndarray of shape (batch_size, n_features), default=None
            The samples to use. If None, the whole training set is used.

        y_batch : ndarray of shape (batch_size,), default=None
            The targets to use. If None, all the training targets are used.

        Returns
        -------

        value : float
            The regularization term plus the averaged loss term.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        n_samples = X_batch.shape[0]
        decision = np.dot(X_batch, packed_coef_inter)  # svm decision function
        regularization = 1 / (2 * n_samples) * np.linalg.norm(packed_coef_inter) ** 2
        empirical_loss = self.svm.C / n_samples * np.sum(self.loss(decision, y_batch))
        return regularization + empirical_loss

    def loss(self, y_pred, y_true):
        """
        Return the element-wise loss between the predictions ``y_pred``
        and the targets ``y_true``.
        """
        raise NotImplementedError

    def jacobian(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the jacobian of the primal objective at
        ``packed_coef_inter``.

        The loss term enters with a minus sign, i.e., ``loss_jacobian`` is
        expected to return the negated (sub)gradient of the summed loss.

        Parameters
        ----------

        packed_coef_inter : ndarray of shape (n_features,)
            The point where the jacobian is evaluated.

        X_batch : ndarray of shape (batch_size, n_features), default=None
            The samples to use. If None, the whole training set is used.

        y_batch : ndarray of shape (batch_size,), default=None
            The targets to use. If None, all the training targets are used.

        Returns
        -------

        jacobian : ndarray of shape (n_features,)
            The jacobian of the regularization term minus the weighted
            ``loss_jacobian``.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        n_samples = X_batch.shape[0]
        regularization_jac = (1 / n_samples) * packed_coef_inter
        loss_jac = self.svm.C / n_samples * self.loss_jacobian(packed_coef_inter, X_batch, y_batch)
        return regularization_jac - loss_jac

    def loss_jacobian(self, packed_coef_inter, X_batch, y_batch):
        """
        Return the term, wrt the summed loss over the given batch, which
        ``jacobian`` subtracts from the jacobian of the regularization term.
        """
        raise NotImplementedError

    def step_size(self, X_batch, y_batch):
        """
        Provide the step size to be used with the given batch.
        """
        raise NotImplementedError
112
+
113
+
114
class Hinge(SVMLoss):
    r"""
    Hinge loss for classification:

    .. math::

        L(y_{pred}, y_{true}) = \max(0, 1 - y_{true} \, y_{pred})
    """

    _loss_type = 'classifier'

    def loss(self, y_pred, y_true):
        """
        Return the element-wise hinge loss.
        """
        margins = y_true * y_pred
        return np.maximum(0, 1 - margins)

    def _cvxpy_loss(self, theta):
        """
        Return the cvxpy expression of the hinge loss summed over the
        training samples.
        """
        margins = cp.multiply(self.y, self.X @ theta)
        return cp.sum(cp.pos(1 - margins))

    def loss_jacobian(self, packed_coef_inter, X_batch, y_batch):
        """
        Return the sum of ``y_i * x_i`` over the samples of the batch
        whose margin ``y_i * y_pred_i`` is lower than 1.
        """
        decision = np.dot(X_batch, packed_coef_inter)  # svm decision function
        violating = np.argwhere(y_batch * decision < 1.).ravel()
        return np.dot(y_batch[violating], X_batch[violating])

    def step_size(self, X_batch, y_batch):
        """
        Generator yielding the step size 1 / L for the given batch, where
        L = C / n_samples * ||X_batch||^2 (with the default norm of
        ``np.linalg.norm``). The value for the whole training set is
        computed only once and cached in ``_step_size``.
        """
        if not np.array_equal(X_batch, self.X):  # mini batch
            batch_size = X_batch.shape[0]
            lipschitz = self.svm.C / batch_size * np.linalg.norm(X_batch) ** 2
            yield 1 / lipschitz
        else:  # no mini batches
            if not hasattr(self, '_step_size'):
                n_samples = self.X.shape[0]
                lipschitz = self.svm.C / n_samples * np.linalg.norm(self.X) ** 2
                self._step_size = 1 / lipschitz
            yield self._step_size
147
+
148
+
149
class SquaredHinge(Hinge):
    r"""
    Squared hinge loss for classification:

    .. math::

        L(y_{pred}, y_{true}) = \max(0, 1 - y_{true} \, y_{pred})^2
    """

    def loss(self, y_pred, y_true):
        """
        Return the element-wise square of the hinge loss.
        """
        hinge_values = super().loss(y_pred, y_true)
        return np.square(hinge_values)

    def _cvxpy_loss(self, theta):
        """
        Return the cvxpy expression of the squared hinge loss summed over
        the training samples.
        """
        margins = cp.multiply(self.y, self.X @ theta)
        return cp.sum(cp.square(cp.pos(1 - margins)))

    def loss_jacobian(self, packed_coef_inter, X_batch, y_batch):
        """
        Return twice the sum of ``max(0, 1 - y_i * y_pred_i) * y_i * x_i``
        over the samples of the batch whose margin is lower than 1.
        """
        decision = np.dot(X_batch, packed_coef_inter)  # svm decision function
        violating = np.argwhere(y_batch * decision < 1.).ravel()
        y_active = y_batch[violating]
        slack = np.maximum(0, 1 - y_active * decision[violating])
        return 2 * np.dot(slack * y_active, X_batch[violating])

    def step_size(self, X_batch, y_batch):
        """
        Generator yielding the step size 1 / L for the given batch, where
        L = mu / n_samples + C / n_samples * ||X_batch||^2 with mu = 1
        (and the default norm of ``np.linalg.norm``). The value for the
        whole training set is computed only once and cached in
        ``_step_size``.
        """
        mu = 1
        if not np.array_equal(X_batch, self.X):  # mini batch
            batch_size = X_batch.shape[0]
            regularization_term = 1 / batch_size * mu
            loss_term = self.svm.C / batch_size * np.linalg.norm(X_batch) ** 2
            yield 1 / (regularization_term + loss_term)
        else:  # no mini batches
            if not hasattr(self, '_step_size'):
                n_samples = self.X.shape[0]
                regularization_term = 1 / n_samples * mu
                loss_term = self.svm.C / n_samples * np.linalg.norm(self.X) ** 2
                self._step_size = 1 / (regularization_term + loss_term)
            yield self._step_size
184
+
185
+
186
class EpsilonInsensitive(SVMLoss):
    r"""
    Epsilon-insensitive loss for regression:

    .. math::

        L(y_{pred}, y_{true}) = \max(0, \lvert y_{true} - y_{pred} \rvert - \epsilon)
    """

    _loss_type = 'regressor'

    def __init__(self, svm, X, y, epsilon):
        """
        Parameters
        ----------

        svm : `SVM` instance
            The SVM estimator this loss is attached to.

        X : ndarray of shape (n_samples, n_features)
            Training data over which the loss is evaluated.

        y : ndarray of shape (n_samples,)
            Target values associated with ``X``.

        epsilon : float
            Half-width of the tube around the targets within which
            the predictions are not penalized.
        """
        super().__init__(svm, X, y)
        self.epsilon = epsilon

    def loss(self, y_pred, y_true):
        """
        Return the element-wise epsilon-insensitive loss.
        """
        abs_residuals = np.abs(y_true - y_pred)
        return np.maximum(0, abs_residuals - self.epsilon)

    def _cvxpy_loss(self, theta):
        """
        Return the cvxpy expression of the epsilon-insensitive loss summed
        over the training samples.
        """
        abs_residuals = cp.abs(self.y - self.X @ theta)
        return cp.sum(cp.pos(abs_residuals - self.epsilon))

    def loss_jacobian(self, packed_coef_inter, X_batch, y_batch):
        """
        Return the sum of ``sign(y_i - y_pred_i) * x_i`` over the samples of
        the batch whose absolute residual is greater or equal than epsilon.
        """
        decision = np.dot(X_batch, packed_coef_inter)  # svm decision function
        outside = np.argwhere(np.abs(y_batch - decision) >= self.epsilon).ravel()
        residuals = y_batch[outside] - decision[outside]
        return np.dot(np.sign(residuals), X_batch[outside])

    def step_size(self, X_batch, y_batch):
        """
        Generator yielding the step size 1 / L for the given batch, where
        L = C / n_samples * ||X_batch||^2 (with the default norm of
        ``np.linalg.norm``). The value for the whole training set is
        computed only once and cached in ``_step_size``.
        """
        if not np.array_equal(X_batch, self.X):  # mini batch
            batch_size = X_batch.shape[0]
            lipschitz = self.svm.C / batch_size * np.linalg.norm(X_batch) ** 2
            yield 1 / lipschitz
        else:  # no mini batches
            if not hasattr(self, '_step_size'):
                n_samples = self.X.shape[0]
                lipschitz = self.svm.C / n_samples * np.linalg.norm(self.X) ** 2
                self._step_size = 1 / lipschitz
            yield self._step_size
241
+
242
+
243
class SquaredEpsilonInsensitive(EpsilonInsensitive):
    r"""
    Squared epsilon-insensitive loss for regression:

    .. math::

        L(y_{pred}, y_{true}) = \max(0, \lvert y_{true} - y_{pred} \rvert - \epsilon)^2
    """

    def loss(self, y_pred, y_true):
        """
        Return the element-wise square of the epsilon-insensitive loss.
        """
        tube_excess = super().loss(y_pred, y_true)
        return np.square(tube_excess)

    def _cvxpy_loss(self, theta):
        """
        Return the cvxpy expression of the squared epsilon-insensitive loss
        summed over the training samples.
        """
        abs_residuals = cp.abs(self.y - self.X @ theta)
        return cp.sum(cp.square(cp.pos(abs_residuals - self.epsilon)))

    def loss_jacobian(self, packed_coef_inter, X_batch, y_batch):
        """
        Return twice the sum of
        ``sign(y_i - y_pred_i) * (|y_i - y_pred_i| - epsilon) * x_i``
        over the samples of the batch whose absolute residual is greater
        or equal than epsilon.
        """
        decision = np.dot(X_batch, packed_coef_inter)  # svm decision function
        outside = np.argwhere(np.abs(y_batch - decision) >= self.epsilon).ravel()
        residuals = y_batch[outside] - decision[outside]
        weights = np.sign(residuals) * (np.abs(residuals) - self.epsilon)
        return 2 * np.dot(weights, X_batch[outside])

    def step_size(self, X_batch, y_batch):
        """
        Generator yielding the step size 1 / L for the given batch, where
        L = mu / n_samples + C / n_samples * ||X_batch||^2 with mu = 1
        (and the default norm of ``np.linalg.norm``). The value for the
        whole training set is computed only once and cached in
        ``_step_size``.
        """
        mu = 1
        if not np.array_equal(X_batch, self.X):  # mini batch
            batch_size = X_batch.shape[0]
            regularization_term = 1 / batch_size * mu
            loss_term = self.svm.C / batch_size * np.linalg.norm(X_batch) ** 2
            yield 1 / (regularization_term + loss_term)
        else:  # no mini batches
            if not hasattr(self, '_step_size'):
                n_samples = self.X.shape[0]
                regularization_term = 1 / n_samples * mu
                loss_term = self.svm.C / n_samples * np.linalg.norm(self.X) ** 2
                self._step_size = 1 / (regularization_term + loss_term)
            yield self._step_size
279
+
280
+
281
# Lower-case aliases of the loss classes (the classes themselves, not
# instances of them).
hinge = Hinge
squared_hinge = SquaredHinge
epsilon_insensitive = EpsilonInsensitive
squared_epsilon_insensitive = SquaredEpsilonInsensitive