optiml 1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiml/__init__.py +0 -0
- optiml/ml/__init__.py +0 -0
- optiml/ml/neural_network/__init__.py +3 -0
- optiml/ml/neural_network/_base.py +475 -0
- optiml/ml/neural_network/activations.py +79 -0
- optiml/ml/neural_network/initializers.py +66 -0
- optiml/ml/neural_network/layers.py +183 -0
- optiml/ml/neural_network/losses.py +178 -0
- optiml/ml/neural_network/regularizers.py +87 -0
- optiml/ml/svm/__init__.py +3 -0
- optiml/ml/svm/_base.py +1442 -0
- optiml/ml/svm/kernels.py +208 -0
- optiml/ml/svm/losses.py +284 -0
- optiml/ml/svm/smo.py +797 -0
- optiml/ml/tests/__init__.py +0 -0
- optiml/ml/tests/_datasets.py +49 -0
- optiml/ml/tests/_utils.py +28 -0
- optiml/ml/tests/test_initializers.py +33 -0
- optiml/ml/tests/test_neural_network.py +86 -0
- optiml/ml/tests/test_svc.py +245 -0
- optiml/ml/tests/test_svr.py +256 -0
- optiml/ml/utils.py +252 -0
- optiml/opti/__init__.py +4 -0
- optiml/opti/_base.py +309 -0
- optiml/opti/constrained/__init__.py +9 -0
- optiml/opti/constrained/_base.py +404 -0
- optiml/opti/constrained/active_set.py +228 -0
- optiml/opti/constrained/frank_wolfe.py +158 -0
- optiml/opti/constrained/interior_point.py +282 -0
- optiml/opti/constrained/projected_gradient.py +138 -0
- optiml/opti/constrained/tests/__init__.py +0 -0
- optiml/opti/constrained/tests/test_active_set.py +16 -0
- optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
- optiml/opti/constrained/tests/test_interior_point.py +16 -0
- optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
- optiml/opti/constrained/tests/test_lower_bound.py +29 -0
- optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
- optiml/opti/unconstrained/__init__.py +6 -0
- optiml/opti/unconstrained/_base.py +63 -0
- optiml/opti/unconstrained/line_search/__init__.py +10 -0
- optiml/opti/unconstrained/line_search/_base.py +106 -0
- optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
- optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
- optiml/opti/unconstrained/line_search/line_search.py +248 -0
- optiml/opti/unconstrained/line_search/newton.py +198 -0
- optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
- optiml/opti/unconstrained/proximal_bundle.py +219 -0
- optiml/opti/unconstrained/stochastic/__init__.py +12 -0
- optiml/opti/unconstrained/stochastic/_base.py +246 -0
- optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
- optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
- optiml/opti/unconstrained/stochastic/adam.py +179 -0
- optiml/opti/unconstrained/stochastic/adamax.py +178 -0
- optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
- optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
- optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
- optiml/opti/unconstrained/stochastic/schedules.py +89 -0
- optiml/opti/unconstrained/tests/__init__.py +0 -0
- optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
- optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
- optiml/opti/unconstrained/tests/test_adam.py +42 -0
- optiml/opti/unconstrained/tests/test_adamax.py +41 -0
- optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
- optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
- optiml/opti/unconstrained/tests/test_functions.py +34 -0
- optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
- optiml/opti/unconstrained/tests/test_newton.py +20 -0
- optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
- optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
- optiml/opti/unconstrained/tests/test_verbose.py +25 -0
- optiml/opti/utils.py +353 -0
- optiml-1.7.dist-info/METADATA +203 -0
- optiml-1.7.dist-info/RECORD +76 -0
- optiml-1.7.dist-info/WHEEL +5 -0
- optiml-1.7.dist-info/licenses/LICENSE +21 -0
- optiml-1.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
|
|
3
|
+
import autograd.numpy as np
|
|
4
|
+
|
|
5
|
+
from .activations import Activation, linear
|
|
6
|
+
from .initializers import glorot_uniform
|
|
7
|
+
from .regularizers import l2
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Layer(ABC):
    """
    Common interface shared by every neural network layer.

    Concrete layers are expected to override ``forward``, which maps the
    layer input to its output, and ``backward``, which propagates the error
    signal back through the layer. Both default implementations raise
    ``NotImplementedError``.
    """

    def backward(self, delta):
        """Back-propagate the error signal ``delta``; must be overridden."""
        raise NotImplementedError()

    def forward(self, X):
        """Compute the layer output for the input ``X``; must be overridden."""
        raise NotImplementedError()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ParamLayer(Layer, ABC):
    """
    Base abstract class for all layers with trainable parameters, i.e., a
    coefficient (weight) tensor ``coef_`` and, optionally, an intercept
    (bias) tensor ``inter_``, each with its own initializer and regularizer.

    ``inter_`` is only created when ``fit_intercept`` is True.
    """

    def __init__(self,
                 coef_shape,
                 activation,
                 coef_init,
                 inter_init,
                 coef_reg,
                 inter_reg,
                 fit_intercept,
                 random_state=None):
        """
        Parameters
        ----------

        coef_shape : tuple of int
            Shape of the coefficient (weight) tensor.

        activation : `Activation` instance
            The activation function applied by the layer.

        coef_init : callable, array-like or None
            Initializer for the coefficient tensor. If None, `glorot_uniform`
            is used; if callable, it is called with ``coef_shape`` and
            ``random_state``; otherwise it is converted to a float array and
            reshaped to ``coef_shape``.

        inter_init : callable, array-like or None
            Initializer for the intercept tensor. If None, zeros are used;
            if callable, it is called with the intercept shape; otherwise it
            is converted to a float array and reshaped to the intercept
            shape. The intercept shape has as many dimensions as
            ``coef_shape``, all equal to 1 except the last one which equals
            ``coef_shape[-1]``. Only used when ``fit_intercept`` is True.

        coef_reg : `Regularizer` instance or None
            Regularizer applied to the coefficient tensor. If None, `l2` is used.

        inter_reg : `Regularizer` instance or None
            Regularizer applied to the intercept tensor. If None, `l2` is used.

        fit_intercept : bool
            Whether the layer has an intercept (bias) term.

        random_state : int, RandomState instance or None, default=None
            Controls the pseudo random number generation for the parameters
            initialization.

        Raises
        ------

        TypeError
            If ``activation`` is not an `Activation` instance.

        ValueError
            If explicit initial values cannot be reshaped to the shape of
            the tensor they initialize.
        """

        if not isinstance(activation, Activation):
            raise TypeError(f'{activation} is not an allowed activation function')
        self.activation = activation

        if coef_init is None:
            coef_init = glorot_uniform
        if callable(coef_init):
            self.coef_ = coef_init(coef_shape, random_state=random_state)
        else:
            # Explicit values must end up with the declared weight shape;
            # a fixed (-1, 1) column is only correct for a single output.
            self.coef_ = np.asarray(coef_init, dtype=float).reshape(coef_shape)

        self.fit_intercept = fit_intercept
        if self.fit_intercept:
            # Broadcastable row shape: 1 everywhere but the output dimension.
            inter_shape = [1] * len(coef_shape)
            inter_shape[-1] = coef_shape[-1]
            if inter_init is None:
                self.inter_ = np.zeros(inter_shape)
            elif callable(inter_init):
                self.inter_ = inter_init(inter_shape)
            else:
                # Same reasoning as for the weights: keep the intercept a
                # row so that it broadcasts over the samples dimension.
                self.inter_ = np.asarray(inter_init, dtype=float).reshape(inter_shape)

        self.coef_reg = l2 if coef_reg is None else coef_reg
        self.inter_reg = l2 if inter_reg is None else inter_reg
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class FullyConnected(ParamLayer):
    """
    Dense layer whose output is ``activation(X @ W + b)``; the intercept
    ``b`` is added only when ``fit_intercept`` is True.
    """

    def __init__(self,
                 n_in,
                 n_out,
                 activation=linear,
                 coef_init=glorot_uniform,
                 inter_init=np.zeros,
                 coef_reg=l2,
                 inter_reg=l2,
                 fit_intercept=True,
                 random_state=None):
        """
        Parameters
        ----------

        n_in : int
            Number of input units (fan-in) of the layer.

        n_out : int
            Number of output units (fan-out) of the layer, i.e., the number
            of neurons.

        activation : `Activation` instance, default=linear
            The activation function applied by the layer.

        coef_init : callable or array-like, default=glorot_uniform
            Initializer for the coefficient (weight) tensor, whose shape is
            ``(n_in, n_out)``.

        inter_init : callable or array-like, default=np.zeros
            Initializer for the intercept (bias) tensor. Only used when
            ``fit_intercept`` is True.

        coef_reg : `Regularizer` instance, default=l2
            Regularizer applied to the coefficient tensor.

        inter_reg : `Regularizer` instance, default=l2
            Regularizer applied to the intercept tensor.

        fit_intercept : bool, default=True
            Whether to add an intercept (bias) term to the layer.

        random_state : int, RandomState instance or None, default=None
            Controls the pseudo random number generation for the parameters
            initialization.
        """
        super().__init__(coef_shape=(n_in, n_out),
                         activation=activation,
                         coef_init=coef_init,
                         inter_init=inter_init,
                         coef_reg=coef_reg,
                         inter_reg=inter_reg,
                         fit_intercept=fit_intercept,
                         random_state=random_state)
        self.fan_in, self.fan_out = n_in, n_out

    def forward(self, X):
        """
        Compute the layer output for ``X`` and cache both the input and the
        pre-activation values, which ``backward`` reads afterwards.
        """
        self._X = X
        self._WX_b = np.dot(X, self.coef_)
        if self.fit_intercept:
            self._WX_b += self.inter_
        return self.activation(self._WX_b)

    def backward(self, delta):
        """
        Back-propagate ``delta`` through the layer using the values cached
        by the last ``forward`` call.

        Returns
        -------

        tuple
            The error signal with respect to the layer input, and a dict of
            parameter gradients with key ``'dW'`` and, when
            ``fit_intercept`` is True, ``'db'``.
        """
        # error signal at the pre-activation level
        pre_act_grad = delta * self.activation.jacobian(self._WX_b)

        # gradients with respect to the trainable parameters
        param_grads = dict(dW=self._X.T.dot(pre_act_grad))
        if self.fit_intercept:
            param_grads['db'] = np.sum(pre_act_grad, axis=0, keepdims=True)

        # gradient with respect to the layer input
        input_grad = pre_act_grad.dot(self.coef_.T)
        return input_grad, param_grads
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
|
|
3
|
+
import autograd.numpy as np
|
|
4
|
+
from scipy.special import xlogy
|
|
5
|
+
|
|
6
|
+
from .activations import Linear
|
|
7
|
+
from .layers import ParamLayer
|
|
8
|
+
from .regularizers import L2
|
|
9
|
+
from ...opti import OptimizationFunction
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NeuralNetworkLoss(OptimizationFunction, ABC):
    """
    Base abstract class for all neural network loss functions. The objective
    is the data loss plus the regularization terms of every `ParamLayer`,
    all scaled by ``1 / (2 * n_samples)``; its jacobian is obtained through
    the network backward pass.

    Subclasses must implement ``loss`` and, optionally, override ``delta``.
    """

    def __init__(self, neural_net, X, y):
        """
        Parameters
        ----------

        neural_net : `NeuralNetwork` instance
            The neural network estimator this loss is attached to. Its
            ``layers``, ``forward``, ``backward``, ``_pack`` and ``_unpack``
            members are used to evaluate the objective and its jacobian.

        X : ndarray of shape (n_samples, n_features)
            Training data over which the loss is evaluated by default.

        y : ndarray of shape (n_samples, n_outputs)
            Target values associated with ``X``.
        """
        # the parent constructor receives the number of columns of X
        super().__init__(X.shape[1])
        self.neural_net = neural_net
        self.X = X
        self.y = y

    def args(self):
        """Return the stored ``(X, y)`` pair."""
        return self.X, self.y

    def loss(self, y_pred, y_true):
        """Data loss between predictions and targets; must be overridden."""
        raise NotImplementedError

    def delta(self, y_pred, y_true):
        """Error signal fed to the backward pass; defaults to the residuals."""
        return y_pred - y_true

    def function(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the regularized objective at ``packed_coef_inter``.

        The packed parameters are first loaded into the network. When a
        batch is not given, the stored ``X`` and/or ``y`` are used.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        n_samples = X_batch.shape[0]
        coef_penalty = sum(layer.coef_reg(layer.coef_)
                           for layer in net.layers
                           if isinstance(layer, ParamLayer)) / (2 * n_samples)
        inter_penalty = sum(layer.inter_reg(layer.inter_)
                            for layer in net.layers
                            if isinstance(layer, ParamLayer) and layer.fit_intercept) / (2 * n_samples)
        data_loss = self.loss(net.forward(X_batch), y_batch)
        return 1 / (2 * n_samples) * data_loss + coef_penalty + inter_penalty

    def jacobian(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the jacobian of the objective at ``packed_coef_inter``.

        The error signal returned by ``delta`` is scaled by
        ``1 / n_samples``, pushed through the network backward pass and the
        result is packed with the network ``_pack`` method.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        net = self.neural_net
        net._unpack(packed_coef_inter)

        n_samples = X_batch.shape[0]
        error_signal = 1 / n_samples * self.delta(net.forward(X_batch), y_batch)
        return net._pack(*net.backward(error_signal))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class MeanSquaredError(NeuralNetworkLoss):
    r"""
    Sum of squared errors loss for regression:

    .. math::

        L(y_{pred}, y_{true}) = \sum (y_{pred} - y_{true})^2
    """

    def x_star(self):
        """
        Return the closed-form minimizer when the network is a single
        `Linear` layer with an `L2` coefficient regularizer and no
        intercept; otherwise return an array of NaN of size ``ndim``.

        The closed-form solution is computed once and cached in ``x_opt``.
        """
        layers = self.neural_net.layers
        has_closed_form = (len(layers) == 1 and
                           isinstance(layers[-1].activation, Linear) and
                           isinstance(layers[-1].coef_reg, L2) and
                           not layers[-1].fit_intercept)
        if not has_closed_form:
            return np.full(fill_value=np.nan, shape=self.ndim)

        if not hasattr(self, 'x_opt'):
            lmbda = layers[-1].coef_reg.lmbda
            gram = self.X.T.dot(self.X)
            if lmbda != 0.:
                # ridge term added to the diagonal of the normal equations
                gram = gram + np.eye(self.ndim) * lmbda
            self.x_opt = np.linalg.inv(gram).dot(self.X.T).dot(self.y)
        return self.x_opt

    def f_star(self):
        """
        Return the objective value at ``x_star()``, or ``np.inf`` when
        ``x_star()`` is entirely NaN.
        """
        minimizer = self.x_star()
        if np.isnan(minimizer).all():
            return np.inf
        return self.function(minimizer)

    def loss(self, y_pred, y_true):
        """Return the sum of the squared residuals."""
        residuals = y_pred - y_true
        return np.sum(np.square(residuals))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class MeanAbsoluteError(NeuralNetworkLoss):
    r"""
    Sum of absolute errors loss for regression:

    .. math::

        L(y_{pred}, y_{true}) = \sum \lvert y_{pred} - y_{true} \rvert
    """

    def loss(self, y_pred, y_true):
        """Return the sum of the absolute residuals."""
        residuals = y_pred - y_true
        return np.sum(np.abs(residuals))

    def delta(self, y_pred, y_true):
        """Return the elementwise sign of the residuals."""
        residuals = y_pred - y_true
        return np.sign(residuals)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class BinaryCrossEntropy(NeuralNetworkLoss):
    r"""Binary Cross-Entropy aka Sigmoid Cross-Entropy loss
    function for binary and multi-label classification
    or regression between 0 and 1 with sigmoid output layer:

    .. math::

        L(y_{pred}, y_{true}) = -\sum \left[ y_{true} \log(y_{pred}) +
                                (1 - y_{true}) \log(1 - y_{pred}) \right]
    """

    def loss(self, y_pred, y_true):
        """Return the summed binary cross-entropy, computed with ``xlogy``."""
        positive_term = xlogy(y_true, y_pred)
        negative_term = xlogy(1. - y_true, 1. - y_pred)
        return -np.sum(positive_term + negative_term)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class CategoricalCrossEntropy(NeuralNetworkLoss):
    r"""Categorical Cross-Entropy loss function for multi-class (single-label)
    classification with softmax output layer and one-hot encoded target data:

    .. math::

        L(y_{pred}, y_{true}) = -\sum y_{true} \log(y_{pred})
    """

    def loss(self, y_pred, y_true):
        """Return the summed categorical cross-entropy, computed with ``xlogy``."""
        return -np.sum(xlogy(y_true, y_pred))

    def delta(self, y_pred, y_true):
        """
        Return the error signal, i.e. ``y_pred`` with 1 subtracted at the
        positions where ``y_true`` is non-zero.

        The result is a new array: ``y_pred`` is left untouched so that the
        caller's predictions are not corrupted by computing the gradient.
        """
        # according to: https://deepnotes.io/softmax-crossentropy
        one_hot_mask = y_true.astype(bool)
        error_signal = y_pred.copy()  # never write into the caller's array
        error_signal[one_hot_mask] -= 1.
        return error_signal
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class SparseCategoricalCrossEntropy(NeuralNetworkLoss):
    """Sparse Categorical Cross-Entropy loss function for multi-class
    (single-label) classification with softmax output layer.

    ``y_true`` holds one class index per sample; it is cast to int and
    flattened before being used to index the columns of ``y_pred``."""

    def loss(self, y_pred, y_true):
        """
        Return minus the sum of the log of the predicted values selected,
        row by row, by the class indices in ``y_true``.
        """
        # NOTE: this check is skipped when Python runs with -O
        assert y_pred.shape[0] == y_true.shape[0]
        rows = np.arange(y_pred.shape[0])
        cols = y_true.astype(int).ravel()
        return -np.sum(np.log(y_pred[rows, cols]))

    def delta(self, y_pred, y_true):
        """
        Return the error signal, i.e. ``y_pred`` with 1 subtracted, row by
        row, at the class index given by ``y_true``.

        The result is a new array: ``y_pred`` is left untouched so that the
        caller's predictions are not corrupted by computing the gradient.
        """
        rows = np.arange(y_pred.shape[0])
        cols = y_true.astype(int).ravel()
        error_signal = y_pred.copy()  # never write into the caller's array
        error_signal[rows, cols] -= 1.
        return error_signal
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Lowercase, snake_case aliases bound to the loss classes themselves
# (not to instances), one per loss defined in this module.
mean_squared_error = MeanSquaredError
|
|
175
|
+
mean_absolute_error = MeanAbsoluteError
|
|
176
|
+
binary_cross_entropy = BinaryCrossEntropy
|
|
177
|
+
categorical_cross_entropy = CategoricalCrossEntropy
|
|
178
|
+
sparse_categorical_cross_entropy = SparseCategoricalCrossEntropy
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Regularizer(ABC):
    """
    Common interface of all regularizers. A regularizer stores its strength
    ``lmbda`` and exposes a penalty ``function`` together with its
    ``jacobian``; calling the instance is a shorthand for ``function``.
    """

    def __init__(self, lmbda=0.):
        """
        Parameters
        ----------

        lmbda : float, default=0.
            Regularization strength. The higher the value, the stronger
            the penalty on the parameters.
        """
        self.lmbda = lmbda

    def __call__(self, theta):
        """Evaluate the penalty at ``theta`` by delegating to ``function``."""
        return self.function(theta)

    def function(self, theta):
        """Penalty value at ``theta``; must be overridden."""
        raise NotImplementedError()

    def jacobian(self, theta):
        """Penalty jacobian at ``theta``; must be overridden."""
        raise NotImplementedError()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class L1(Regularizer):
    r"""
    L1 (Lasso) regularizer:

    .. math::

        R(\theta) = \lambda \sum \lvert \theta \rvert
    """

    def __init__(self, lmbda=0.):
        """
        Parameters
        ----------

        lmbda : float, default=0.
            Regularization strength.
        """
        super().__init__(lmbda)

    def function(self, theta):
        """Return ``lmbda`` times the sum of the absolute values of ``theta``."""
        abs_total = np.sum(np.abs(theta))
        return self.lmbda * abs_total

    def jacobian(self, theta):
        """Return ``lmbda`` times the elementwise sign of ``theta``."""
        signs = np.sign(theta)
        return self.lmbda * signs
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class L2(Regularizer):
    r"""
    L2 (Ridge) regularizer:

    .. math::

        R(\theta) = \lambda \sum \theta^2
    """

    def __init__(self, lmbda=0.):
        """
        Parameters
        ----------

        lmbda : float, default=0.
            Regularization strength.
        """
        super().__init__(lmbda)

    def function(self, theta):
        """Return ``lmbda`` times the sum of the squared entries of ``theta``."""
        squared_total = np.sum(np.square(theta))
        return self.lmbda * squared_total

    def jacobian(self, theta):
        """
        Return ``lmbda * theta``.

        Note that no factor 2 is applied here, so this is the derivative of
        half of ``function``.
        """
        scaled = self.lmbda * theta
        return scaled
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Module-level regularizer instances built with the default strength
# (lmbda=0.), i.e. they apply no penalty. Every importer of these names
# shares the same two objects.
l1 = L1()
|
|
87
|
+
l2 = L2()
|