optiml 1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. optiml/__init__.py +0 -0
  2. optiml/ml/__init__.py +0 -0
  3. optiml/ml/neural_network/__init__.py +3 -0
  4. optiml/ml/neural_network/_base.py +475 -0
  5. optiml/ml/neural_network/activations.py +79 -0
  6. optiml/ml/neural_network/initializers.py +66 -0
  7. optiml/ml/neural_network/layers.py +183 -0
  8. optiml/ml/neural_network/losses.py +178 -0
  9. optiml/ml/neural_network/regularizers.py +87 -0
  10. optiml/ml/svm/__init__.py +3 -0
  11. optiml/ml/svm/_base.py +1442 -0
  12. optiml/ml/svm/kernels.py +208 -0
  13. optiml/ml/svm/losses.py +284 -0
  14. optiml/ml/svm/smo.py +797 -0
  15. optiml/ml/tests/__init__.py +0 -0
  16. optiml/ml/tests/_datasets.py +49 -0
  17. optiml/ml/tests/_utils.py +28 -0
  18. optiml/ml/tests/test_initializers.py +33 -0
  19. optiml/ml/tests/test_neural_network.py +86 -0
  20. optiml/ml/tests/test_svc.py +245 -0
  21. optiml/ml/tests/test_svr.py +256 -0
  22. optiml/ml/utils.py +252 -0
  23. optiml/opti/__init__.py +4 -0
  24. optiml/opti/_base.py +309 -0
  25. optiml/opti/constrained/__init__.py +9 -0
  26. optiml/opti/constrained/_base.py +404 -0
  27. optiml/opti/constrained/active_set.py +228 -0
  28. optiml/opti/constrained/frank_wolfe.py +158 -0
  29. optiml/opti/constrained/interior_point.py +282 -0
  30. optiml/opti/constrained/projected_gradient.py +138 -0
  31. optiml/opti/constrained/tests/__init__.py +0 -0
  32. optiml/opti/constrained/tests/test_active_set.py +16 -0
  33. optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
  34. optiml/opti/constrained/tests/test_interior_point.py +16 -0
  35. optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
  36. optiml/opti/constrained/tests/test_lower_bound.py +29 -0
  37. optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
  38. optiml/opti/unconstrained/__init__.py +6 -0
  39. optiml/opti/unconstrained/_base.py +63 -0
  40. optiml/opti/unconstrained/line_search/__init__.py +10 -0
  41. optiml/opti/unconstrained/line_search/_base.py +106 -0
  42. optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
  43. optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
  44. optiml/opti/unconstrained/line_search/line_search.py +248 -0
  45. optiml/opti/unconstrained/line_search/newton.py +198 -0
  46. optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
  47. optiml/opti/unconstrained/proximal_bundle.py +219 -0
  48. optiml/opti/unconstrained/stochastic/__init__.py +12 -0
  49. optiml/opti/unconstrained/stochastic/_base.py +246 -0
  50. optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
  51. optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
  52. optiml/opti/unconstrained/stochastic/adam.py +179 -0
  53. optiml/opti/unconstrained/stochastic/adamax.py +178 -0
  54. optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
  55. optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
  56. optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
  57. optiml/opti/unconstrained/stochastic/schedules.py +89 -0
  58. optiml/opti/unconstrained/tests/__init__.py +0 -0
  59. optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
  60. optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
  61. optiml/opti/unconstrained/tests/test_adam.py +42 -0
  62. optiml/opti/unconstrained/tests/test_adamax.py +41 -0
  63. optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
  64. optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
  65. optiml/opti/unconstrained/tests/test_functions.py +34 -0
  66. optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
  67. optiml/opti/unconstrained/tests/test_newton.py +20 -0
  68. optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
  69. optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
  70. optiml/opti/unconstrained/tests/test_verbose.py +25 -0
  71. optiml/opti/utils.py +353 -0
  72. optiml-1.7.dist-info/METADATA +203 -0
  73. optiml-1.7.dist-info/RECORD +76 -0
  74. optiml-1.7.dist-info/WHEEL +5 -0
  75. optiml-1.7.dist-info/licenses/LICENSE +21 -0
  76. optiml-1.7.dist-info/top_level.txt +1 -0
@@ -0,0 +1,183 @@
1
+ from abc import ABC
2
+
3
+ import autograd.numpy as np
4
+
5
+ from .activations import Activation, linear
6
+ from .initializers import glorot_uniform
7
+ from .regularizers import l2
8
+
9
+
10
class Layer(ABC):
    """
    Common interface shared by every neural network layer.

    Concrete layers provide a ``forward`` pass, which maps the layer input
    to the layer output, and a ``backward`` pass, which back-propagates the
    error signal received from the following layer.
    """

    def forward(self, X):
        """Map the input ``X`` to the layer output; must be overridden."""
        raise NotImplementedError()

    def backward(self, delta):
        """Back-propagate the error signal ``delta``; must be overridden."""
        raise NotImplementedError()
22
+
23
+
24
class ParamLayer(Layer, ABC):
    """
    Base abstract class for all layers with trainable parameters, i.e., a
    coefficient (weight) tensor and, optionally, an intercept (bias) tensor,
    each with its own initializer and regularizer.
    """

    def __init__(self,
                 coef_shape,
                 activation,
                 coef_init,
                 inter_init,
                 coef_reg,
                 inter_reg,
                 fit_intercept,
                 random_state=None):
        """
        Parameters
        ----------

        coef_shape : tuple of int
            Shape of the coefficient (weight) tensor.

        activation : `Activation` instance
            The activation function applied by the layer.

        coef_init : callable, array-like or None
            Initializer for the coefficient tensor. If None, `glorot_uniform`
            is used; if callable, it is called with ``coef_shape`` and
            ``random_state``; otherwise it is converted to a float array and
            reshaped to ``coef_shape``.

        inter_init : callable, array-like or None
            Initializer for the intercept tensor. If None, zeros are used;
            if callable, it is called with the intercept shape; otherwise it
            is converted to a float array and reshaped to the intercept
            shape. Only used when ``fit_intercept`` is True.

        coef_reg : `Regularizer` instance or None
            Regularizer applied to the coefficient tensor. If None, `l2` is used.

        inter_reg : `Regularizer` instance or None
            Regularizer applied to the intercept tensor. If None, `l2` is used.

        fit_intercept : bool
            Whether the layer has an intercept (bias) term.

        random_state : int, RandomState instance or None, default=None
            Controls the pseudo random number generation for the parameters
            initialization.

        Raises
        ------

        TypeError
            If ``activation`` is not an `Activation` instance.

        ValueError
            If an array-like initializer cannot be reshaped to the shape of
            the tensor it initializes.
        """

        if not isinstance(activation, Activation):
            raise TypeError(f'{activation} is not an allowed activation function')
        self.activation = activation

        if coef_init is None:
            self.coef_ = glorot_uniform(coef_shape, random_state=random_state)
        elif callable(coef_init):
            self.coef_ = coef_init(coef_shape, random_state=random_state)
        else:
            # Explicit initial values must take the shape of the weight tensor;
            # forcing a column vector only works when the last dimension is 1.
            self.coef_ = np.asarray(coef_init, dtype=float).reshape(coef_shape)

        self.fit_intercept = fit_intercept
        if self.fit_intercept:
            # The intercept is broadcast over every axis except the last one,
            # e.g. (1, n_out) for a coefficient tensor of shape (n_in, n_out).
            shape = [1] * len(coef_shape)
            shape[-1] = coef_shape[-1]
            if inter_init is None:
                self.inter_ = np.zeros(shape)
            elif callable(inter_init):
                self.inter_ = inter_init(shape)
            else:
                # Same shape as the zeros/callable branches so the intercept
                # always broadcasts along the output axis.
                self.inter_ = np.asarray(inter_init, dtype=float).reshape(shape)

        self.coef_reg = l2 if coef_reg is None else coef_reg
        self.inter_reg = l2 if inter_reg is None else inter_reg
106
+
107
+
108
class FullyConnected(ParamLayer):
    """
    Dense layer: every input unit is connected to every output unit and the
    layer output is ``activation(X @ W + b)``.
    """

    def __init__(self,
                 n_in,
                 n_out,
                 activation=linear,
                 coef_init=glorot_uniform,
                 inter_init=np.zeros,
                 coef_reg=l2,
                 inter_reg=l2,
                 fit_intercept=True,
                 random_state=None):
        """
        Parameters
        ----------

        n_in : int
            Fan-in of the layer, i.e., the number of input units.

        n_out : int
            Fan-out of the layer, i.e., the number of output units (neurons).

        activation : `Activation` instance, default=linear
            Activation function applied to ``X @ W + b``.

        coef_init : callable or array-like, default=glorot_uniform
            Initializer for the coefficient (weight) tensor.

        inter_init : callable or array-like, default=np.zeros
            Initializer for the intercept (bias) tensor. Ignored unless
            ``fit_intercept`` is True.

        coef_reg : `Regularizer` instance, default=l2
            Regularizer applied to the coefficient tensor.

        inter_reg : `Regularizer` instance, default=l2
            Regularizer applied to the intercept tensor.

        fit_intercept : bool, default=True
            Whether the layer has an intercept (bias) term.

        random_state : int, RandomState instance or None, default=None
            Controls the pseudo random number generation for the parameters
            initialization.
        """
        super().__init__(coef_shape=(n_in, n_out),
                         activation=activation,
                         coef_init=coef_init,
                         inter_init=inter_init,
                         coef_reg=coef_reg,
                         inter_reg=inter_reg,
                         fit_intercept=fit_intercept,
                         random_state=random_state)
        self.fan_in, self.fan_out = n_in, n_out

    def forward(self, X):
        """
        Compute the layer output for the input ``X``.

        The input and the pre-activation are cached on the instance because
        ``backward`` reads them to compute the gradients.
        """
        self._X = X
        pre_activation = np.dot(X, self.coef_)
        if self.fit_intercept:
            pre_activation += self.inter_
        self._WX_b = pre_activation
        return self.activation(pre_activation)

    def backward(self, delta):
        """
        Back-propagate the error signal ``delta`` through the layer.

        Returns the error signal with respect to the layer input and a dict
        holding the gradient ``'dW'`` and, when the layer has an intercept,
        the gradient ``'db'``.
        """
        # error signal with respect to the pre-activation
        dZ = delta * self.activation.jacobian(self._WX_b)
        grads = {'dW': self._X.T.dot(dZ)}
        if self.fit_intercept:
            grads['db'] = np.sum(dZ, axis=0, keepdims=True)
        # error signal with respect to the layer input
        return dZ.dot(self.coef_.T), grads
@@ -0,0 +1,178 @@
1
+ from abc import ABC
2
+
3
+ import autograd.numpy as np
4
+ from scipy.special import xlogy
5
+
6
+ from .activations import Linear
7
+ from .layers import ParamLayer
8
+ from .regularizers import L2
9
+ from ...opti import OptimizationFunction
10
+
11
+
12
class NeuralNetworkLoss(OptimizationFunction, ABC):
    """
    Base abstract class for all neural network loss functions.

    The objective is the data loss scaled by ``1 / (2 * n_samples)`` plus the
    regularization terms of every parametric layer, scaled by the same
    factor. Its jacobian is obtained by back-propagating the output error
    signal through the attached network.

    Subclasses must implement ``loss`` and, optionally, override ``delta``.
    """

    def __init__(self, neural_net, X, y):
        """
        Parameters
        ----------

        neural_net : `NeuralNetwork` instance
            The neural network estimator this loss is attached to. Its
            layers and its ``forward``/``backward``/``_pack``/``_unpack``
            methods are used to evaluate the objective and its jacobian.

        X : ndarray of shape (n_samples, n_features)
            Training data over which the loss is evaluated.

        y : ndarray of shape (n_samples, n_outputs)
            Target values associated with ``X``.
        """
        super().__init__(X.shape[1])
        self.neural_net = neural_net
        self.X = X
        self.y = y

    def args(self):
        """Return the training data and targets as a ``(X, y)`` tuple."""
        return self.X, self.y

    def loss(self, y_pred, y_true):
        """Un-normalized data loss; must be implemented by subclasses."""
        raise NotImplementedError

    def delta(self, y_pred, y_true):
        """Output error signal; defaults to ``y_pred - y_true``."""
        return y_pred - y_true

    def function(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the regularized objective at ``packed_coef_inter`` over the
        given batch; the full training set is used for any batch argument
        left to None.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        # load the candidate parameters into the network layers
        self.neural_net._unpack(packed_coef_inter)

        scale = 2 * X_batch.shape[0]
        param_layers = [layer for layer in self.neural_net.layers
                        if isinstance(layer, ParamLayer)]
        coef_regs = sum(layer.coef_reg(layer.coef_) for layer in param_layers) / scale
        # layers without intercept have no intercept penalty to add
        inter_regs = sum(layer.inter_reg(layer.inter_) for layer in param_layers
                         if layer.fit_intercept) / scale
        data_loss = self.loss(self.neural_net.forward(X_batch), y_batch)
        return 1 / scale * data_loss + coef_regs + inter_regs

    def jacobian(self, packed_coef_inter, X_batch=None, y_batch=None):
        """
        Evaluate the packed jacobian at ``packed_coef_inter`` over the given
        batch; the full training set is used for any batch argument left to
        None.
        """
        X_batch = self.X if X_batch is None else X_batch
        y_batch = self.y if y_batch is None else y_batch

        # load the candidate parameters into the network layers
        self.neural_net._unpack(packed_coef_inter)

        y_pred = self.neural_net.forward(X_batch)
        delta = 1 / X_batch.shape[0] * self.delta(y_pred, y_batch)
        return self.neural_net._pack(*self.neural_net.backward(delta))
77
+
78
+
79
class MeanSquaredError(NeuralNetworkLoss):
    r"""
    Mean squared error loss for regression, computed as:

    .. math::

        L(y_{pred}, y_{true}) = \sum (y_{pred} - y_{true})^2
    """

    def x_star(self):
        """
        Return the closed-form minimizer when the network is a single linear
        layer with an `L2` coefficient regularizer and no intercept,
        otherwise an array of NaN with ``self.ndim`` entries.

        The minimizer is computed once and cached in ``self.x_opt``.
        """
        layers = self.neural_net.layers
        # the checks short-circuit, so the last layer is inspected only when
        # the network has exactly one layer
        if not (len(layers) == 1 and
                isinstance(layers[-1].activation, Linear) and
                isinstance(layers[-1].coef_reg, L2) and
                not layers[-1].fit_intercept):
            return np.full(fill_value=np.nan, shape=self.ndim)

        if not hasattr(self, 'x_opt'):
            lmbda = layers[-1].coef_reg.lmbda
            gram = self.X.T.dot(self.X)
            if lmbda == 0.:
                # ordinary least squares (normal equations)
                self.x_opt = np.linalg.inv(gram).dot(self.X.T).dot(self.y)
            else:
                # ridge regression (regularized normal equations)
                self.x_opt = np.linalg.inv(gram + np.eye(self.ndim) * lmbda).dot(self.X.T).dot(self.y)
        return self.x_opt

    def f_star(self):
        """
        Return the objective value at ``x_star()``, or ``np.inf`` when
        ``x_star()`` is entirely NaN.
        """
        x_opt = self.x_star()
        if np.isnan(x_opt).all():
            return np.inf
        return self.function(x_opt)

    def loss(self, y_pred, y_true):
        """Return the sum of the squared residuals."""
        residual = y_pred - y_true
        return np.sum(np.square(residual))
109
+
110
+
111
class MeanAbsoluteError(NeuralNetworkLoss):
    r"""
    Mean absolute error loss for regression, computed as:

    .. math::

        L(y_{pred}, y_{true}) = \sum \lvert y_{pred} - y_{true} \rvert
    """

    def loss(self, y_pred, y_true):
        """Return the sum of the absolute residuals."""
        residual = y_pred - y_true
        return np.sum(np.abs(residual))

    def delta(self, y_pred, y_true):
        """Return the element-wise sign of the residuals."""
        residual = y_pred - y_true
        return np.sign(residual)
125
+
126
+
127
class BinaryCrossEntropy(NeuralNetworkLoss):
    r"""Binary Cross-Entropy, aka Sigmoid Cross-Entropy, loss function
    for binary and multi-label classification or regression between
    0 and 1 with sigmoid output layer:

    .. math::

        L(y_{pred}, y_{true}) = -\sum \left[ y_{true} \log(y_{pred}) +
        (1 - y_{true}) \log(1 - y_{pred}) \right]
    """

    def loss(self, y_pred, y_true):
        """Return the summed binary cross-entropy of the predictions."""
        positive_term = xlogy(y_true, y_pred)
        negative_term = xlogy(1. - y_true, 1. - y_pred)
        return -np.sum(positive_term + negative_term)
140
+
141
+
142
class CategoricalCrossEntropy(NeuralNetworkLoss):
    r"""Categorical Cross-Entropy loss function for multi-class (single-label)
    classification with softmax output layer and one-hot encoded target data:

    .. math::

        L(y_{pred}, y_{true}) = -\sum y_{true} \log(y_{pred})
    """

    def loss(self, y_pred, y_true):
        """Return the summed categorical cross-entropy of the predictions."""
        return -np.sum(xlogy(y_true, y_pred))

    def delta(self, y_pred, y_true):
        """
        Return the output error signal, i.e., the predictions with 1
        subtracted at the positions selected by the one-hot targets.

        The result is a new array: ``y_pred`` is left untouched so that the
        network output held by the caller is not corrupted.
        """
        # according to: https://deepnotes.io/softmax-crossentropy
        one_hot_mask = y_true.astype(bool)
        delta = y_pred.copy()  # never mutate the caller's predictions
        delta[one_hot_mask] -= 1.
        return delta
159
+
160
+
161
class SparseCategoricalCrossEntropy(NeuralNetworkLoss):
    """Sparse Categorical Cross-Entropy loss function for multi-class
    (single-label) classification with softmax output layer"""

    def loss(self, y_pred, y_true):
        """
        Return the summed negative log of the prediction selected, for each
        row of ``y_pred``, by the integer class label in ``y_true``.
        """
        assert y_pred.shape[0] == y_true.shape[0]
        rows = np.arange(y_pred.shape[0])
        labels = y_true.astype(int).ravel()
        return -np.sum(np.log(y_pred[rows, labels]))

    def delta(self, y_pred, y_true):
        """
        Return the output error signal, i.e., the predictions with 1
        subtracted, for each row, at the column given by the class label.

        The result is a new array: ``y_pred`` is left untouched so that the
        network output held by the caller is not corrupted.
        """
        rows = np.arange(y_pred.shape[0])
        labels = y_true.astype(int).ravel()
        delta = y_pred.copy()  # never mutate the caller's predictions
        delta[rows, labels] -= 1.
        return delta
172
+
173
+
174
# Lowercase aliases bound to the loss classes themselves (not to instances).
mean_squared_error = MeanSquaredError
mean_absolute_error = MeanAbsoluteError
binary_cross_entropy = BinaryCrossEntropy
categorical_cross_entropy = CategoricalCrossEntropy
sparse_categorical_cross_entropy = SparseCategoricalCrossEntropy
@@ -0,0 +1,87 @@
1
+ from abc import ABC
2
+
3
+ import numpy as np
4
+
5
+
6
class Regularizer(ABC):
    """
    Common interface shared by every regularizer.

    A regularizer penalizes the magnitude of the parameters. It exposes the
    penalty through ``function`` and its derivative through ``jacobian``;
    calling the instance is a shortcut for ``function``.
    """

    def __init__(self, lmbda=0.):
        """
        Parameters
        ----------

        lmbda : float, default=0.
            Regularization strength: the higher the value, the stronger
            the penalty on the parameters.
        """
        self.lmbda = lmbda

    def function(self, theta):
        """Penalty evaluated at ``theta``; must be overridden."""
        raise NotImplementedError()

    def jacobian(self, theta):
        """Derivative of the penalty at ``theta``; must be overridden."""
        raise NotImplementedError()

    def __call__(self, theta):
        """Evaluate the penalty at ``theta`` by delegating to ``function``."""
        penalty = self.function(theta)
        return penalty
32
+
33
+
34
class L1(Regularizer):
    r"""
    L1 (Lasso) regularizer:

    .. math::

        R(\theta) = \lambda \sum \lvert \theta \rvert
    """

    def __init__(self, lmbda=0.):
        """
        Parameters
        ----------

        lmbda : float, default=0.
            Regularization strength.
        """
        super().__init__(lmbda)

    def function(self, theta):
        """Return ``lmbda`` times the sum of the absolute values of ``theta``."""
        magnitudes = np.abs(theta)
        return self.lmbda * np.sum(magnitudes)

    def jacobian(self, theta):
        """Return ``lmbda`` times the element-wise sign of ``theta``."""
        signs = np.sign(theta)
        return self.lmbda * signs
58
+
59
+
60
class L2(Regularizer):
    r"""
    L2 (Ridge) regularizer:

    .. math::

        R(\theta) = \lambda \sum \theta^2
    """

    def __init__(self, lmbda=0.):
        """
        Parameters
        ----------

        lmbda : float, default=0.
            Regularization strength.
        """
        super().__init__(lmbda)

    def function(self, theta):
        """Return ``lmbda`` times the sum of the squared entries of ``theta``."""
        squares = np.square(theta)
        return self.lmbda * np.sum(squares)

    def jacobian(self, theta):
        """
        Return ``lmbda * theta``.

        NOTE(review): this is half of the analytical gradient
        ``2 * lmbda * theta`` of ``function``; presumably the missing factor
        is accounted for by the code consuming the jacobian - confirm
        against the caller before changing it.
        """
        scaled = self.lmbda * theta
        return scaled
84
+
85
+
86
# Shared default regularizer instances, built with the default lmbda=0.
l1 = L1()
l2 = L2()
@@ -0,0 +1,3 @@
1
+ __all__ = ['SVM', 'SVC', 'SVR']
2
+
3
+ from ._base import SVM, SVC, SVR