optiml 1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiml/__init__.py +0 -0
- optiml/ml/__init__.py +0 -0
- optiml/ml/neural_network/__init__.py +3 -0
- optiml/ml/neural_network/_base.py +475 -0
- optiml/ml/neural_network/activations.py +79 -0
- optiml/ml/neural_network/initializers.py +66 -0
- optiml/ml/neural_network/layers.py +183 -0
- optiml/ml/neural_network/losses.py +178 -0
- optiml/ml/neural_network/regularizers.py +87 -0
- optiml/ml/svm/__init__.py +3 -0
- optiml/ml/svm/_base.py +1442 -0
- optiml/ml/svm/kernels.py +208 -0
- optiml/ml/svm/losses.py +284 -0
- optiml/ml/svm/smo.py +797 -0
- optiml/ml/tests/__init__.py +0 -0
- optiml/ml/tests/_datasets.py +49 -0
- optiml/ml/tests/_utils.py +28 -0
- optiml/ml/tests/test_initializers.py +33 -0
- optiml/ml/tests/test_neural_network.py +86 -0
- optiml/ml/tests/test_svc.py +245 -0
- optiml/ml/tests/test_svr.py +256 -0
- optiml/ml/utils.py +252 -0
- optiml/opti/__init__.py +4 -0
- optiml/opti/_base.py +309 -0
- optiml/opti/constrained/__init__.py +9 -0
- optiml/opti/constrained/_base.py +404 -0
- optiml/opti/constrained/active_set.py +228 -0
- optiml/opti/constrained/frank_wolfe.py +158 -0
- optiml/opti/constrained/interior_point.py +282 -0
- optiml/opti/constrained/projected_gradient.py +138 -0
- optiml/opti/constrained/tests/__init__.py +0 -0
- optiml/opti/constrained/tests/test_active_set.py +16 -0
- optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
- optiml/opti/constrained/tests/test_interior_point.py +16 -0
- optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
- optiml/opti/constrained/tests/test_lower_bound.py +29 -0
- optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
- optiml/opti/unconstrained/__init__.py +6 -0
- optiml/opti/unconstrained/_base.py +63 -0
- optiml/opti/unconstrained/line_search/__init__.py +10 -0
- optiml/opti/unconstrained/line_search/_base.py +106 -0
- optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
- optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
- optiml/opti/unconstrained/line_search/line_search.py +248 -0
- optiml/opti/unconstrained/line_search/newton.py +198 -0
- optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
- optiml/opti/unconstrained/proximal_bundle.py +219 -0
- optiml/opti/unconstrained/stochastic/__init__.py +12 -0
- optiml/opti/unconstrained/stochastic/_base.py +246 -0
- optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
- optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
- optiml/opti/unconstrained/stochastic/adam.py +179 -0
- optiml/opti/unconstrained/stochastic/adamax.py +178 -0
- optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
- optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
- optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
- optiml/opti/unconstrained/stochastic/schedules.py +89 -0
- optiml/opti/unconstrained/tests/__init__.py +0 -0
- optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
- optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
- optiml/opti/unconstrained/tests/test_adam.py +42 -0
- optiml/opti/unconstrained/tests/test_adamax.py +41 -0
- optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
- optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
- optiml/opti/unconstrained/tests/test_functions.py +34 -0
- optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
- optiml/opti/unconstrained/tests/test_newton.py +20 -0
- optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
- optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
- optiml/opti/unconstrained/tests/test_verbose.py +25 -0
- optiml/opti/utils.py +353 -0
- optiml-1.7.dist-info/METADATA +203 -0
- optiml-1.7.dist-info/RECORD +76 -0
- optiml-1.7.dist-info/WHEEL +5 -0
- optiml-1.7.dist-info/licenses/LICENSE +21 -0
- optiml-1.7.dist-info/top_level.txt +1 -0
optiml/opti/_base.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
|
|
3
|
+
import autograd.numpy as np
|
|
4
|
+
from autograd import jacobian, hessian
|
|
5
|
+
from scipy.linalg import cho_solve, cho_factor
|
|
6
|
+
from scipy.sparse.linalg import minres
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Optimizer(ABC):
    """
    Base class of the optimizers: it stores the objective function, the current
    point and the stopping parameters, and provides the per-iteration bookkeeping
    (callback, history, duality gap, dual feasibility checks) used when the
    objective is a Lagrangian dual, i.e., when it exposes a `primal` attribute.
    """

    def __init__(self,
                 f,
                 x=None,
                 eps=1e-6,
                 tol=1e-8,
                 max_iter=1000,
                 callback=None,
                 callback_args=(),
                 random_state=None,
                 verbose=False):
        """

        :param f: the objective function, an instance of `OptimizationFunction`.
        :param x: (1D array or callable, optional, default value None): the starting point, or a
                  callable that returns it when invoked with the keyword `size` (or `shape`, if
                  `size` is not accepted) set to `f.ndim`. If None, the starting point is drawn
                  from a uniform distribution; for a non-augmented Lagrangian dual it is the
                  concatenation of a random primal point and a zero vector of dual variables.
        :param eps: (real scalar, optional, default value 1e-6): the accuracy in the stopping
                    criterion: the algorithm is stopped when the norm of the gradient is less
                    than or equal to eps.
        :param tol: (real scalar, optional, default value 1e-8): the threshold used by
                    `check_lagrangian_dual_optimality` on the norm of the last step and on the
                    norm of the constraints.
        :param max_iter: (integer scalar, optional, default value 1000): the maximum number of iterations.
        :param callback: (callable, optional, default value None): custom function invoked by
                         `callback` as `callback(self, *args, *callback_args)`.
        :param callback_args: (tuple, optional, default value ()): extra positional arguments
                              appended to every call of the custom callback.
        :param random_state: (optional, default value None): seed given to `np.random.RandomState`
                             to draw the random starting point; if None the global generator is used.
        :param verbose: (boolean or integer, optional, default value False): print details about
                        the iterations if truthy, nothing otherwise; an integer k > 1 selects
                        one iteration every k (see `is_verbose`).
        :raises TypeError: if f is not an `OptimizationFunction`.
        :raises ValueError: if max_iter is not > 0.
        """
        if not isinstance(f, OptimizationFunction):
            raise TypeError(f'{f} is not an allowed optimization function')
        self.f = f

        if x is None:
            rng = np.random if random_state is None else np.random.RandomState(random_state)
            if hasattr(self.f, 'primal') and not hasattr(self.f, 'rho'):
                # plain Lagrangian dual: initialize the primal variable, i.e., x, as a random
                # uniform and the dual variable, i.e., mu_lmbda, to 0
                x = np.concatenate((rng.uniform(size=f.primal.ndim),  # x
                                    np.zeros(f.AG.shape[0])))  # mu_lmbda
            else:
                # either a plain function or an augmented Lagrangian dual; for the latter the
                # dual_x is handled and initialized to 0 inside the `AugmentedLagrangianQuadratic`
                # class, so only the primal variable, i.e., x, is drawn as a random uniform
                x = rng.uniform

        if callable(x):
            try:
                self.x = x(size=f.ndim)
            except TypeError:
                # the initializer does not accept `size`: retry with `shape`
                self.x = x(shape=f.ndim)
        else:
            self.x = np.asarray(x, dtype=float)

        self.f_x = np.nan
        if self.is_lagrangian_dual():
            self.past_x = self.x.copy()
            self.primal_f_x = np.nan
            self.dgap = np.nan
        self.g_x = np.zeros(0)
        self.eps = eps
        self.tol = tol
        if not max_iter > 0:
            raise ValueError('max_iter must be > 0')
        self.max_iter = max_iter
        self.iter = 0
        self.status = 'unknown'
        # the trajectory is recorded only for low dimensional problems
        if (self.f.ndim <= 3 or
                hasattr(self.f, 'primal') and self.f.primal.ndim <= 3):
            self.x0_history = []
            self.x1_history = []
            self.f_x_history = []
        self._callback = callback
        self.callback_args = callback_args
        self.random_state = random_state
        self.verbose = verbose

    def is_lagrangian_dual(self):
        """
        :return: True if the objective function exposes a `primal` attribute.
        """
        return hasattr(self.f, 'primal')

    def is_augmented_lagrangian_dual(self):
        """
        :return: True if the objective function exposes both a `primal` and a `rho` attribute.
        """
        return self.is_lagrangian_dual() and hasattr(self.f, 'rho')

    def callback(self, args=()):
        """
        Per-iteration bookkeeping: update the primal value and the duality gap (Lagrangian
        dual only), record the trajectory and invoke the custom callback, if any.

        :param args: (tuple, optional, default value ()): positional arguments passed to the
                     custom callback right after the optimizer itself.
        """
        if hasattr(self.f, 'primal'):  # is_lagrangian_dual()

            if hasattr(self.f, 'rho'):  # is_augmented_lagrangian_dual()
                self.primal_f_x = self.f.primal.function(self.x)
            else:
                # here x also stores the dual variables after the primal ones
                self.primal_f_x = self.f.primal.function(self.x[:self.f.primal.ndim])

            # relative duality gap; the denominator is kept >= 1
            self.dgap = abs((self.primal_f_x - self.f_x) / max(abs(self.primal_f_x), 1))

            if self.is_verbose():
                print('\tpcost: {: 1.4e}'.format(self.primal_f_x), end='')
                print('\tdgap: {: 1.4e}'.format(self.dgap), end='')

            if self.f.primal.ndim == 2:
                self.x0_history.append(self.x[0])
                self.x1_history.append(self.x[1])
                self.f_x_history.append(self.primal_f_x)

            if callable(self._callback):  # custom callback
                self._callback(self, *args, *self.callback_args)

            self.past_x = self.x.copy()  # backup primal x before it is updated outside the callback

        else:

            # at least two coordinates are needed: a 1-dimensional objective would raise an
            # IndexError on x[1] after x0_history has already been updated
            if 2 <= self.f.ndim <= 3:
                self.x0_history.append(self.x[0])
                self.x1_history.append(self.x[1])
                self.f_x_history.append(self.f_x)

            if callable(self._callback):  # custom callback
                self._callback(self, *args, *self.callback_args)

    def check_lagrangian_dual_optimality(self):
        """
        For a Lagrangian dual objective, update (augmented case) or clip (plain case) the dual
        variables and stop the iterations if either the last step or the constraints have a
        norm not larger than tol. It does nothing for any other objective.

        :raises StopIteration: after setting status to 'optimal', when a stopping condition holds.
        """
        if hasattr(self.f, 'primal'):  # is_lagrangian_dual()

            constraints = self.f.constraints(self.x)

            if hasattr(self.f, 'rho'):  # is_augmented_lagrangian_dual()

                self.f.past_dual_x = self.f.dual_x.copy()  # backup dual_x before updating it

                # update dual_x and clip lmbda
                self.f.dual_x += self.f.rho * constraints
                self.f.dual_x[self.f.n_eq:] = np.clip(self.f.dual_x[self.f.n_eq:], a_min=0, a_max=None)

                # check optimality conditions
                if ((np.linalg.norm(self.f.dual_x - self.f.past_dual_x) +
                     np.linalg.norm(self.x - self.past_x) <= self.tol) or
                        np.linalg.norm(constraints) <= self.tol):
                    self.status = 'optimal'
                    raise StopIteration

            else:

                # clip lmbda and backup mu_lmbda
                self.x[self.f.primal.ndim + self.f.n_eq:] = np.clip(self.x[self.f.primal.ndim + self.f.n_eq:],
                                                                    a_min=0, a_max=None)
                self.f.dual_x = self.x[self.f.primal.ndim:].copy()

                # check optimality conditions
                if ((np.linalg.norm(self.x - self.past_x) <= self.tol) or  # x_mu_lmbda - past_x_mu_lmbda
                        np.linalg.norm(constraints) <= self.tol):
                    self.status = 'optimal'
                    raise StopIteration

    def check_lagrangian_dual_conditions(self):
        """
        Check that the Lagrange multipliers that control the inequality constraints are >= 0.
        It does nothing if the objective is not a Lagrangian dual.

        :raises AssertionError: if any of those multipliers is negative.
        """
        if hasattr(self.f, 'primal'):  # is_lagrangian_dual()
            if hasattr(self.f, 'rho'):  # is_augmented_lagrangian_dual()
                assert all(self.f.dual_x[self.f.n_eq:] >= 0)
            else:
                assert all(self.x[self.f.primal.ndim + self.f.n_eq:] >= 0)

    def is_verbose(self):
        """
        :return: a truthy value if details have to be printed at the current iteration, i.e.,
                 if verbose is truthy and iter is a multiple of verbose.
        """
        return self.verbose and not self.iter % self.verbose

    def minimize(self):
        """
        Run the optimizer; not implemented in this base class.
        """
        raise NotImplementedError

    def _print_header(self):
        """
        Print the header of the verbose output; not implemented in this base class.
        """
        raise NotImplementedError

    def _print_info(self):
        """
        Print the details of the current iteration; not implemented in this base class.
        """
        raise NotImplementedError
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class OptimizationFunction(ABC):
    """
    Base class of the objective functions: subclasses implement `function`, while the
    Jacobian and the Hessian default to the ones built by automatic differentiation.
    """

    def __init__(self, ndim=2):
        """
        :param ndim: (integer scalar, optional, default value 2): the number of
                     dimensions of the function.
        """
        self.ndim = ndim
        # derivatives of `function` built once by automatic differentiation
        self.auto_hess = hessian(self.function)
        self.auto_jac = jacobian(self.function)

    def x_star(self):
        """
        :return: an array of `ndim` NaN, the placeholder used here for the optimal point.
        """
        return np.full(self.ndim, np.nan)

    def f_star(self):
        """
        :return: infinity, the placeholder used here for the optimal value.
        """
        return np.inf

    def args(self):
        """
        :return: an empty tuple.
        """
        return tuple()

    def function(self, x):
        """
        The value of the function; it has to be implemented by the subclasses.
        :param x: 1D array of points at which the function is to be computed.
        """
        raise NotImplementedError

    def jacobian(self, x):
        """
        The Jacobian (i.e., the gradient) of the function, computed by automatic differentiation.
        :param x: 1D array of points at which the Jacobian is to be computed.
        :return:  the Jacobian of the function at x.
        """
        return self.auto_jac(x)

    def function_jacobian(self, *args, **kwargs):
        """
        :return: the pair made of the value and the Jacobian of the function, both
                 computed with the given arguments.
        """
        value = self.function(*args, **kwargs)
        gradient = self.jacobian(*args, **kwargs)
        return value, gradient

    def hessian(self, x):
        """
        The Hessian matrix of the function, computed by automatic differentiation.
        :param x: 1D array of points at which the Hessian is to be computed.
        :return:  the Hessian matrix of the function at x.
        """
        return self.auto_hess(x)

    def __call__(self, *args, **kwargs):
        """
        Calling the object is the same as calling `function`.
        """
        return self.function(*args, **kwargs)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class Quadratic(OptimizationFunction):
    r"""
    A quadratic function :math:`f(x) = \tfrac{1}{2} x^\top Q x + q^\top x` with
    closed form Jacobian and Hessian.
    """

    def __init__(self, Q, q):
        r"""
        Construct a quadratic function from its linear and quadratic part defined as

        .. math::

            \tfrac{1}{2} x^\top Q x + q^\top x

        :param Q: ([n x n] real symmetric matrix, not necessarily positive semidefinite):
                  the Hessian (i.e., the quadratic part) of f. If it is not
                  positive semidefinite, f(x) will be unbounded below.
        :param q: (real vector with n entries): the linear part of f. Only its total
                  size is validated here.
        :raises ValueError: if Q has less than 2 rows, if Q is not a square
                            matrix or if the size of q is not n.
        """
        Q = np.array(Q)
        q = np.array(q)

        n = len(Q)
        super(Quadratic, self).__init__(n)

        if n <= 1:
            raise ValueError('Q is too small')
        # n is the number of rows, so squareness must be checked against the number of
        # columns: comparing n with Q.shape[0] can never fail
        if Q.ndim != 2 or Q.shape[1] != n:
            raise ValueError('Q is not square')
        self.Q = Q

        if q.size != n:
            raise ValueError('q size does not match with Q')
        self.q = q

    def x_star(self):
        """
        Solve the linear system Q x = -q, i.e., look for a stationary point of the
        function; the result is computed once and cached in `x_opt`.

        :return: the solution of the linear system Q x = -q.
        """
        if not hasattr(self, 'x_opt'):
            try:
                # use the Cholesky factorization to solve the linear system if Q is
                # symmetric and positive definite, i.e., the function is strictly convex
                self.x_opt = cho_solve(cho_factor(self.Q), -self.q)
            except np.linalg.LinAlgError:
                # Q is not positive definite: if it is singular, i.e., the function is linear
                # along the eigenvectors corresponding to the null eigenvalues, the system has
                # infinitely many solutions (or none), so we choose the one that minimizes
                # the residual
                # NOTE(review): this branch is also taken when Q is indefinite, in which case
                # f is unbounded below and the point returned is not a minimizer
                self.x_opt = minres(self.Q, -self.q)[0]
        return self.x_opt

    def f_star(self):
        """
        :return: the value of the function at the point returned by `x_star`.
        """
        return self.function(self.x_star())

    def function(self, x):
        r"""
        A general quadratic function :math:`f(x) = \tfrac{1}{2} x^\top Q x + q^\top x`.

        :param x: (1D array with n entries): the point at which the function is to be computed.
        :return:  the value :math:`\tfrac{1}{2} x^\top Q x + q^\top x` of the general quadratic function at x.
        """
        return 0.5 * x @ self.Q @ x + self.q @ x

    def jacobian(self, x):
        r"""
        The Jacobian (i.e., the gradient) of a general quadratic function :math:`J f(x) = Q x + q`.
        :param x: (1D array with n entries): the point at which the Jacobian is to be computed.
        :return:  the Jacobian of a general quadratic function.
        """
        return self.Q @ x + self.q

    def hessian(self, x):
        r"""
        The Hessian matrix of a general quadratic function :math:`H f(x) = Q`.
        :param x: 1D array of points at which the Hessian is to be computed; it is not
                  used since the Hessian is constant.
        :return:  the Hessian matrix (i.e., the quadratic part) of a general quadratic function at x.
        """
        return self.Q
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
# Sample 2x2 quadratic functions, all with the same linear part q = [10, 5];
# the eigenvalues of a matrix [[a, b], [b, a]] are a + b and a - b.

# nicely conditioned Hessian (eigenvalues 4 and 8)
quad1 = Quadratic(Q=[[6, -2],
                     [-2, 6]],
                  q=[10, 5])

# less nicely conditioned Hessian (eigenvalues 2 and 8)
quad2 = Quadratic(Q=[[5, -3],
                     [-3, 5]],
                  q=[10, 5])

# Hessian having one zero eigenvalue, i.e., a singular matrix (eigenvalues 0 and 8)
quad3 = Quadratic(Q=[[4, -4],
                     [-4, 4]],
                  q=[10, 5])

# indefinite Hessian, i.e., one positive and one negative eigenvalue (-2 and 8)
quad4 = Quadratic(Q=[[3, -5],
                     [-5, 3]],
                  q=[10, 5])

# "very elongated" Hessian: the smallest eigenvalue is positive
# but much smaller than the largest one (2 and 200)
quad5 = Quadratic(Q=[[101, -99],
                     [-99, 101]],
                  q=[10, 5])
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
__all__ = ['BoxConstrainedQuadraticOptimizer', 'LagrangianQuadratic', 'AugmentedLagrangianQuadratic',
|
|
2
|
+
'ProjectedGradient', 'ActiveSet', 'FrankWolfe', 'InteriorPoint']
|
|
3
|
+
|
|
4
|
+
from ._base import BoxConstrainedQuadraticOptimizer, LagrangianQuadratic, AugmentedLagrangianQuadratic
|
|
5
|
+
|
|
6
|
+
from .projected_gradient import ProjectedGradient
|
|
7
|
+
from .active_set import ActiveSet
|
|
8
|
+
from .frank_wolfe import FrankWolfe
|
|
9
|
+
from .interior_point import InteriorPoint
|