optiml 1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiml/__init__.py +0 -0
- optiml/ml/__init__.py +0 -0
- optiml/ml/neural_network/__init__.py +3 -0
- optiml/ml/neural_network/_base.py +475 -0
- optiml/ml/neural_network/activations.py +79 -0
- optiml/ml/neural_network/initializers.py +66 -0
- optiml/ml/neural_network/layers.py +183 -0
- optiml/ml/neural_network/losses.py +178 -0
- optiml/ml/neural_network/regularizers.py +87 -0
- optiml/ml/svm/__init__.py +3 -0
- optiml/ml/svm/_base.py +1442 -0
- optiml/ml/svm/kernels.py +208 -0
- optiml/ml/svm/losses.py +284 -0
- optiml/ml/svm/smo.py +797 -0
- optiml/ml/tests/__init__.py +0 -0
- optiml/ml/tests/_datasets.py +49 -0
- optiml/ml/tests/_utils.py +28 -0
- optiml/ml/tests/test_initializers.py +33 -0
- optiml/ml/tests/test_neural_network.py +86 -0
- optiml/ml/tests/test_svc.py +245 -0
- optiml/ml/tests/test_svr.py +256 -0
- optiml/ml/utils.py +252 -0
- optiml/opti/__init__.py +4 -0
- optiml/opti/_base.py +309 -0
- optiml/opti/constrained/__init__.py +9 -0
- optiml/opti/constrained/_base.py +404 -0
- optiml/opti/constrained/active_set.py +228 -0
- optiml/opti/constrained/frank_wolfe.py +158 -0
- optiml/opti/constrained/interior_point.py +282 -0
- optiml/opti/constrained/projected_gradient.py +138 -0
- optiml/opti/constrained/tests/__init__.py +0 -0
- optiml/opti/constrained/tests/test_active_set.py +16 -0
- optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
- optiml/opti/constrained/tests/test_interior_point.py +16 -0
- optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
- optiml/opti/constrained/tests/test_lower_bound.py +29 -0
- optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
- optiml/opti/unconstrained/__init__.py +6 -0
- optiml/opti/unconstrained/_base.py +63 -0
- optiml/opti/unconstrained/line_search/__init__.py +10 -0
- optiml/opti/unconstrained/line_search/_base.py +106 -0
- optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
- optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
- optiml/opti/unconstrained/line_search/line_search.py +248 -0
- optiml/opti/unconstrained/line_search/newton.py +198 -0
- optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
- optiml/opti/unconstrained/proximal_bundle.py +219 -0
- optiml/opti/unconstrained/stochastic/__init__.py +12 -0
- optiml/opti/unconstrained/stochastic/_base.py +246 -0
- optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
- optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
- optiml/opti/unconstrained/stochastic/adam.py +179 -0
- optiml/opti/unconstrained/stochastic/adamax.py +178 -0
- optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
- optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
- optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
- optiml/opti/unconstrained/stochastic/schedules.py +89 -0
- optiml/opti/unconstrained/tests/__init__.py +0 -0
- optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
- optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
- optiml/opti/unconstrained/tests/test_adam.py +42 -0
- optiml/opti/unconstrained/tests/test_adamax.py +41 -0
- optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
- optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
- optiml/opti/unconstrained/tests/test_functions.py +34 -0
- optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
- optiml/opti/unconstrained/tests/test_newton.py +20 -0
- optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
- optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
- optiml/opti/unconstrained/tests/test_verbose.py +25 -0
- optiml/opti/utils.py +353 -0
- optiml-1.7.dist-info/METADATA +203 -0
- optiml-1.7.dist-info/RECORD +76 -0
- optiml-1.7.dist-info/WHEEL +5 -0
- optiml-1.7.dist-info/licenses/LICENSE +21 -0
- optiml-1.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
|
|
3
|
+
import autograd.numpy as np
|
|
4
|
+
from autograd import hessian, jacobian
|
|
5
|
+
from qpsolvers import solve_qp
|
|
6
|
+
|
|
7
|
+
from optiml.opti import Optimizer, Quadratic
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BoxConstrainedQuadraticOptimizer(Optimizer, ABC):
    r"""
    Abstract base class for the optimizers that solve the convex Box-Constrained
    Quadratic program

    .. math::

        (P) \quad \min \left\{ \tfrac{1}{2} x^\top Q x + q^\top x : lb \le x \le ub \right\}

    where the lower bound :math:`lb` defaults to the all-zeros vector.
    """

    def __init__(self,
                 quad,
                 ub,
                 lb=None,
                 x=None,
                 eps=1e-6,
                 tol=1e-8,
                 max_iter=1000,
                 callback=None,
                 callback_args=(),
                 verbose=False):
        r"""

        :param quad: the quadratic function :math:`\tfrac{1}{2} x^\top Q x + q^\top x` to be minimized.
        :param ub: ([n x 1] real column vector): the upper bound of the box, i.e., the
                   variables are constrained to lie in :math:`lb \le x \le ub`.
        :param lb: ([n x 1] real column vector, optional): the lower bound of the box; if not
                   provided it defaults to the all-zeros vector, i.e., :math:`0 \le x \le ub`.
        :param x: ([n x 1] real column vector, optional): the point where to start the
                  algorithm from; if not provided, it starts from the middle of the box.
        :param eps: (real scalar, optional, default value 1e-6): the accuracy in the stopping
                    criterion: the algorithm is stopped when the norm of the gradient is less
                    than or equal to eps.
        :param tol: (real scalar, optional, default value 1e-8): the tolerance used to check the
                    optimality conditions when f is a Lagrangian dual relaxation.
        :param max_iter: (integer scalar, optional, default value 1000): the maximum number of iterations.
        :param callback: (callable, optional, default value None): a function called at each iteration
                         with the optimizer instance as first argument.
        :param callback_args: (tuple, optional, default value ()): additional arguments passed to callback.
        :param verbose: (boolean, optional, default value False): print details about each iteration
                        if True, nothing otherwise.
        :return x: ([n x 1] real column vector): the best solution found so far.
        :return status: (string): a string describing the status of the algorithm at termination
                        ('optimal', 'stopped', 'unbounded' or 'error').
        :raises TypeError: if ``quad`` is not a ``Quadratic`` instance.
        :raises ValueError: if any component of ``lb`` is greater than the matching one of ``ub``.
        """
        if not isinstance(quad, Quadratic):
            raise TypeError(f'{quad} is not an allowed quadratic function')
        ub = np.asarray(ub, dtype=float)
        lb = np.zeros_like(ub) if lb is None else np.asarray(lb, dtype=float)
        # an inverted box is an empty feasible region: fail here with a clear
        # message rather than later inside a solver
        if np.any(lb > ub):
            raise ValueError('the lower bound lb must not exceed the upper bound ub')
        super().__init__(f=quad,
                         # starts from the middle of the box
                         x=x if x is not None else (lb + ub) / 2,
                         eps=eps,
                         tol=tol,
                         max_iter=max_iter,
                         callback=callback,
                         callback_args=callback_args,
                         verbose=verbose)
        self.lb = lb
        self.ub = ub

    def f_star(self):
        """
        Evaluate the objective at the reference solution.

        :return: the value of ``self.f.function`` at ``self.x_star()``.
        """
        return self.f.function(self.x_star())

    def x_star(self):
        """
        Compute the reference solution of (P) with ``solve_qp`` (``quadprog``
        backend), using ``self.lb`` and ``self.ub`` as variable bounds.

        The result is stored in ``self.x_opt`` the first time it is computed
        and returned from there on every later call.

        :return: whatever ``solve_qp`` returned for this problem.
        """
        if not hasattr(self, 'x_opt'):
            self.x_opt = solve_qp(P=self.f.Q,
                                  q=self.f.q,
                                  lb=self.lb,
                                  ub=self.ub,
                                  solver='quadprog')
        return self.x_opt
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class LagrangianQuadratic(Quadratic):
    r"""
    Construct the lagrangian relaxation of a constrained quadratic function defined as

    .. math::

        \tfrac{1}{2} x^\top Q x + q^\top x : A x = b, \; G x \le h, \; lb \le x \le ub

    i.e.,

    .. math::

        \tfrac{1}{2} x^\top Q x + q^\top x : A x = b, \; \hat{G} x \le \hat{h}

    where :math:`\hat{G}^\top = [\, G \;\; -I \;\; I \,]` and :math:`\hat{h} = [\, h \;\; -lb \;\; ub \,]`.
    """

    def __init__(self, primal, A=None, b=None, G=None, h=None, lb=None, ub=None):
        r"""

        :param primal: the quadratic function to be relaxed.
        :param A: (optional) the matrix of the equality constraints :math:`A x = b`.
        :param b: (optional) the right-hand side of the equality constraints.
        :param G: (optional) the matrix of the inequality constraints :math:`G x \le h`.
        :param h: (optional) the right-hand side of the inequality constraints.
        :param lb: (optional) lower bound on x, appended to G, h as :math:`-x \le -lb`.
        :param ub: (optional) upper bound on x, appended to G, h as :math:`x \le ub`.
        :raises TypeError: if ``primal`` is not a ``Quadratic`` instance.
        :raises ValueError: if only one of ``A``, ``b`` or only one of ``G``, ``h`` is given.
        """
        if not isinstance(primal, Quadratic):
            raise TypeError(f'{primal} is not an allowed quadratic function')
        super(LagrangianQuadratic, self).__init__(primal.Q, primal.q)
        self.primal = primal
        # check that the constraints are complete *before* using them: otherwise
        # a missing h (or b) surfaces as an unrelated numpy error raised while
        # the box constraints are being appended below
        if G is None and h is not None:
            raise ValueError('incomplete inequality constraint (missing G)')
        if G is not None and h is None:
            raise ValueError('incomplete inequality constraint (missing h)')
        if A is None and b is not None:
            raise ValueError('incomplete equality constraint (missing A)')
        if A is not None and b is None:
            raise ValueError('incomplete equality constraint (missing b)')
        self.A = np.atleast_2d(A).astype(float) if A is not None else None
        # right-hand sides are coerced to (at least 1-d) float arrays so that
        # scalars and lists can be concatenated with the box bounds
        self.b = np.atleast_1d(b).astype(float) if b is not None else None
        self.G = np.atleast_2d(G).astype(float) if G is not None else None
        self.h = np.atleast_1d(h).astype(float) if h is not None else None
        # fold the lower bound into the inequalities as  -I x <= -lb
        self.lb = np.asarray(lb, dtype=float) if lb is not None else None
        if self.lb is not None:
            if self.G is None:
                self.G = -np.eye(self.ndim)
                self.h = -self.lb
            else:
                self.G = np.concatenate((self.G, -np.eye(self.ndim)), axis=0)
                self.h = np.concatenate((self.h, -self.lb))
        # fold the upper bound into the inequalities as  I x <= ub
        self.ub = np.asarray(ub, dtype=float) if ub is not None else None
        if self.ub is not None:
            if self.G is None:
                self.G = np.eye(self.ndim)
                self.h = self.ub
            else:
                self.G = np.concatenate((self.G, np.eye(self.ndim)), axis=0)
                self.h = np.concatenate((self.h, self.ub))
        # concatenate A with G and b with h for convenience and save the
        # first idx of the Lagrange multipliers constrained to be >= 0
        self.n_eq = self.A.shape[0] if self.A is not None else 0
        # a missing block is replaced by an all-zeros placeholder; the other
        # methods detect the placeholder by testing whether the matrix is all zeros
        if self.A is not None and self.G is not None:
            self.AG = np.concatenate((self.A, self.G))
        elif self.A is not None:
            self.AG = self.A
            self.G = np.zeros((self.ndim, self.ndim))  # G is None
        elif self.G is not None:
            self.AG = self.G
            self.A = np.zeros((self.ndim, self.ndim))  # A is None
        else:
            self.A = np.zeros((self.ndim, self.ndim))
            self.G = np.zeros((self.ndim, self.ndim))
            self.AG = np.concatenate((self.A, self.G))  # A and G are None
        if self.b is not None and self.h is not None:
            self.bh = np.concatenate((self.b, self.h))
        elif self.b is not None:
            self.h = np.zeros(self.ndim)  # h is None
            self.bh = self.b
        elif self.h is not None:
            self.b = np.zeros(self.ndim)  # b is None
            self.bh = self.h
        else:
            self.b = np.zeros(self.ndim)
            self.h = np.zeros(self.ndim)
            self.bh = np.concatenate((self.b, self.h))  # b and h are None
        # the relaxation lives in the primal-dual space: one extra variable
        # (multiplier) for each row of AG
        self.ndim += self.AG.shape[0]
        # backup Lagrange multipliers
        self.dual_x = None  # mu_lmbda

    def f_star(self):
        """
        Evaluate the primal objective at the reference solution.

        :return: the value of ``self.primal.function`` at ``self.x_star()``.
        """
        return self.primal.function(self.x_star())

    def x_star(self):
        """
        Compute the reference solution of the constrained problem with
        ``solve_qp`` (``cvxopt`` backend) and cache it in ``self.x_opt``.

        An all-zeros A (resp. G) is treated as "no equality (resp. inequality)
        constraints" and is not passed to the solver.

        :return: whatever ``solve_qp`` returned for this problem.
        """
        if not hasattr(self, 'x_opt'):
            # check for A (resp. G) since b (resp. h) can legitimately be zero
            has_eq = not np.all((self.A == 0))
            has_ineq = not np.all((self.G == 0))
            self.x_opt = solve_qp(P=self.Q,
                                  q=self.q,
                                  A=self.A if has_eq else None,
                                  b=self.b if has_eq else None,
                                  G=self.G if has_ineq else None,
                                  h=self.h if has_ineq else None,
                                  solver='cvxopt')
        return self.x_opt

    def constraints(self, x_mu_lmbda):
        """
        Compute the residual of the stacked constraints.

        :param x_mu_lmbda: the primal-dual variable; only its first
                           ``self.primal.ndim`` entries (the primal part) are used.
        :return: ``AG @ x - bh``.
        """
        return self.AG @ x_mu_lmbda[:self.primal.ndim] - self.bh

    def function(self, x_mu_lmbda):
        r"""
        Compute the value of the lagrangian relaxation defined as

        .. math::

            L(x, \mu, \lambda) = \tfrac{1}{2} x^\top Q x + q^\top x + \mu^\top (A x - b) + \lambda^\top (G x - h)

        :param x_mu_lmbda: the primal-dual variable wrt evaluate the function
        :return: the function value wrt primal-dual variable
        """
        x, mu_lmbda = np.split(x_mu_lmbda, [self.primal.ndim])
        return self.primal.function(x) + mu_lmbda @ (self.AG @ x - self.bh)

    def jacobian(self, x_mu_lmbda):
        r"""
        Compute the jacobian of the lagrangian relaxation. Its primal part is

        .. math::

            \nabla_x L(x, \mu, \lambda) = Q x + q + \mu^\top A + \lambda^\top G

        while the dual part, :math:`(A x - b, \; G x - h)`, is returned with the
        sign changed because the relaxation is maximized wrt the multipliers.
        An all-zeros A (resp. G) contributes no entries to the dual part.

        :param x_mu_lmbda: the primal-dual variable wrt evaluate the jacobian
        :return: the jacobian wrt primal-dual variable
        """
        # jac = self.auto_jac(x_mu_lmbda)  # slower
        x, mu_lmbda = np.split(x_mu_lmbda, [self.primal.ndim])
        jac = np.concatenate((self.primal.jacobian(x) + mu_lmbda @ self.AG,  # gradient wrt x
                              self.A @ x - self.b if not np.all((self.A == 0)) else [],  # gradient wrt mu
                              self.G @ x - self.h if not np.all((self.G == 0)) else []))  # gradient wrt lmbda
        # gradient ascent for the dual since we need to maximize wrt mu_lmbda, so we change the sign
        jac[self.primal.ndim:] = -jac[self.primal.ndim:]
        return jac

    def hessian(self, x):
        """
        Compute the hessian by delegating to ``self.auto_hess``
        (not set in this class; presumably provided by the base class - TODO confirm).

        :param x: the point wrt evaluate the hessian
        :return: the value returned by ``self.auto_hess(x)``
        """
        return self.auto_hess(x)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class AugmentedLagrangianQuadratic(Quadratic):
    r"""
    Construct the augmented lagrangian relaxation of a constrained quadratic function defined as

    .. math::

        \tfrac{1}{2} x^\top Q x + q^\top x : A x = b, \; G x \le h, \; lb \le x \le ub

    i.e.,

    .. math::

        \tfrac{1}{2} x^\top Q x + q^\top x : A x = b, \; \hat{G} x \le \hat{h}

    where :math:`\hat{G}^\top = [\, G \;\; -I \;\; I \,]` and :math:`\hat{h} = [\, h \;\; -lb \;\; ub \,]`.
    """

    def __init__(self, primal, A=None, b=None, G=None, h=None, lb=None, ub=None, rho=1):
        r"""

        :param primal: the quadratic function to be relaxed.
        :param A: (optional) the matrix of the equality constraints :math:`A x = b`.
        :param b: (optional) the right-hand side of the equality constraints.
        :param G: (optional) the matrix of the inequality constraints :math:`G x \le h`.
        :param h: (optional) the right-hand side of the inequality constraints.
        :param lb: (optional) lower bound on x, appended to G, h as :math:`-x \le -lb`.
        :param ub: (optional) upper bound on x, appended to G, h as :math:`x \le ub`.
        :param rho: (real scalar, optional, default value 1): the weight of the
                    quadratic penalty term; it must be > 0.
        :raises TypeError: if ``primal`` is not a ``Quadratic`` instance.
        :raises ValueError: if only one of ``A``, ``b`` or only one of ``G``, ``h``
                            is given, or if ``rho`` is not > 0.
        """
        if not isinstance(primal, Quadratic):
            raise TypeError(f'{primal} is not an allowed quadratic function')
        super(AugmentedLagrangianQuadratic, self).__init__(primal.Q, primal.q)
        self.primal = primal
        # check that the constraints are complete *before* using them: otherwise
        # a missing h (or b) surfaces as an unrelated numpy error raised while
        # the box constraints are being appended below
        if G is None and h is not None:
            raise ValueError('incomplete inequality constraint (missing G)')
        if G is not None and h is None:
            raise ValueError('incomplete inequality constraint (missing h)')
        if A is None and b is not None:
            raise ValueError('incomplete equality constraint (missing A)')
        if A is not None and b is None:
            raise ValueError('incomplete equality constraint (missing b)')
        if not rho > 0:
            raise ValueError('rho must be > 0')
        self.rho = rho
        self.A = np.atleast_2d(A).astype(float) if A is not None else None
        # right-hand sides are coerced to (at least 1-d) float arrays so that
        # scalars and lists can be concatenated with the box bounds
        self.b = np.atleast_1d(b).astype(float) if b is not None else None
        self.G = np.atleast_2d(G).astype(float) if G is not None else None
        self.h = np.atleast_1d(h).astype(float) if h is not None else None
        # fold the lower bound into the inequalities as  -I x <= -lb
        self.lb = np.asarray(lb, dtype=float) if lb is not None else None
        if self.lb is not None:
            if self.G is None:
                self.G = -np.eye(self.ndim)
                self.h = -self.lb
            else:
                self.G = np.concatenate((self.G, -np.eye(self.ndim)), axis=0)
                self.h = np.concatenate((self.h, -self.lb))
        # fold the upper bound into the inequalities as  I x <= ub
        self.ub = np.asarray(ub, dtype=float) if ub is not None else None
        if self.ub is not None:
            if self.G is None:
                self.G = np.eye(self.ndim)
                self.h = self.ub
            else:
                self.G = np.concatenate((self.G, np.eye(self.ndim)), axis=0)
                self.h = np.concatenate((self.h, self.ub))
        # concatenate A with G and b with h for convenience and save the
        # first idx of the Lagrange multipliers constrained to be >= 0
        self.n_eq = self.A.shape[0] if self.A is not None else 0
        # a missing block is replaced by an all-zeros placeholder
        if self.A is not None and self.G is not None:
            self.AG = np.concatenate((self.A, self.G))
        elif self.A is not None:
            self.AG = self.A
            self.G = np.zeros((self.ndim, self.ndim))  # G is None
        elif self.G is not None:
            self.AG = self.G
            self.A = np.zeros((self.ndim, self.ndim))  # A is None
        else:
            self.A = np.zeros((self.ndim, self.ndim))
            self.G = np.zeros((self.ndim, self.ndim))
            self.AG = np.concatenate((self.A, self.G))  # A and G are None
        if self.b is not None and self.h is not None:
            self.bh = np.concatenate((self.b, self.h))
        elif self.b is not None:
            self.h = np.zeros(self.ndim)  # h is None
            self.bh = self.b
        elif self.h is not None:
            self.b = np.zeros(self.ndim)  # b is None
            self.bh = self.h
        else:
            self.b = np.zeros(self.ndim)
            self.h = np.zeros(self.ndim)
            self.bh = np.concatenate((self.b, self.h))  # b and h are None
        # initialize Lagrange multipliers to 0
        self.dual_x = np.zeros(self.AG.shape[0])  # mu_lmbda
        self.past_dual_x = self.dual_x.copy()
        # overwrite autograd utils
        self.auto_jac = jacobian(self._autograd_function)
        self.auto_hess = hessian(self._autograd_function)
        # backup {x: constraints} to speedup by reducing
        # the number of matrix-vector products
        self.last_x = None
        self.last_constraints = None

    def f_star(self):
        """
        Evaluate the primal objective at the reference solution.

        :return: the value of ``self.primal.function`` at ``self.x_star()``.
        """
        return self.primal.function(self.x_star())

    def x_star(self):
        """
        Compute the reference solution of the constrained problem with
        ``solve_qp`` (``cvxopt`` backend) and cache it in ``self.x_opt``.

        An all-zeros A (resp. G) is treated as "no equality (resp. inequality)
        constraints" and is not passed to the solver.

        :return: whatever ``solve_qp`` returned for this problem.
        """
        if not hasattr(self, 'x_opt'):
            # check for A (resp. G) since b (resp. h) can legitimately be zero
            has_eq = not np.all((self.A == 0))
            has_ineq = not np.all((self.G == 0))
            self.x_opt = solve_qp(P=self.Q,
                                  q=self.q,
                                  A=self.A if has_eq else None,
                                  b=self.b if has_eq else None,
                                  G=self.G if has_ineq else None,
                                  h=self.h if has_ineq else None,
                                  solver='cvxopt')
        return self.x_opt

    def constraints(self, x):
        """
        Compute the residual ``AG @ x - bh`` of the stacked constraints.

        The last evaluated point and its residual are remembered, so asking
        again for the same ``x`` only costs a comparison and a copy.

        :param x: the primal variable wrt evaluate the constraints
        :return: a fresh array holding the residual
        """
        if np.array_equal(self.last_x, x):
            constraints = self.last_constraints.copy()  # speedup: just restore
        else:
            constraints = self.AG @ x - self.bh
            # backup {x: constraints}
            self.last_x = x.copy()
            self.last_constraints = constraints.copy()
        return constraints

    def _clip_inequalities(self, constraints):
        """
        Return a copy of the residual where the inequality entries (those from
        index ``self.n_eq`` on) are clipped from below at 0, so that satisfied
        inequalities do not contribute to the penalty term.
        """
        clipped_constraints = constraints.copy()
        clipped_constraints[self.n_eq:] = np.clip(constraints[self.n_eq:], a_min=0, a_max=None)
        return clipped_constraints

    def function(self, x):
        r"""
        Compute the value of the augmented lagrangian relaxation defined as

        .. math::

            L(x, \mu, \lambda) = \tfrac{1}{2} x^\top Q x + q^\top x + \mu^\top (A x - b) + \lambda^\top (G x - h) + \tfrac{\rho}{2} \left( \| A x - b \|^2 + \| \max(0, G x - h) \|^2 \right)

        :param x: the primal variable wrt evaluate the function
        :return: the function value wrt primal variable
        """
        constraints = self.constraints(x)
        clipped_constraints = self._clip_inequalities(constraints)
        return (self.primal.function(x) + self.dual_x @ constraints +
                0.5 * self.rho * np.linalg.norm(clipped_constraints) ** 2)

    def _autograd_function(self, x):
        r"""
        Compute the value of the augmented lagrangian relaxation defined as

        .. math::

            L(x, \mu, \lambda) = \tfrac{1}{2} x^\top Q x + q^\top x + \mu^\top (A x - b) + \lambda^\top (G x - h) + \tfrac{\rho}{2} \left( \| A x - b \|^2 + \| \max(0, G x - h) \|^2 \right)

        Returns the same value of `function(self, x)` but it is written avoiding vector assignments
        to make it understandable by autograd, so it perform more matrix-vector products and for this
        reason it is more computationally expensive.

        :param x: the primal variable wrt evaluate the function
        :return: the function value wrt primal variable
        """
        return (self.primal.function(x) + self.dual_x @ (self.AG @ x - self.bh) +
                0.5 * self.rho * np.sum(np.square(self.A @ x - self.b)) +
                0.5 * self.rho * np.sum(np.square(np.clip(self.G @ x - self.h, a_min=0, a_max=None))))

    def _jacobian_from_clipped(self, x, clipped_constraints):
        """
        Assemble the gradient given the clipped residual at ``x``.

        The penalty gradient is ``rho * AG_active^T (AG_active x - bh_active)``;
        since the clipped residual is exactly zero on the non-active rows this
        equals ``rho * clipped_constraints @ AG``, which needs a single
        matrix-vector product instead of forming ``AG_active^T AG_active``.
        """
        return (self.primal.jacobian(x) +
                (self.dual_x + self.rho * clipped_constraints) @ self.AG)

    def jacobian(self, x):
        r"""
        Compute the jacobian of the augmented lagrangian relaxation defined as

        .. math::

            J L(x, \mu, \lambda) = Q x + q + \mu^\top A + \lambda^\top G + \rho \left( A^\top (A x - b) + G^\top \max(0, G x - h) \right)

        :param x: the primal variable wrt evaluate the jacobian
        :return: the jacobian wrt primal variable
        """
        # return self.auto_jac(x)  # slower
        clipped_constraints = self._clip_inequalities(self.constraints(x))
        return self._jacobian_from_clipped(x, clipped_constraints)

    def function_jacobian(self, x):
        """
        Compute function value and jacobian together, sharing the constraint
        residual between the two.

        :param x: the primal variable wrt evaluate function and jacobian
        :return: the pair ``(function value, jacobian)``
        """
        constraints = self.constraints(x)
        clipped_constraints = self._clip_inequalities(constraints)
        fun = (self.primal.function(x) + self.dual_x @ constraints +
               0.5 * self.rho * np.linalg.norm(clipped_constraints) ** 2)
        jac = self._jacobian_from_clipped(x, clipped_constraints)
        return fun, jac

    def hessian(self, x):
        """
        Compute the hessian through autograd, i.e., by differentiating
        ``_autograd_function`` twice.

        :param x: the primal variable wrt evaluate the hessian
        :return: the value returned by ``self.auto_hess(x)``
        """
        return self.auto_hess(x)
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from scipy.linalg import cho_solve, cho_factor
|
|
3
|
+
from scipy.sparse.linalg import minres
|
|
4
|
+
|
|
5
|
+
from optiml.opti.constrained import BoxConstrainedQuadraticOptimizer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ActiveSet(BoxConstrainedQuadraticOptimizer):
    r"""
    Apply the Active Set Method to the convex Box-Constrained Quadratic program

    .. math::

        (P) \quad \min \left\{ \tfrac{1}{2} x^\top Q x + q^\top x : lb \le x \le ub \right\}

    Since all the constraints are box ones, the active set is logically partitioned
    onto the variables fixed to the lower bound and those fixed to the upper bound;
    at each iteration the problem restricted to the remaining free variables is
    solved and the active sets are updated accordingly.
    """

    def __init__(self,
                 quad,
                 ub,
                 lb=None,
                 x=None,
                 eps=1e-6,
                 tol=1e-8,
                 max_iter=1000,
                 callback=None,
                 callback_args=(),
                 verbose=False):
        r"""

        :param quad: the quadratic function :math:`\tfrac{1}{2} x^\top Q x + q^\top x` to be minimized.
        :param ub: ([n x 1] real column vector): the upper bound of the box, i.e., the
                   variables are constrained to lie in :math:`lb \le x \le ub`.
        :param lb: ([n x 1] real column vector, optional): the lower bound of the box;
                   if not provided it defaults to the all-zeros vector.
        :param x: ([n x 1] real column vector, optional): the point where to start the
                  algorithm from; if not provided, it starts from the middle of the box.
        :param eps: (real scalar, optional, default value 1e-6): the accuracy in the stopping
                    criterion: the algorithm is stopped when the norm of the gradient at x is
                    less than or equal to eps.
        :param tol: (real scalar, optional, default value 1e-8): the tolerance used to check the
                    optimality conditions when f is a Lagrangian dual relaxation.
        :param max_iter: (integer scalar, optional, default value 1000): the maximum number of iterations.
        :param callback: (callable, optional, default value None): a function called at each iteration
                         with the optimizer instance as first argument.
        :param callback_args: (tuple, optional, default value ()): additional arguments passed to callback.
        :param verbose: (boolean, optional, default value False): print details about each iteration
                        if True, nothing otherwise.
        :return x: ([n x 1] real column vector): the best solution found so far (possibly the
                   optimal one).
        :return status: (string): a string describing the status of the algorithm at termination:
            - 'optimal': the algorithm terminated having proven that x is a(n approximately)
              optimal solution, i.e., the norm of the gradient at x is less than the required
              threshold;
            - 'stopped': the algorithm terminated having exhausted the maximum number of
              iterations: x is the best solution found so far, but not necessarily the optimal one.
        """
        super().__init__(quad=quad,
                         ub=ub,
                         lb=lb,
                         x=x,
                         eps=eps,
                         tol=tol,
                         max_iter=max_iter,
                         callback=callback,
                         callback_args=callback_args,
                         verbose=verbose)

    def minimize(self):
        """
        Run the active set method until the current point is proven optimal,
        the iteration limit is hit, or a callback / optimality check raises
        ``StopIteration``.

        :return: the optimizer itself, with ``x``, ``f_x`` and ``status`` updated.
        """

        self.f_x = self.f.function(self.x)

        # because all constraints are box ones, the active set is logically
        # partitioned onto the set of lower and upper bound constraints that are
        # active, L and U respectively. Of course, L and U have to be disjoint.
        # Since we start from the middle of the box, both the initial active sets
        # are empty
        L = np.full(self.f.ndim, False)  # indexes of variables fixed to the lower bound
        U = np.full(self.f.ndim, False)  # indexes of variables fixed to the upper bound

        # the set of "active variables", those that do *not* belong to any of the
        # two active sets and therefore are "free", is therefore the complement to
        # 1 : n of L union U; since L and U are empty now, A = 1 : n
        A = np.full(self.f.ndim, True)

        if self.verbose:
            print('iter\t cost\t\t|B|', end='')

        while True:
            if self.is_verbose():
                print('\n{:4d}\t{: 1.4e}\t{:d}\t'.format(self.iter, self.f_x, sum(L) + sum(U)), end='')

            try:
                self.callback()
            except StopIteration:
                break

            if self.iter >= self.max_iter:
                self.status = 'stopped'
                break

            # solve the unconstrained problem restricted to A; with the variables
            # in L and U fixed to their bounds the problem reads:
            #
            #   min { (1/2) x_A^T Q_{AA} x_A + (q_A + Q_{AL} lb_L + Q_{AU} ub_U)^T x_A }
            #       [ + terms that are constant in x_A ]
            #
            # and therefore the optimal solution is:
            #
            #   x_A* = -Q_{AA}^{-1} (q_A + Q_{AL} lb_L + Q_{AU} ub_U)
            #
            # note that this actually is a constrained problem subject to equality
            # constraints, but in our case equality constraints just fix variables
            # (and anyway, any QP problem with equality constraints reduces to an
            # unconstrained one)

            xs = np.zeros_like(self.x)
            xs[U] = self.ub[U]
            xs[L] = self.lb[L]

            # the variables fixed to the lower (L) and upper (U) bounds contribute the
            # constant term Q_{A,L} lb_L + Q_{A,U} ub_U to the linear part of the
            # subproblem restricted to the free variables A
            q_A = self.f.q[A] + self.f.Q[A, :][:, U].dot(self.ub[U]) + self.f.Q[A, :][:, L].dot(self.lb[L])

            if A.any():  # with no free variable there is nothing to solve for
                Q_AA = self.f.Q[A, :][:, A]
                try:
                    # use the Cholesky factorization to solve the linear system if Q_{AA} is
                    # symmetric and positive definite, i.e., the function is strictly convex
                    xs[A] = cho_solve(cho_factor(Q_AA), -q_A)
                except (np.linalg.LinAlgError, ValueError):
                    # only factorization failures are handled here, so that e.g. a
                    # KeyboardInterrupt is no longer swallowed
                    #
                    # since Q is not strictly psd, i.e., the function is linear along the
                    # eigenvectors correspondent to the null eigenvalues, the system has infinite
                    # solutions, so we will choose the one that minimizes the 2-norm
                    #
                    # `min ||Q x + q||` is formally equivalent to solve the normal equations:
                    #   (Q^T Q) x = -Q^T q
                    # (np.inner(Q, Q) is Q Q^T, which matches Q^T Q when Q is symmetric)
                    xs[A] = minres(np.inner(Q_AA, Q_AA), -Q_AA.T.dot(q_A))[0]

            if np.logical_and(xs[A] <= self.ub[A] + 1e-12, xs[A] >= self.lb[A] - 1e-12).all():
                # the solution of the unconstrained problem is actually feasible

                # move the current point right there
                self.x = xs

                # compute function value and gradient
                self.f_x, self.g_x = self.f.function(self.x), self.f.jacobian(self.x)

                # look for a fixed variable whose multiplier has the wrong sign:
                # first among those at the lower bound (negative gradient), then
                # among those at the upper bound (positive gradient)
                h = np.nonzero(np.logical_and(L, self.g_x < -1e-12))[0]
                if h.size > 0:
                    uppr = False
                else:
                    h = np.nonzero(np.logical_and(U, self.g_x > 1e-12))[0]
                    uppr = True

                if h.size == 0:
                    # no fixed variable can improve the objective: x is optimal

                    # NOTE(review): the three appends are assumed to be all guarded
                    # by the dimension check - confirm against the base class
                    if self.f.ndim <= 3:
                        self.x0_history.append(self.x[0])
                        self.x1_history.append(self.x[1])
                        self.f_x_history.append(self.f_x)

                    self.status = 'optimal'
                    break

                else:
                    h = h[0]  # that's probably Bland's anti-cycle rule
                    A[h] = True
                    if uppr:
                        U[h] = False
                        if self.is_verbose():
                            print('\tI/O: O {:d}(U)'.format(h), end='')
                    else:
                        L[h] = False
                        if self.is_verbose():
                            print('\tI/O: O {:d}(L)'.format(h), end='')
            else:
                # the solution of the unconstrained problem is not feasible
                # this means that d = xs - x is a descent direction, use it
                # of course, only the "free" part really needs to be computed

                d = np.zeros_like(self.x)
                d[A] = xs[A] - self.x[A]

                # first, compute the maximum feasible step size max_t such that:
                #   lb[i] <= x[i] + max_t * d[i] <= ub[i]   for all i

                idx = np.logical_and(A, d > 0)  # positive direction entries
                max_t = min((self.ub[idx] - self.x[idx]) / d[idx], default=np.inf)
                idx = np.logical_and(A, d < 0)  # negative direction entries
                max_t = min(max_t, min((self.lb[idx] - self.x[idx]) / d[idx], default=np.inf))

                # it is useless to compute the optimal t, because we know already
                # that it is 1, whereas max_t necessarily is < 1
                # (rebind instead of updating in place, so that an array which may
                # be shared with the caller is not modified)
                self.x = self.x + max_t * d

                # compute function value
                self.f_x = self.f.function(self.x)

                # update the active set(s)
                nL = np.logical_and(A, self.x <= self.lb + 1e-12)
                L[nL] = True
                A[nL] = False

                nU = np.logical_and(A, self.x >= self.ub - 1e-12)
                U[nU] = True
                A[nU] = False

                if self.is_verbose():
                    print('\tI/O: I {:d}+{:d}'.format(sum(nL), sum(nU)), end='')

            try:
                self.check_lagrangian_dual_optimality()
            except StopIteration:
                break

            self.iter += 1

        self.check_lagrangian_dual_conditions()

        if self.verbose:
            print('\n')

        return self
|