DFO-LS 1.2.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of DFO-LS might be problematic. Click here for more details.
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/METADATA +66 -60
- DFO_LS-1.5.0.dist-info/RECORD +14 -0
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/WHEEL +1 -1
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/top_level.txt +0 -0
- dfols/__init__.py +4 -5
- dfols/controller.py +274 -59
- dfols/hessian.py +1 -1
- dfols/model.py +64 -33
- dfols/params.py +32 -2
- dfols/solver.py +156 -91
- dfols/trust_region.py +237 -7
- dfols/util.py +71 -10
- DFO_LS-1.2.1.dist-info/RECORD +0 -16
- DFO_LS-1.2.1.dist-info/zip-safe +0 -1
- dfols/version.py +0 -25
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/LICENSE.txt +0 -0
dfols/controller.py
CHANGED
|
@@ -41,8 +41,11 @@ from .util import *
|
|
|
41
41
|
|
|
42
42
|
__all__ = ['Controller', 'ExitInformation', 'EXIT_SLOW_WARNING', 'EXIT_MAXFUN_WARNING', 'EXIT_SUCCESS',
|
|
43
43
|
'EXIT_INPUT_ERROR', 'EXIT_TR_INCREASE_ERROR', 'EXIT_LINALG_ERROR', 'EXIT_FALSE_SUCCESS_WARNING',
|
|
44
|
-
'EXIT_AUTO_DETECT_RESTART_WARNING']
|
|
44
|
+
'EXIT_AUTO_DETECT_RESTART_WARNING', 'EXIT_EVAL_ERROR']
|
|
45
45
|
|
|
46
|
+
module_logger = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
EXIT_TR_INCREASE_WARNING = 5 # warning, TR increase in proj constrained case - likely due to multiple active constraints
|
|
46
49
|
EXIT_AUTO_DETECT_RESTART_WARNING = 4 # warning, auto-detected restart criteria
|
|
47
50
|
EXIT_FALSE_SUCCESS_WARNING = 3 # warning, maximum fake successful steps reached
|
|
48
51
|
EXIT_SLOW_WARNING = 2 # warning, maximum number of slow (successful) iterations reached
|
|
@@ -51,6 +54,7 @@ EXIT_SUCCESS = 0 # successful finish (rho=rhoend, sufficient objective reductio
|
|
|
51
54
|
EXIT_INPUT_ERROR = -1 # error, bad inputs
|
|
52
55
|
EXIT_TR_INCREASE_ERROR = -2 # error, trust region step increased model value
|
|
53
56
|
EXIT_LINALG_ERROR = -3 # error, linalg error (singular matrix encountered)
|
|
57
|
+
EXIT_EVAL_ERROR = -4 # error, objective evaluation error (e.g. nan result received)
|
|
54
58
|
|
|
55
59
|
|
|
56
60
|
class ExitInformation(object):
|
|
@@ -70,6 +74,8 @@ class ExitInformation(object):
|
|
|
70
74
|
return "Warning (slow progress): " + self.msg
|
|
71
75
|
elif self.flag == EXIT_MAXFUN_WARNING:
|
|
72
76
|
return "Warning (max evals): " + self.msg
|
|
77
|
+
elif self.flag == EXIT_TR_INCREASE_WARNING:
|
|
78
|
+
return "Warning (trust region increase): " + self.msg
|
|
73
79
|
elif self.flag == EXIT_INPUT_ERROR:
|
|
74
80
|
return "Error (bad input): " + self.msg
|
|
75
81
|
elif self.flag == EXIT_TR_INCREASE_ERROR:
|
|
@@ -78,11 +84,13 @@ class ExitInformation(object):
|
|
|
78
84
|
return "Error (linear algebra): " + self.msg
|
|
79
85
|
elif self.flag == EXIT_FALSE_SUCCESS_WARNING:
|
|
80
86
|
return "Warning (max false good steps): " + self.msg
|
|
87
|
+
elif self.flag == EXIT_EVAL_ERROR:
|
|
88
|
+
return "Error (function evaluation): " + self.msg
|
|
81
89
|
else:
|
|
82
90
|
return "Unknown exit flag: " + self.msg
|
|
83
91
|
|
|
84
92
|
def able_to_do_restart(self):
|
|
85
|
-
if self.flag in [EXIT_TR_INCREASE_ERROR, EXIT_LINALG_ERROR, EXIT_SLOW_WARNING, EXIT_AUTO_DETECT_RESTART_WARNING]:
|
|
93
|
+
if self.flag in [EXIT_TR_INCREASE_ERROR, EXIT_TR_INCREASE_WARNING, EXIT_LINALG_ERROR, EXIT_SLOW_WARNING, EXIT_AUTO_DETECT_RESTART_WARNING, EXIT_EVAL_ERROR]:
|
|
86
94
|
return True
|
|
87
95
|
elif self.flag in [EXIT_MAXFUN_WARNING, EXIT_INPUT_ERROR]:
|
|
88
96
|
return False
|
|
@@ -92,14 +100,19 @@ class ExitInformation(object):
|
|
|
92
100
|
|
|
93
101
|
|
|
94
102
|
class Controller(object):
|
|
95
|
-
def __init__(self, objfun,
|
|
96
|
-
scaling_changes, do_logging):
|
|
103
|
+
def __init__(self, objfun, argsf, x0, r0, r0_nsamples, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params,
|
|
104
|
+
scaling_changes, do_logging, h=None, lh=None, argsh = (), prox_uh=None, argsprox = ()):
|
|
97
105
|
self.do_logging = do_logging
|
|
98
106
|
self.objfun = objfun
|
|
99
|
-
self.
|
|
107
|
+
self.h = h
|
|
108
|
+
self.argsf = argsf
|
|
109
|
+
self.argsh = argsh
|
|
110
|
+
self.lh = lh
|
|
111
|
+
self.prox_uh = prox_uh #TODO: add instruction for prox_uh
|
|
112
|
+
self.argsprox = argsprox
|
|
100
113
|
self.maxfun = maxfun
|
|
101
|
-
self.model = Model(npt, x0, r0, xl, xu, r0_nsamples, precondition=params("interpolation.precondition"),
|
|
102
|
-
abs_tol = params("model.abs_tol"), rel_tol = params("model.rel_tol"), do_logging=do_logging)
|
|
114
|
+
self.model = Model(npt, x0, r0, xl, xu, projections, r0_nsamples, h=self.h, argsh = argsh, precondition=params("interpolation.precondition"),
|
|
115
|
+
abs_tol = params("model.abs_tol"), rel_tol = params("model.rel_tol"), do_logging=do_logging, scaling_changes=scaling_changes)
|
|
103
116
|
self.nf = nf
|
|
104
117
|
self.nx = nx
|
|
105
118
|
self.rhobeg = rhobeg
|
|
@@ -107,9 +120,6 @@ class Controller(object):
|
|
|
107
120
|
self.rho = rhobeg
|
|
108
121
|
self.rhoend = rhoend
|
|
109
122
|
self.diffs = [0.0, 0.0, 0.0]
|
|
110
|
-
self.last_iters_step_taken = []
|
|
111
|
-
self.last_fopts_step_taken = []
|
|
112
|
-
self.num_slow_iters = 0
|
|
113
123
|
self.finished_growing = False
|
|
114
124
|
self.finished_halfway_growing = False
|
|
115
125
|
# For measuring slow iterations
|
|
@@ -134,12 +144,113 @@ class Controller(object):
|
|
|
134
144
|
|
|
135
145
|
def initialise_coordinate_directions(self, number_of_samples, num_directions, params):
|
|
136
146
|
if self.do_logging:
|
|
137
|
-
|
|
147
|
+
module_logger.debug("Initialising with coordinate directions")
|
|
138
148
|
# self.model already has x0 evaluated, so only need to initialise the other points
|
|
139
149
|
# num_directions = params("growing.ndirs_initial")
|
|
140
150
|
assert self.model.num_pts <= (self.n() + 1) * (self.n() + 2) // 2, "prelim: must have npt <= (n+1)(n+2)/2"
|
|
141
151
|
assert 1 <= num_directions < self.model.num_pts, "Initialisation: must have 1 <= ndirs_initial < npt"
|
|
142
152
|
|
|
153
|
+
|
|
154
|
+
if self.model.projections:
|
|
155
|
+
D = np.zeros((self.n(),self.n()))
|
|
156
|
+
k = 0
|
|
157
|
+
while k < self.n():
|
|
158
|
+
ek = np.zeros(self.n())
|
|
159
|
+
ek[k] = 1
|
|
160
|
+
p = np.dot(ek,min(1,self.delta))
|
|
161
|
+
yk = dykstra(self.model.projections, self.model.xbase + p, max_iter=params("dykstra.max_iters"), tol=params("dykstra.d_tol"))
|
|
162
|
+
D[k,:] = yk - self.model.xbase
|
|
163
|
+
|
|
164
|
+
k += 1 # move on to next point
|
|
165
|
+
|
|
166
|
+
# Have at least one L.D. vector, try negative direction on bad one first
|
|
167
|
+
k = 0
|
|
168
|
+
mr_tol = params("matrix_rank.r_tol")
|
|
169
|
+
D_rank, diag = qr_rank(D,tol=mr_tol)
|
|
170
|
+
while D_rank != num_directions and k < self.n():
|
|
171
|
+
if diag[k] < mr_tol:
|
|
172
|
+
ek = np.zeros(self.n())
|
|
173
|
+
ek[k] = 1
|
|
174
|
+
p = -np.dot(ek,min(1,self.delta))
|
|
175
|
+
yk = dykstra(self.model.projections, self.model.xbase + p, max_iter=params("dykstra.max_iters"), tol=params("dykstra.d_tol"))
|
|
176
|
+
dk = D[k,:].copy()
|
|
177
|
+
D[k,:] = yk - self.model.xbase
|
|
178
|
+
D_rank2, _diag2 = qr_rank(D,tol=params("matrix_rank.r_tol"))
|
|
179
|
+
if D_rank2 <= D_rank:
|
|
180
|
+
# Did not improve rank, revert change
|
|
181
|
+
D[k,:] = dk
|
|
182
|
+
# rank was improved, update D_rank for next comparison
|
|
183
|
+
D_rank = D_rank2
|
|
184
|
+
k += 1
|
|
185
|
+
|
|
186
|
+
# Try random combination of negatives...
|
|
187
|
+
k = 0
|
|
188
|
+
slctr = np.random.randint(0, 1+1, self.n()) # generate rand binary "selector" array
|
|
189
|
+
D_rank, diag = qr_rank(D,tol=params("matrix_rank.r_tol"))
|
|
190
|
+
while D_rank != num_directions and k < 100*self.n():
|
|
191
|
+
if slctr[k%self.n()] == 1: # if selector says make -ve, make -ve
|
|
192
|
+
ek = np.zeros(self.n())
|
|
193
|
+
ek[k%self.n()] = 1
|
|
194
|
+
p = -np.dot(ek,min(1,self.delta))
|
|
195
|
+
yk = dykstra(self.model.projections, self.model.xbase + p, max_iter=params("dykstra.max_iters"), tol=params("dykstra.d_tol"))
|
|
196
|
+
dk = D[k%self.n(),:].copy()
|
|
197
|
+
D[k%self.n(),:] = yk - self.model.xbase
|
|
198
|
+
D_rank2, _diag2 = qr_rank(D,tol=params("matrix_rank.r_tol"))
|
|
199
|
+
if D_rank2 <= D_rank:
|
|
200
|
+
# Did not improve rank, revert change
|
|
201
|
+
D[k%self.n(),:] = dk
|
|
202
|
+
# rank was improved, update D_rank for next comparison
|
|
203
|
+
D_rank = D_rank2
|
|
204
|
+
|
|
205
|
+
# Go again
|
|
206
|
+
slctr = np.random.randint(0, 1+1, self.n())
|
|
207
|
+
k += 1
|
|
208
|
+
|
|
209
|
+
# Set still not linearly independent (L.I.)? Try random directions
|
|
210
|
+
i = 0
|
|
211
|
+
D_rank, diag = qr_rank(D,tol=params("matrix_rank.r_tol"))
|
|
212
|
+
while D_rank != num_directions and i <= 100*num_directions:
|
|
213
|
+
k = 0
|
|
214
|
+
while k < self.n():
|
|
215
|
+
if diag[k] < mr_tol:
|
|
216
|
+
p = np.random.normal(size=self.n())
|
|
217
|
+
p = p/np.linalg.norm(p)
|
|
218
|
+
p = np.dot(p,min(1,self.delta))
|
|
219
|
+
yk = dykstra(self.model.projections, self.model.xbase + p, max_iter=params("dykstra.max_iters"), tol=params("dykstra.d_tol"))
|
|
220
|
+
dk = D[k,:].copy()
|
|
221
|
+
D[k,:] = yk - self.model.xbase
|
|
222
|
+
D_rank2, _diag2 = qr_rank(D,tol=params("matrix_rank.r_tol"))
|
|
223
|
+
if D_rank2 <= D_rank:
|
|
224
|
+
# Did not improve rank, revert change
|
|
225
|
+
D[k,:] = dk
|
|
226
|
+
# rank was improved, update D_rank for next comparison
|
|
227
|
+
D_rank = D_rank2
|
|
228
|
+
k += 1
|
|
229
|
+
i += 1
|
|
230
|
+
|
|
231
|
+
if D_rank != num_directions:
|
|
232
|
+
raise RuntimeError("Unable to generate suitable initial directions")
|
|
233
|
+
|
|
234
|
+
# we have a linearly independent (L.I.) set of interpolation points
|
|
235
|
+
for k in range(0,self.n()):
|
|
236
|
+
# Evaluate objective at this new point
|
|
237
|
+
x = self.model.as_absolute_coordinates(D[k, :])
|
|
238
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
239
|
+
|
|
240
|
+
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
241
|
+
if exit_info is not None:
|
|
242
|
+
if num_samples_run > 0:
|
|
243
|
+
self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
|
|
244
|
+
x_in_abs_coords=True)
|
|
245
|
+
return exit_info # return & quit
|
|
246
|
+
|
|
247
|
+
# Otherwise, add new results (increments model.npt_so_far)
|
|
248
|
+
self.model.change_point(k+1, x - self.model.xbase, rvec_list[0, :]) # expect step, not absolute x
|
|
249
|
+
for i in range(1, num_samples_run):
|
|
250
|
+
self.model.add_new_sample(k+1, rvec_extra=rvec_list[i, :])
|
|
251
|
+
|
|
252
|
+
return None # return & continue
|
|
253
|
+
|
|
143
254
|
at_lower_boundary = (self.model.sl > -0.01 * self.delta) # sl = xl - x0, should be -ve, actually < -rhobeg
|
|
144
255
|
at_upper_boundary = (self.model.su < 0.01 * self.delta) # su = xu - x0, should be +ve, actually > rhobeg
|
|
145
256
|
|
|
@@ -150,17 +261,19 @@ class Controller(object):
|
|
|
150
261
|
# k = 2n+1, ..., (n+1)(n+2)/2 --> off-diagonal directions
|
|
151
262
|
if 1 <= k < self.n() + 1: # first step along coord directions
|
|
152
263
|
dirn = k - 1 # direction to move in (0,...,n-1)
|
|
153
|
-
stepa = self.delta if not at_upper_boundary[dirn] else -self.delta
|
|
264
|
+
stepa = self.delta if not at_upper_boundary[dirn] else -self.delta # take a +delta step if at lower, -delta if at upper
|
|
154
265
|
stepb = None
|
|
155
|
-
xpts_added[k, dirn] = stepa
|
|
266
|
+
xpts_added[k, dirn] = stepa # set new (relative) point to the step since we haven't done any moving, so relative point is all zeros.
|
|
156
267
|
|
|
157
268
|
elif self.n() + 1 <= k < 2 * self.n() + 1: # second step along coord directions
|
|
158
269
|
dirn = k - self.n() - 1 # direction to move in (0,...,n-1)
|
|
159
|
-
stepa = xpts_added[k - self.n(), dirn]
|
|
160
|
-
stepb = -self.delta
|
|
270
|
+
stepa = xpts_added[k - self.n(), dirn] # previous step
|
|
271
|
+
stepb = -self.delta # new step
|
|
161
272
|
if at_lower_boundary[dirn]:
|
|
273
|
+
# if at lower boundary, set the second step to be +ve
|
|
162
274
|
stepb = min(2.0 * self.delta, self.model.su[dirn]) # su = xu - x0, should be +ve
|
|
163
275
|
if at_upper_boundary[dirn]:
|
|
276
|
+
# if at upper boundary, set the second step to be -ve
|
|
164
277
|
stepb = max(-2.0 * self.delta, self.model.sl[dirn]) # sl = xl - x0, should be -ve
|
|
165
278
|
xpts_added[k, dirn] = stepb
|
|
166
279
|
|
|
@@ -181,7 +294,7 @@ class Controller(object):
|
|
|
181
294
|
|
|
182
295
|
# Evaluate objective at this new point
|
|
183
296
|
x = self.model.as_absolute_coordinates(xpts_added[k, :])
|
|
184
|
-
rvec_list,
|
|
297
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
185
298
|
|
|
186
299
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
187
300
|
if exit_info is not None:
|
|
@@ -201,14 +314,14 @@ class Controller(object):
|
|
|
201
314
|
# Note: this works because the steps for (k) and (k-n) points were in the same coordinate direction
|
|
202
315
|
if self.n() + 1 <= k < 2 * self.n() + 1:
|
|
203
316
|
# Only swap if steps were in different directions AND new pt has lower objective
|
|
204
|
-
if stepa * stepb < 0.0 and self.model.
|
|
317
|
+
if stepa * stepb < 0.0 and self.model.objval[k] < self.model.objval[k - self.n()]:
|
|
205
318
|
xpts_added[[k, k-self.n()]] = xpts_added[[k-self.n(), k]]
|
|
206
319
|
|
|
207
320
|
return None # return & continue
|
|
208
321
|
|
|
209
322
|
def initialise_random_directions(self, number_of_samples, num_directions, params):
|
|
210
323
|
if self.do_logging:
|
|
211
|
-
|
|
324
|
+
module_logger.debug("Initialising with random orthogonal directions")
|
|
212
325
|
# self.model already has x0 evaluated, so only need to initialise the other points
|
|
213
326
|
assert 1 <= num_directions < self.model.num_pts, "Initialisation: must have 1 <= ndirs_initial < npt"
|
|
214
327
|
|
|
@@ -234,7 +347,7 @@ class Controller(object):
|
|
|
234
347
|
for ndirns in range(num_directions):
|
|
235
348
|
new_point = xopt + dirns[ndirns, :] # always base move around best value so far
|
|
236
349
|
x = self.model.as_absolute_coordinates(new_point)
|
|
237
|
-
rvec_list,
|
|
350
|
+
rvec_list, obj_list, num_samples_run, exit_info = eval_obj_results[ndirns]
|
|
238
351
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
239
352
|
if exit_info is not None:
|
|
240
353
|
if num_samples_run > 0:
|
|
@@ -253,7 +366,7 @@ class Controller(object):
|
|
|
253
366
|
|
|
254
367
|
# Evaluate objective
|
|
255
368
|
x = self.model.as_absolute_coordinates(new_point)
|
|
256
|
-
rvec_list,
|
|
369
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
257
370
|
|
|
258
371
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
259
372
|
if exit_info is not None:
|
|
@@ -290,7 +403,7 @@ class Controller(object):
|
|
|
290
403
|
for j in range(num_steps):
|
|
291
404
|
xnew = self.model.xopt() + (step_length / LA.norm(dirns[j, :])) * dirns[j, :]
|
|
292
405
|
x = self.model.as_absolute_coordinates(xnew)
|
|
293
|
-
rvec_list,
|
|
406
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
294
407
|
|
|
295
408
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
296
409
|
if exit_info is not None:
|
|
@@ -328,29 +441,109 @@ class Controller(object):
|
|
|
328
441
|
|
|
329
442
|
return dirn * (step_length / LA.norm(dirn))
|
|
330
443
|
|
|
331
|
-
def
|
|
332
|
-
#
|
|
444
|
+
def evaluate_criticality_measure(self, params):
|
|
445
|
+
# Calculate criticality measure for regularized problems (h is not None)
|
|
446
|
+
|
|
447
|
+
# Build model for full least squares function
|
|
448
|
+
gopt, H = self.model.build_full_model()
|
|
449
|
+
|
|
450
|
+
if np.any(np.isnan(gopt)) or np.any(np.isnan(H)) or not np.all(np.isfinite(gopt)) or not np.all(np.isfinite(H)):
|
|
451
|
+
module_logger.debug("nan/inf values in gopt and/or H, skipping ctrsbox_sfista (criticality measure calc)")
|
|
452
|
+
# d = np.zeros(gopt.shape)
|
|
453
|
+
# gnew = gopt.copy()
|
|
454
|
+
# crvmin = -1
|
|
455
|
+
return np.inf
|
|
456
|
+
|
|
457
|
+
# NOTE: smaller params here to get more iterations in S-FISTA
|
|
458
|
+
func_tol = params("func_tol.criticality_measure") * self.delta
|
|
459
|
+
if self.model.projections:
|
|
460
|
+
d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), self.model.projections, 1,
|
|
461
|
+
self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
|
|
462
|
+
max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
|
|
463
|
+
scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
|
|
464
|
+
else:
|
|
465
|
+
proj = lambda x: pbox(x, self.model.sl, self.model.su)
|
|
466
|
+
d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), [proj], 1,
|
|
467
|
+
self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
|
|
468
|
+
max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
|
|
469
|
+
scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
|
|
470
|
+
|
|
471
|
+
# Calculate criticality measure
|
|
472
|
+
criticality_measure = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, np.zeros(H.shape), d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
|
|
473
|
+
return criticality_measure
|
|
474
|
+
|
|
475
|
+
def trust_region_step(self, params, criticality_measure=1e-2):
|
|
476
|
+
# Build model for full least squares function
|
|
333
477
|
gopt, H = self.model.build_full_model()
|
|
334
|
-
|
|
478
|
+
# Build func_tol for trust region step
|
|
479
|
+
# QUESTION: c1 = min{1, 1/delta_max^2}, but choose c1=1 here; choose maxhessian = max(||H||_2,1)
|
|
480
|
+
# QUESTION: when criticality_measure = 0? choose max(criticality_measure,1)
|
|
481
|
+
func_tol = (1-params("func_tol.tr_step")) * 1 * max(criticality_measure,1) * min(self.delta, max(criticality_measure,1) / max(np.linalg.norm(H, 2),1))
|
|
482
|
+
|
|
483
|
+
if self.h is None:
|
|
484
|
+
if self.model.projections:
|
|
485
|
+
# Running PGD/SFISTA is generally slower than trsbox, so don't do this if gopt or H have bad values
|
|
486
|
+
# (this will ultimately lead to a manual setting of d=0 and calling a safety step anyway)
|
|
487
|
+
if np.any(np.isnan(gopt)) or np.any(np.isnan(H)) or not np.all(np.isfinite(gopt)) or not np.all(np.isfinite(H)):
|
|
488
|
+
module_logger.debug("nan/inf values in gopt and/or H, skipping ctrsbox_pgd")
|
|
489
|
+
d = np.zeros(gopt.shape)
|
|
490
|
+
gnew = gopt.copy()
|
|
491
|
+
crvmin = -1
|
|
492
|
+
else:
|
|
493
|
+
d, gnew, crvmin = ctrsbox_pgd(self.model.xopt(abs_coordinates=True), gopt, H, self.model.projections, self.delta, d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"))
|
|
494
|
+
else:
|
|
495
|
+
d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
|
|
496
|
+
else:
|
|
497
|
+
# Running PGD/SFISTA is generally slower than trsbox, so don't do this if gopt or H have bad values
|
|
498
|
+
# (this will ultimately lead to a manual setting of d=0 and calling a safety step anyway)
|
|
499
|
+
if np.any(np.isnan(gopt)) or np.any(np.isnan(H)) or not np.all(np.isfinite(gopt)) or not np.all(np.isfinite(H)):
|
|
500
|
+
module_logger.debug("nan/inf values in gopt and/or H, skipping ctrsbox_sfista")
|
|
501
|
+
d = np.zeros(gopt.shape)
|
|
502
|
+
gnew = gopt.copy()
|
|
503
|
+
crvmin = -1
|
|
504
|
+
elif self.model.projections:
|
|
505
|
+
d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, H, self.model.projections, self.delta,
|
|
506
|
+
self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
|
|
507
|
+
max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
|
|
508
|
+
scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
|
|
509
|
+
else:
|
|
510
|
+
# NOTE: alternative way if using trsbox
|
|
511
|
+
# d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
|
|
512
|
+
proj = lambda x: pbox(x, self.model.sl, self.model.su)
|
|
513
|
+
d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, H, [proj], self.delta,
|
|
514
|
+
self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
|
|
515
|
+
max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
|
|
516
|
+
scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
|
|
517
|
+
|
|
518
|
+
# NOTE: check sufficient decrease. If increase in the model, set zero step
|
|
519
|
+
pred_reduction = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, H, d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
|
|
520
|
+
if pred_reduction < 0.0:
|
|
521
|
+
d = np.zeros(d.shape)
|
|
522
|
+
|
|
335
523
|
return d, gopt, H, gnew, crvmin
|
|
336
524
|
|
|
337
525
|
def geometry_step(self, knew, adelt, number_of_samples, params):
|
|
338
526
|
if self.do_logging:
|
|
339
|
-
|
|
527
|
+
module_logger.debug("Running geometry-fixing step")
|
|
340
528
|
try:
|
|
341
529
|
c, g = self.model.lagrange_gradient(knew)
|
|
342
530
|
# c = 1.0 if knew == self.model.kopt else 0.0 # based at xopt, just like d
|
|
343
|
-
|
|
344
|
-
|
|
531
|
+
if self.model.projections:
|
|
532
|
+
# Solve problem: use projection onto arbitrary constraints, and ||xnew-xopt|| <= adelt
|
|
533
|
+
step = ctrsbox_geometry(self.model.xopt(abs_coordinates=True), c, g, self.model.projections, adelt, d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"))
|
|
534
|
+
xnew = self.model.xopt() + step
|
|
535
|
+
else:
|
|
536
|
+
# Solve problem: bounds are sl <= xnew <= su, and ||xnew-xopt|| <= adelt
|
|
537
|
+
xnew = trsbox_geometry(self.model.xopt(), c, g, np.minimum(self.model.sl, 0.0), np.maximum(self.model.su, 0.0), adelt)
|
|
345
538
|
except LA.LinAlgError:
|
|
346
539
|
exit_info = ExitInformation(EXIT_LINALG_ERROR, "Singular matrix encountered in geometry step")
|
|
347
540
|
return exit_info # didn't fix geometry - return & quit
|
|
348
541
|
|
|
349
542
|
gopt, H = self.model.build_full_model() # save here, to calculate predicted value from geometry step
|
|
350
|
-
|
|
543
|
+
objopt = self.model.objopt() # again, evaluate now, before model.change_point()
|
|
351
544
|
d = xnew - self.model.xopt()
|
|
352
545
|
x = self.model.as_absolute_coordinates(xnew)
|
|
353
|
-
rvec_list,
|
|
546
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
354
547
|
|
|
355
548
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
356
549
|
if exit_info is not None:
|
|
@@ -365,11 +558,14 @@ class Controller(object):
|
|
|
365
558
|
self.model.add_new_sample(knew, rvec_extra=rvec_list[i, :])
|
|
366
559
|
|
|
367
560
|
# Estimate actual reduction to add to diffs vector
|
|
368
|
-
|
|
369
|
-
|
|
561
|
+
obj = sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) # estimate actual objective value
|
|
370
562
|
# pred_reduction = - calculate_model_value(gopt, H, d)
|
|
371
563
|
pred_reduction = - model_value(gopt, H, d)
|
|
372
|
-
|
|
564
|
+
if self.h is not None:
|
|
565
|
+
obj += self.h(remove_scaling(x, self.scaling_changes), *self.argsh)
|
|
566
|
+
# since m(0) = h(x)
|
|
567
|
+
pred_reduction = self.h(remove_scaling(x, self.scaling_changes), *self.argsh) - model_value(gopt, H, d, x, self.h, self.argsh, self.scaling_changes)
|
|
568
|
+
actual_reduction = objopt - obj
|
|
373
569
|
self.diffs = [abs(pred_reduction - actual_reduction), self.diffs[0], self.diffs[1]]
|
|
374
570
|
return None # exit_info = None
|
|
375
571
|
|
|
@@ -397,7 +593,7 @@ class Controller(object):
|
|
|
397
593
|
def evaluate_objective(self, x, number_of_samples, params):
|
|
398
594
|
# Sample from objective function several times, keeping track of maxfun and min_obj_value throughout
|
|
399
595
|
rvec_list = np.zeros((number_of_samples, self.m()))
|
|
400
|
-
|
|
596
|
+
obj_list = np.zeros((number_of_samples,))
|
|
401
597
|
num_samples_run = 0
|
|
402
598
|
incremented_nx = False
|
|
403
599
|
exit_info = None
|
|
@@ -411,19 +607,24 @@ class Controller(object):
|
|
|
411
607
|
if not incremented_nx:
|
|
412
608
|
self.nx += 1
|
|
413
609
|
incremented_nx = True
|
|
414
|
-
rvec_list[i, :],
|
|
415
|
-
|
|
610
|
+
rvec_list[i, :], obj_list[i] = eval_least_squares_with_regularisation(self.objfun, remove_scaling(x, self.scaling_changes), self.h,
|
|
611
|
+
argsf=self.argsf, argsh=self.argsh, verbose=self.do_logging, eval_num=self.nf, pt_num=self.nx,
|
|
416
612
|
full_x_thresh=params("logging.n_to_print_whole_x_vector"),
|
|
417
|
-
check_for_overflow=params("general.check_objfun_for_overflow")
|
|
418
|
-
verbose=self.do_logging)
|
|
613
|
+
check_for_overflow=params("general.check_objfun_for_overflow"))
|
|
419
614
|
num_samples_run += 1
|
|
420
615
|
|
|
421
616
|
# Check if the average value was below our threshold
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
617
|
+
# QUESTION: how to choose x in h when using averaged values
|
|
618
|
+
if self.h is None:
|
|
619
|
+
if num_samples_run > 0 and \
|
|
620
|
+
sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) <= self.model.min_objective_value():
|
|
621
|
+
exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
|
|
622
|
+
else:
|
|
623
|
+
if num_samples_run > 0 and \
|
|
624
|
+
sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) + self.h(remove_scaling(x, self.scaling_changes),*self.argsh) <= self.model.min_objective_value():
|
|
625
|
+
exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
|
|
425
626
|
|
|
426
|
-
return rvec_list,
|
|
627
|
+
return rvec_list, obj_list, num_samples_run, exit_info
|
|
427
628
|
|
|
428
629
|
def choose_point_to_replace(self, d, skip_kopt=True):
|
|
429
630
|
delsq = self.delta ** 2
|
|
@@ -499,17 +700,26 @@ class Controller(object):
|
|
|
499
700
|
self.last_successful_iter = current_iter # reset successful iteration check
|
|
500
701
|
return
|
|
501
702
|
|
|
502
|
-
def calculate_ratio(self, current_iter, rvec_list, d, gopt, H):
|
|
703
|
+
def calculate_ratio(self, x, current_iter, rvec_list, d, gopt, H):
|
|
503
704
|
exit_info = None
|
|
504
|
-
|
|
705
|
+
# estimate actual objective value
|
|
706
|
+
obj = sumsq(np.mean(rvec_list, axis=0))
|
|
707
|
+
# pred_reduction = - calculate_model_value(gopt, H, d)
|
|
505
708
|
pred_reduction = - model_value(gopt, H, d)
|
|
506
|
-
|
|
709
|
+
if self.h is not None:
|
|
710
|
+
# QUESTION: x+d here correct? rvec_list takes mean value
|
|
711
|
+
obj += self.h(remove_scaling(x+d, self.scaling_changes), *self.argsh)
|
|
712
|
+
# since m(0) = h(x)
|
|
713
|
+
pred_reduction = self.h(remove_scaling(x, self.scaling_changes), *self.argsh) - model_value(gopt, H, d, x, self.h, self.argsh, self.scaling_changes)
|
|
714
|
+
actual_reduction = self.model.objopt() - obj
|
|
507
715
|
self.diffs = [abs(actual_reduction - pred_reduction), self.diffs[0], self.diffs[1]]
|
|
508
716
|
if min(sqrt(sumsq(d)), self.delta) > self.rho: # if ||d|| >= rho, successful!
|
|
509
717
|
self.last_successful_iter = current_iter
|
|
510
718
|
if pred_reduction < 0.0:
|
|
511
|
-
|
|
512
|
-
|
|
719
|
+
if len(self.model.projections) > 1: # if we are using multiple projections, only warn since likely due to constraint intersection
|
|
720
|
+
exit_info = ExitInformation(EXIT_TR_INCREASE_WARNING, "Either multiple constraints are active or trust region step gave model increase")
|
|
721
|
+
else:
|
|
722
|
+
exit_info = ExitInformation(EXIT_TR_INCREASE_ERROR, "Trust region step gave model increase")
|
|
513
723
|
ratio = actual_reduction / pred_reduction
|
|
514
724
|
return ratio, exit_info
|
|
515
725
|
|
|
@@ -517,32 +727,32 @@ class Controller(object):
|
|
|
517
727
|
if len(self.last_iters_step_taken) <= params("slow.history_for_slow"):
|
|
518
728
|
# Not enough info, simply append
|
|
519
729
|
self.last_iters_step_taken.append(current_iter)
|
|
520
|
-
self.last_fopts_step_taken.append(self.model.
|
|
730
|
+
self.last_fopts_step_taken.append(self.model.objopt())
|
|
521
731
|
this_iter_slow = False
|
|
522
732
|
else:
|
|
523
733
|
# Enough info - shift values
|
|
524
734
|
self.last_iters_step_taken = self.last_iters_step_taken[1:] + [current_iter]
|
|
525
|
-
self.last_fopts_step_taken = self.last_fopts_step_taken[1:] + [self.model.
|
|
526
|
-
this_iter_slow = (log(self.last_fopts_step_taken[0]) - log(self.model.
|
|
735
|
+
self.last_fopts_step_taken = self.last_fopts_step_taken[1:] + [self.model.objopt()]
|
|
736
|
+
this_iter_slow = (log(self.last_fopts_step_taken[0]) - log(self.model.objopt())) / \
|
|
527
737
|
float(params("slow.history_for_slow")) < params("slow.thresh_for_slow")
|
|
528
738
|
# Update counter of number of slow iterations
|
|
529
739
|
if this_iter_slow:
|
|
530
740
|
self.num_slow_iters += 1
|
|
531
741
|
if self.do_logging:
|
|
532
|
-
|
|
742
|
+
module_logger.info("Slow iteration (%g consecutive so far, max allowed %g)"
|
|
533
743
|
% (self.num_slow_iters, params("slow.max_slow_iters")))
|
|
534
744
|
else:
|
|
535
745
|
self.num_slow_iters = 0
|
|
536
746
|
if self.do_logging:
|
|
537
|
-
|
|
747
|
+
module_logger.debug("Non-slow iteration")
|
|
538
748
|
return this_iter_slow, self.num_slow_iters >= params("slow.max_slow_iters")
|
|
539
749
|
|
|
540
750
|
def soft_restart(self, number_of_samples, nruns_so_far, params, x_in_abs_coords_to_save=None, rvec_to_save=None,
|
|
541
751
|
nsamples_to_save=None):
|
|
542
752
|
# A successful run is one where we reduced fopt
|
|
543
|
-
if self.model.
|
|
753
|
+
if self.model.objopt() < self.last_run_fopt:
|
|
544
754
|
self.last_successful_run = nruns_so_far
|
|
545
|
-
self.last_run_fopt = self.model.
|
|
755
|
+
self.last_run_fopt = self.model.objopt()
|
|
546
756
|
|
|
547
757
|
ok_to_do_restart = (nruns_so_far - self.last_successful_run < params("restarts.max_unsuccessful_restarts")) and \
|
|
548
758
|
(self.nf < self.maxfun)
|
|
@@ -563,12 +773,17 @@ class Controller(object):
|
|
|
563
773
|
self.model.nsamples[self.model.kopt], x_in_abs_coords=True)
|
|
564
774
|
|
|
565
775
|
if self.do_logging:
|
|
566
|
-
|
|
776
|
+
module_logger.info("Soft restart [currently, f = %g after %g function evals]" % (self.model.objopt(), self.nf))
|
|
567
777
|
# Resetting method: reset delta and rho, then move the closest 'num_steps' points to xk to improve geometry
|
|
568
778
|
# Note: closest points because we are suddenly increasing delta & rho, so we want to encourage spreading out points
|
|
569
779
|
self.delta = self.rhobeg
|
|
570
780
|
self.rho = self.rhobeg
|
|
571
781
|
self.diffs = [0.0, 0.0, 0.0]
|
|
782
|
+
|
|
783
|
+
# Forget history of slow iterations
|
|
784
|
+
self.last_iters_step_taken = []
|
|
785
|
+
self.last_fopts_step_taken = []
|
|
786
|
+
self.num_slow_iters = 0
|
|
572
787
|
|
|
573
788
|
all_sq_dist = self.model.distances_to_xopt()[:self.model.npt()]
|
|
574
789
|
closest_points = np.argsort(all_sq_dist)
|
|
@@ -600,7 +815,7 @@ class Controller(object):
|
|
|
600
815
|
for i in range(num_pts_to_add):
|
|
601
816
|
xnew = self.model.xopt() + dirns[i, :] # always base move around best value so far
|
|
602
817
|
x = self.model.as_absolute_coordinates(xnew)
|
|
603
|
-
rvec_list,
|
|
818
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
604
819
|
|
|
605
820
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
606
821
|
if exit_info is not None:
|
|
@@ -615,7 +830,7 @@ class Controller(object):
|
|
|
615
830
|
self.model.add_new_sample(self.model.npt() - 1, rvec_extra=rvec_list[i, :])
|
|
616
831
|
|
|
617
832
|
if self.do_logging:
|
|
618
|
-
|
|
833
|
+
module_logger.info("Soft restart: added %g new directions, npt is now %g" % (num_pts_to_add, self.model.npt()))
|
|
619
834
|
|
|
620
835
|
# Otherwise, we are doing a restart
|
|
621
836
|
self.last_successful_iter = 0
|
|
@@ -647,11 +862,11 @@ class Controller(object):
|
|
|
647
862
|
add_noise = params("noise.scale_factor_for_quit") * params("noise.additive_noise_level")
|
|
648
863
|
for k in range(self.model.npt()):
|
|
649
864
|
all_fvals_within_noise = all_fvals_within_noise and \
|
|
650
|
-
(self.model.
|
|
865
|
+
(self.model.objval[k] <= self.model.objopt() + add_noise / sqrt(self.model.nsamples[k]))
|
|
651
866
|
else: # noise_level_multiplicative
|
|
652
867
|
ratio = 1.0 + params("noise.scale_factor_for_quit") * params("noise.multiplicative_noise_level")
|
|
653
868
|
for k in range(self.model.npt()):
|
|
654
|
-
this_ratio = self.model.
|
|
869
|
+
this_ratio = self.model.objval[k] / self.model.objopt() # fval_opt strictly positive (would have quit o/w)
|
|
655
870
|
all_fvals_within_noise = all_fvals_within_noise and (
|
|
656
871
|
this_ratio <= ratio / sqrt(self.model.nsamples[k]))
|
|
657
872
|
return all_fvals_within_noise
|
|
@@ -680,7 +895,7 @@ class Controller(object):
|
|
|
680
895
|
dirns[i, :] = -dirns[i, :]
|
|
681
896
|
xnew = np.maximum(np.minimum(self.model.xopt() + dirns[i, :], self.model.su), self.model.sl)
|
|
682
897
|
x = self.model.as_absolute_coordinates(xnew)
|
|
683
|
-
rvec_list,
|
|
898
|
+
rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
|
|
684
899
|
|
|
685
900
|
# Handle exit conditions (f < min obj value or maxfun reached)
|
|
686
901
|
if exit_info is not None:
|
dfols/hessian.py
CHANGED
|
@@ -39,7 +39,7 @@ class Hessian(object):
|
|
|
39
39
|
def __init__(self, n, vals=None):
|
|
40
40
|
self.n = n
|
|
41
41
|
if vals is None:
|
|
42
|
-
self.hq = np.zeros((n * (n + 1) // 2,), dtype=
|
|
42
|
+
self.hq = np.zeros((n * (n + 1) // 2,), dtype=float)
|
|
43
43
|
else:
|
|
44
44
|
assert isinstance(vals, np.ndarray), "Can only set Hessian from NumPy array"
|
|
45
45
|
assert len(vals.shape) in [1, 2], "Can only set Hessian from vector or matrix"
|