DFO-LS 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of DFO-LS might be problematic.
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/METADATA +14 -34
- DFO_LS-1.5.1.dist-info/RECORD +14 -0
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/WHEEL +1 -1
- dfols/__init__.py +1 -1
- dfols/controller.py +236 -113
- dfols/model.py +61 -33
- dfols/params.py +18 -2
- dfols/solver.py +95 -61
- dfols/trust_region.py +86 -7
- dfols/util.py +20 -9
- DFO_LS-1.4.1.dist-info/RECORD +0 -14
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/LICENSE.txt +0 -0
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/top_level.txt +0 -0
dfols/controller.py
CHANGED
@@ -100,14 +100,19 @@ class ExitInformation(object):
 
 
 class Controller(object):
-    def __init__(self, objfun,
-                 scaling_changes, do_logging):
+    def __init__(self, objfun, argsf, x0, r0, r0_nsamples, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun, params,
+                 scaling_changes, do_logging, h=None, lh=None, argsh = (), prox_uh=None, argsprox = ()):
         self.do_logging = do_logging
         self.objfun = objfun
-        self.
+        self.h = h
+        self.argsf = argsf
+        self.argsh = argsh
+        self.lh = lh
+        self.prox_uh = prox_uh #TODO: add instruction for prox_uh
+        self.argsprox = argsprox
         self.maxfun = maxfun
-        self.model = Model(npt, x0, r0, xl, xu, projections, r0_nsamples, precondition=params("interpolation.precondition"),
-                           abs_tol = params("model.abs_tol"), rel_tol = params("model.rel_tol"), do_logging=do_logging)
+        self.model = Model(npt, x0, r0, xl, xu, projections, r0_nsamples, h=self.h, argsh = argsh, precondition=params("interpolation.precondition"),
+                           abs_tol = params("model.abs_tol"), rel_tol = params("model.rel_tol"), do_logging=do_logging, scaling_changes=scaling_changes)
         self.nf = nf
         self.nx = nx
         self.rhobeg = rhobeg
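Note: the new h, lh, argsh, prox_uh and argsprox arguments correspond to DFO-LS 1.5's support for regularised problems min_x ||r(x)||^2 + h(x), where h is a convex, Lipschitz-continuous function with Lipschitz constant lh and prox_uh(x, u) evaluates the proximal operator of u*h at x. A minimal usage sketch, assuming the public dfols.solve() in 1.5.1 forwards these keyword arguments (that top-level interface is not part of this diff, so the exact signature is an assumption):

import numpy as np
import dfols

def objfun(x):
    # residual vector r(x); the solver minimises ||r(x)||^2 + h(x)
    return np.array([10.0 * (x[1] - x[0] ** 2), 1.0 - x[0]])

lam = 1e-2
h = lambda x: lam * np.linalg.norm(x, 1)       # convex, Lipschitz L1 regulariser
lh = lam * np.sqrt(2)                          # a Lipschitz constant of h for n = 2 variables
prox_uh = lambda x, u: np.sign(x) * np.maximum(np.abs(x) - u * lam, 0.0)  # prox of u*h (soft-thresholding)

x0 = np.array([-1.2, 1.0])
soln = dfols.solve(objfun, x0, h=h, lh=lh, prox_uh=prox_uh)  # keyword names taken from the constructor above
print(soln.x)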
@@ -230,17 +235,17 @@ class Controller(object):
         for k in range(0,self.n()):
             # Evaluate objective at this new point
             x = self.model.as_absolute_coordinates(D[k, :])
-            rvec_list,
+            rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
             # Handle exit conditions (f < min obj value or maxfun reached)
             if exit_info is not None:
                 if num_samples_run > 0:
-                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                           x_in_abs_coords=True)
                 return exit_info # return & quit
 
             # Otherwise, add new results (increments model.npt_so_far)
-            self.model.change_point(k+1, x - self.model.xbase, rvec_list[0, :]) # expect step, not absolute x
+            self.model.change_point(k+1, x - self.model.xbase, rvec_list[0, :], self.nx) # expect step, not absolute x
             for i in range(1, num_samples_run):
                 self.model.add_new_sample(k+1, rvec_extra=rvec_list[i, :])
 
@@ -248,69 +253,101 @@ class Controller(object):
 
         at_lower_boundary = (self.model.sl > -0.01 * self.delta) # sl = xl - x0, should be -ve, actually < -rhobeg
         at_upper_boundary = (self.model.su < 0.01 * self.delta) # su = xu - x0, should be +ve, actually > rhobeg
- [... 7 deleted lines not shown in the diff view ...]
+
+        if params("init.run_in_parallel") and num_directions <= self.n():
+            # Can do all the evaluation in parallel if <= n+1 interpolation points, but if larger
+            # then the step depends on the function value at previous steps and does point swapping
+            xpts_added = np.zeros((num_directions + 1, self.n()))
+            eval_obj_results = []
+            for k in range(1, num_directions + 1): # k = 1, ..., num_directions
+                # always have k = 1, ..., n since num_directions <= n
                 dirn = k - 1 # direction to move in (0,...,n-1)
                 stepa = self.delta if not at_upper_boundary[dirn] else -self.delta # take a +delta step if at lower, -delta if at upper
                 stepb = None
                 xpts_added[k, dirn] = stepa # set new (relative) point to the step since we haven't done any moving, so relative point is all zeros.
+
+                # Evaluate objective at this new point
+                x = self.model.as_absolute_coordinates(xpts_added[k, :])
+                eval_obj_results.append(self.evaluate_objective(x, number_of_samples, params))
+
+            # Evaluations done, now add to the model
+            for k in range(1, num_directions + 1):
+                x = self.model.as_absolute_coordinates(xpts_added[k, :])
+                rvec_list, obj_list, num_samples_run, exit_info = eval_obj_results[k-1]
+                # Handle exit conditions (f < min obj value or maxfun reached)
+                if exit_info is not None:
+                    if num_samples_run > 0:
+                        self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
+                                              x_in_abs_coords=True)
+                    return exit_info # return & quit
 
- [... 26 deleted lines not shown in the diff view ...]
+                # Otherwise, add new results (increments model.npt_so_far)
+                self.model.change_point(k, x - self.model.xbase, rvec_list[0, :], self.nx) # expect step, not absolute x
+                for i in range(1, num_samples_run):
+                    self.model.add_new_sample(k, rvec_extra=rvec_list[i, :])
+        else:
+            xpts_added = np.zeros((num_directions + 1, self.n()))
+            for k in range(1, num_directions + 1):
+                # k = 0 --> base point (xpt = 0) [ not here]
+                # k = 1, ..., 2n --> coordinate directions [1,...,n and n+1,...,2n]
+                # k = 2n+1, ..., (n+1)(n+2)/2 --> off-diagonal directions
+                if 1 <= k < self.n() + 1: # first step along coord directions
+                    dirn = k - 1 # direction to move in (0,...,n-1)
+                    stepa = self.delta if not at_upper_boundary[dirn] else -self.delta # take a +delta step if at lower, -delta if at upper
+                    stepb = None
+                    xpts_added[k, dirn] = stepa # set new (relative) point to the step since we haven't done any moving, so relative point is all zeros.
+
+                elif self.n() + 1 <= k < 2 * self.n() + 1: # second step along coord directions
+                    dirn = k - self.n() - 1 # direction to move in (0,...,n-1)
+                    stepa = xpts_added[k - self.n(), dirn] # previous step
+                    stepb = -self.delta # new step
+                    if at_lower_boundary[dirn]:
+                        # if at lower boundary, set the second step to be +ve
+                        stepb = min(2.0 * self.delta, self.model.su[dirn]) # su = xu - x0, should be +ve
+                    if at_upper_boundary[dirn]:
+                        # if at upper boundary, set the second step to be -ve
+                        stepb = max(-2.0 * self.delta, self.model.sl[dirn]) # sl = xl - x0, should be -ve
+                    xpts_added[k, dirn] = stepb
+
+                else: # k = 2n+1, ..., (n+1)(n+2)/2
+                    # p = (k - 1) % n + 1 # cycles through (1,...,n), starting at 2n+1 --> 1
+                    # l = (k - 2 * n - 1) / n + 1 # (1,...,1, 2, ..., 2, etc.) where each number appears n times
+                    # q = (p + l if p + l <= n else p + l - n)
+                    stepa = None
+                    stepb = None
+                    itemp = (k - self.n() - 1) // self.n()
+                    q = k - itemp * self.n() - self.n()
+                    p = q + itemp
+                    if p > self.n():
+                        p, q = q, p - self.n() # does swap correctly in Python
+
+                    xpts_added[k, p - 1] = xpts_added[p, p - 1]
+                    xpts_added[k, q - 1] = xpts_added[q, q - 1]
 
- [... 3 deleted lines not shown in the diff view ...]
+                # Evaluate objective at this new point
+                x = self.model.as_absolute_coordinates(xpts_added[k, :])
+                rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
- [... 6 deleted lines not shown in the diff view ...]
+                # Handle exit conditions (f < min obj value or maxfun reached)
+                if exit_info is not None:
+                    if num_samples_run > 0:
+                        self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
+                                              x_in_abs_coords=True)
+                    return exit_info # return & quit
 
- [... 4 deleted lines not shown in the diff view ...]
+                # Otherwise, add new results (increments model.npt_so_far)
+                self.model.change_point(k, x - self.model.xbase, rvec_list[0, :], self.nx) # expect step, not absolute x
+                for i in range(1, num_samples_run):
+                    self.model.add_new_sample(k, rvec_extra=rvec_list[i, :])
 
- [... 8 deleted lines not shown in the diff view ...]
+                # If k exceeds N+1, then the positions of the k-th and (k-N)-th interpolation
+                # points may be switched, in order that the function value at the first of them
+                # contributes to the off-diagonal second derivative terms of the initial quadratic model.
+                # Note: this works because the steps for (k) and (k-n) points were in the same coordinate direction
+                if self.n() + 1 <= k < 2 * self.n() + 1:
+                    # Only swap if steps were in different directions AND new pt has lower objective
+                    if stepa * stepb < 0.0 and self.model.objval[k] < self.model.objval[k - self.n()]:
+                        xpts_added[[k, k-self.n()]] = xpts_added[[k-self.n(), k]]
 
         return None # return & continue
 
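Note: in the sequential (else) branch above, interpolation points with index k > 2n reuse two coordinate steps already taken, selected by the itemp/p/q arithmetic. A standalone illustration of that arithmetic for n = 3 (my own sketch, not package code):

n = 3
npt = (n + 1) * (n + 2) // 2   # full quadratic model: points are indexed k = 0, ..., npt - 1
for k in range(2 * n + 1, npt):
    itemp = (k - n - 1) // n
    q = k - itemp * n - n
    p = q + itemp
    if p > n:
        p, q = q, p - n
    print(k, (p, q))
# prints: 7 (2, 1)
#         8 (3, 2)
#         9 (3, 1)
# i.e. point k combines the coordinate steps already taken for points p and q.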
@@ -342,17 +379,17 @@ class Controller(object):
             for ndirns in range(num_directions):
                 new_point = xopt + dirns[ndirns, :] # alway base move around best value so far
                 x = self.model.as_absolute_coordinates(new_point)
-                rvec_list,
+                rvec_list, obj_list, num_samples_run, exit_info = eval_obj_results[ndirns]
                 # Handle exit conditions (f < min obj value or maxfun reached)
                 if exit_info is not None:
                     if num_samples_run > 0:
-                        self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                        self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                               x_in_abs_coords=True)
                     return exit_info # return & quit
 
                 # Otherwise, add new results (increments model.npt_so_far)
                 self.model.change_point(1 + ndirns, x - self.model.xbase,
-                                        rvec_list[0, :]) # expect step, not absolute x
+                                        rvec_list[0, :], self.nx) # expect step, not absolute x
                 for i in range(1, num_samples_run):
                     self.model.add_new_sample(1 + ndirns, rvec_extra=rvec_list[i, :])
         else:
@@ -361,17 +398,17 @@ class Controller(object):
 
                 # Evaluate objective
                 x = self.model.as_absolute_coordinates(new_point)
-                rvec_list,
+                rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
                 # Handle exit conditions (f < min obj value or maxfun reached)
                 if exit_info is not None:
                     if num_samples_run > 0:
-                        self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                        self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                               x_in_abs_coords=True)
                     return exit_info # return & quit
 
                 # Otherwise, add new results (increments model.npt_so_far)
-                self.model.change_point(1 + ndirns, x - self.model.xbase, rvec_list[0, :]) # expect step, not absolute x
+                self.model.change_point(1 + ndirns, x - self.model.xbase, rvec_list[0, :], self.nx) # expect step, not absolute x
                 for i in range(1, num_samples_run):
                     self.model.add_new_sample(1 + ndirns, rvec_extra=rvec_list[i, :])
 
@@ -398,12 +435,12 @@ class Controller(object):
         for j in range(num_steps):
             xnew = self.model.xopt() + (step_length / LA.norm(dirns[j, :])) * dirns[j, :]
             x = self.model.as_absolute_coordinates(xnew)
-            rvec_list,
+            rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
             # Handle exit conditions (f < min obj value or maxfun reached)
             if exit_info is not None:
                 if num_samples_run > 0:
-                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                           x_in_abs_coords=True)
                 return exit_info # return & quit
 
@@ -417,7 +454,7 @@ class Controller(object):
                 return exit_info # return & quit
 
             # Otherwise, add new results
-            self.model.change_point(kmin, xnew, rvec_list[0, :]) # expect step, not absolute x
+            self.model.change_point(kmin, xnew, rvec_list[0, :], self.nx) # expect step, not absolute x
             for i in range(1, num_samples_run):
                 self.model.add_new_sample(kmin, rvec_extra=rvec_list[i, :])
 
@@ -436,13 +473,85 @@ class Controller(object):
 
         return dirn * (step_length / LA.norm(dirn))
 
-    def
-        #
+    def evaluate_criticality_measure(self, params):
+        # Calculate criticality measure for regularized problems (h is not None)
+
+        # Build model for full least squares function
         gopt, H = self.model.build_full_model()
+
+        if np.any(np.isnan(gopt)) or np.any(np.isnan(H)) or not np.all(np.isfinite(gopt)) or not np.all(np.isfinite(H)):
+            module_logger.debug("nan/inf values in gopt and/or H, skipping ctrsbox_sfista (criticality measure calc)")
+            # d = np.zeros(gopt.shape)
+            # gnew = gopt.copy()
+            # crvmin = -1
+            return np.inf
+
+        # NOTE: smaller params here to get more iterations in S-FISTA
+        func_tol = params("func_tol.criticality_measure") * self.delta
         if self.model.projections:
-            d, gnew, crvmin =
+            d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), self.model.projections, 1,
+                                             self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
+                                             max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
+                                             scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
+        else:
+            proj = lambda x: pbox(x, self.model.sl, self.model.su)
+            d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, np.zeros(H.shape), [proj], 1,
+                                             self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
+                                             max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
+                                             scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
+
+        # Calculate criticality measure
+        criticality_measure = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, np.zeros(H.shape), d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
+        return criticality_measure
+
+    def trust_region_step(self, params, criticality_measure=1e-2):
+        # Build model for full least squares function
+        gopt, H = self.model.build_full_model()
+        # Build func_tol for trust region step
+        # QUESTION: c1 = min{1, 1/delta_max^2}, but choose c1=1here; choose maxhessian = max(||H||_2,1)
+        # QUESTION: when criticality_measure = 0? choose max(criticality_measure,1)
+        func_tol = (1-params("func_tol.tr_step")) * 1 * max(criticality_measure,1) * min(self.delta, max(criticality_measure,1) / max(np.linalg.norm(H, 2),1))
+
+        if self.h is None:
+            if self.model.projections:
+                # Running PGD/SFISTA is generally slower than trsbox, so don't do this if gopt or H have bad values
+                # (this will ultimately lead to a manual setting of d=0 and calling a safety step anyway)
+                if np.any(np.isnan(gopt)) or np.any(np.isnan(H)) or not np.all(np.isfinite(gopt)) or not np.all(np.isfinite(H)):
+                    module_logger.debug("nan/inf values in gopt and/or H, skipping ctrsbox_pgd")
+                    d = np.zeros(gopt.shape)
+                    gnew = gopt.copy()
+                    crvmin = -1
+                else:
+                    d, gnew, crvmin = ctrsbox_pgd(self.model.xopt(abs_coordinates=True), gopt, H, self.model.projections, self.delta, d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"))
+            else:
+                d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
         else:
- [... 1 deleted line not shown in the diff view ...]
+            # Running PGD/SFISTA is generally slower than trsbox, so don't do this if gopt or H have bad values
+            # (this will ultimately lead to a manual setting of d=0 and calling a safety step anyway)
+            if np.any(np.isnan(gopt)) or np.any(np.isnan(H)) or not np.all(np.isfinite(gopt)) or not np.all(np.isfinite(H)):
+                module_logger.debug("nan/inf values in gopt and/or H, skipping ctrsbox_sfista")
+                d = np.zeros(gopt.shape)
+                gnew = gopt.copy()
+                crvmin = -1
+            elif self.model.projections:
+                d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, H, self.model.projections, self.delta,
+                                                 self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
+                                                 max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
+                                                 scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
+            else:
+                # NOTE: alternative way if using trsbox
+                # d, gnew, crvmin = trsbox(self.model.xopt(), gopt, H, self.model.sl, self.model.su, self.delta)
+                proj = lambda x: pbox(x, self.model.sl, self.model.su)
+                d, gnew, crvmin = ctrsbox_sfista(self.model.xopt(abs_coordinates=True), gopt, H, [proj], self.delta,
+                                                 self.h, self.lh, self.prox_uh, argsh = self.argsh, argsprox=self.argsprox, func_tol=func_tol,
+                                                 max_iters=params("func_tol.max_iters"), d_max_iters=params("dykstra.max_iters"), d_tol=params("dykstra.d_tol"),
+                                                 scaling_changes=self.scaling_changes, sfista_iters_scale=params("sfista.max_iters_scaling"))
+
+            # NOTE: check sufficient decrease. If increase in the model, set zero step
+            pred_reduction = self.h(remove_scaling(self.model.xopt(abs_coordinates=True), self.scaling_changes), *self.argsh) - model_value(gopt, H, d, self.model.xopt(abs_coordinates=True), self.h, self.argsh, self.scaling_changes)
+            if pred_reduction < 0.0:
+                d = np.zeros(d.shape)
+
         return d, gopt, H, gnew, crvmin
 
     def geometry_step(self, knew, adelt, number_of_samples, params):
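Note: in exact arithmetic, evaluate_criticality_measure above returns phi(x) = h(x) - min{ g'd + h(x + d) : ||d|| <= 1, x + d feasible }, where g is the gradient of the interpolation model of ||r(x)||^2 and model_value with a zero Hessian appears to evaluate g'd + h(x + d); phi(x) = 0 exactly at first-order stationary points of the regularised problem. A brute-force 1-D illustration of this quantity (my own sketch, ignoring constraints and the S-FISTA subproblem solver, not package code):

import numpy as np

def phi(x):
    # f(x) = 0.5*(x - 1)**2, so g(x) = x - 1; h(x) = |x|; trust-region radius 1
    g = x - 1.0
    h = lambda z: abs(z)
    d = np.linspace(-1.0, 1.0, 200001)
    return h(x) - np.min(g * d + h(x + d))

print(phi(0.0))  # 0.0: x = 0 minimises 0.5*(x - 1)**2 + |x|, so it is stationary
print(phi(1.0))  # 1.0: x = 1 is not stationary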
@@ -463,29 +572,32 @@ class Controller(object):
             return exit_info # didn't fix geometry - return & quit
 
         gopt, H = self.model.build_full_model() # save here, to calculate predicted value from geometry step
- [... 1 deleted line not shown in the diff view ...]
+        objopt = self.model.objopt() # again, evaluate now, before model.change_point()
         d = xnew - self.model.xopt()
         x = self.model.as_absolute_coordinates(xnew)
-        rvec_list,
+        rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
         # Handle exit conditions (f < min obj value or maxfun reached)
         if exit_info is not None:
             if num_samples_run > 0:
-                self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                       x_in_abs_coords=True)
             return exit_info # didn't fix geometry - return & quit
 
         # Otherwise, add new results
-        self.model.change_point(knew, xnew, rvec_list[0, :]) # expect step, not absolute x
+        self.model.change_point(knew, xnew, rvec_list[0, :], self.nx) # expect step, not absolute x
         for i in range(1, num_samples_run):
             self.model.add_new_sample(knew, rvec_extra=rvec_list[i, :])
 
         # Estimate actual reduction to add to diffs vector
- [... 2 deleted lines not shown in the diff view ...]
+        obj = sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) # estimate actual objective value
         # pred_reduction = - calculate_model_value(gopt, H, d)
         pred_reduction = - model_value(gopt, H, d)
- [... 1 deleted line not shown in the diff view ...]
+        if self.h is not None:
+            obj += self.h(remove_scaling(x, self.scaling_changes), *self.argsh)
+            # since m(0) = h(x)
+            pred_reduction = self.h(remove_scaling(x, self.scaling_changes), *self.argsh) - model_value(gopt, H, d, x, self.h, self.argsh, self.scaling_changes)
+        actual_reduction = objopt - obj
         self.diffs = [abs(pred_reduction - actual_reduction), self.diffs[0], self.diffs[1]]
         return None # exit_info = None
 
@@ -513,7 +625,7 @@ class Controller(object):
     def evaluate_objective(self, x, number_of_samples, params):
         # Sample from objective function several times, keeping track of maxfun and min_obj_value throughout
         rvec_list = np.zeros((number_of_samples, self.m()))
- [... 1 deleted line not shown in the diff view ...]
+        obj_list = np.zeros((number_of_samples,))
         num_samples_run = 0
         incremented_nx = False
         exit_info = None
@@ -527,19 +639,24 @@ class Controller(object):
             if not incremented_nx:
                 self.nx += 1
                 incremented_nx = True
-            rvec_list[i, :],
- [... 1 deleted line not shown in the diff view ...]
+            rvec_list[i, :], obj_list[i] = eval_least_squares_with_regularisation(self.objfun, remove_scaling(x, self.scaling_changes), self.h,
+                                               argsf=self.argsf, argsh=self.argsh, verbose=self.do_logging, eval_num=self.nf, pt_num=self.nx,
                                                full_x_thresh=params("logging.n_to_print_whole_x_vector"),
-                                               check_for_overflow=params("general.check_objfun_for_overflow")
-                                               verbose=self.do_logging)
+                                               check_for_overflow=params("general.check_objfun_for_overflow"))
             num_samples_run += 1
 
         # Check if the average value was below our threshold
- [... 3 deleted lines not shown in the diff view ...]
+        # QUESTION: how to choose x in h when using averaged values
+        if self.h is None:
+            if num_samples_run > 0 and \
+                    sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) <= self.model.min_objective_value():
+                exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
+        else:
+            if num_samples_run > 0 and \
+                    sumsq(np.mean(rvec_list[:num_samples_run, :], axis=0)) + self.h(remove_scaling(x, self.scaling_changes),*self.argsh) <= self.model.min_objective_value():
+                exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
 
-        return rvec_list,
+        return rvec_list, obj_list, num_samples_run, exit_info
 
     def choose_point_to_replace(self, d, skip_kopt=True):
         delsq = self.delta ** 2
@@ -615,11 +732,18 @@ class Controller(object):
             self.last_successful_iter = current_iter # reset successful iteration check
         return
 
-    def calculate_ratio(self, current_iter, rvec_list, d, gopt, H):
+    def calculate_ratio(self, x, current_iter, rvec_list, d, gopt, H):
         exit_info = None
- [... 3 deleted lines not shown in the diff view ...]
+        # estimate actual objective value
+        obj = sumsq(np.mean(rvec_list, axis=0))
+        # pred_reduction = - calculate_model_value(gopt, H, d)
+        pred_reduction = - model_value(gopt, H, d)
+        if self.h is not None:
+            # QUESTION: x+d here correct? rvec_list takes mean value
+            obj += self.h(remove_scaling(x+d, self.scaling_changes), *self.argsh)
+            # since m(0) = h(x)
+            pred_reduction = self.h(remove_scaling(x, self.scaling_changes), *self.argsh) - model_value(gopt, H, d, x, self.h, self.argsh, self.scaling_changes)
+        actual_reduction = self.model.objopt() - obj
         self.diffs = [abs(actual_reduction - pred_reduction), self.diffs[0], self.diffs[1]]
         if min(sqrt(sumsq(d)), self.delta) > self.rho: # if ||d|| >= rho, successful!
             self.last_successful_iter = current_iter
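Note: with a regulariser, calculate_ratio above compares the actual reduction in F(x) = ||r(x)||^2 + h(x) against the reduction predicted by the local model m(d) = g'd + 0.5 d'Hd + h(x + d), whose value at d = 0 is h(x). A toy check of the two quantities and their ratio (my own sketch, not package code; the scaling handling is omitted):

import numpy as np

r = lambda x: np.array([x[0] - 1.0, 2.0 * x[1]])   # affine residuals, so the Gauss-Newton model is exact
h = lambda x: 0.1 * np.linalg.norm(x, 1)           # regulariser

x = np.array([0.5, 0.5])
J = np.array([[1.0, 0.0], [0.0, 2.0]])             # Jacobian of r at x
g, H = 2.0 * J.T @ r(x), 2.0 * J.T @ J             # gradient/Hessian of ||r(x)||^2
d = np.array([0.25, -0.25])                        # a trial step

pred_reduction = h(x) - (g @ d + 0.5 * d @ H @ d + h(x + d))
actual_reduction = (r(x) @ r(x) + h(x)) - (r(x + d) @ r(x + d) + h(x + d))
print(actual_reduction / pred_reduction)           # 1.0: the model is exact for affine residuals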
@@ -627,8 +751,7 @@ class Controller(object):
             if len(self.model.projections) > 1: # if we are using multiple projections, only warn since likely due to constraint intersection
                 exit_info = ExitInformation(EXIT_TR_INCREASE_WARNING, "Either multiple constraints are active or trust region step gave model increase")
             else:
-                exit_info = ExitInformation(EXIT_TR_INCREASE_ERROR, "
- [... 1 deleted line not shown in the diff view ...]
+                exit_info = ExitInformation(EXIT_TR_INCREASE_ERROR, "Trust region step gave model increase")
         ratio = actual_reduction / pred_reduction
         return ratio, exit_info
 
@@ -636,13 +759,13 @@ class Controller(object):
         if len(self.last_iters_step_taken) <= params("slow.history_for_slow"):
             # Not enough info, simply append
             self.last_iters_step_taken.append(current_iter)
-            self.last_fopts_step_taken.append(self.model.
+            self.last_fopts_step_taken.append(self.model.objopt())
             this_iter_slow = False
         else:
             # Enough info - shift values
             self.last_iters_step_taken = self.last_iters_step_taken[1:] + [current_iter]
-            self.last_fopts_step_taken = self.last_fopts_step_taken[1:] + [self.model.
-            this_iter_slow = (log(self.last_fopts_step_taken[0]) - log(self.model.
+            self.last_fopts_step_taken = self.last_fopts_step_taken[1:] + [self.model.objopt()]
+            this_iter_slow = (log(self.last_fopts_step_taken[0]) - log(self.model.objopt())) / \
                              float(params("slow.history_for_slow")) < params("slow.thresh_for_slow")
         # Update counter of number of slow iterations
         if this_iter_slow:
@@ -659,9 +782,9 @@ class Controller(object):
     def soft_restart(self, number_of_samples, nruns_so_far, params, x_in_abs_coords_to_save=None, rvec_to_save=None,
                      nsamples_to_save=None):
         # A successful run is one where we reduced fopt
-        if self.model.
+        if self.model.objopt() < self.last_run_fopt:
             self.last_successful_run = nruns_so_far
-            self.last_run_fopt = self.model.
+            self.last_run_fopt = self.model.objopt()
 
         ok_to_do_restart = (nruns_so_far - self.last_successful_run < params("restarts.max_unsuccessful_restarts")) and \
                            (self.nf < self.maxfun)
@@ -677,12 +800,12 @@ class Controller(object):
         if x_in_abs_coords_to_save is not None:
             assert rvec_to_save is not None, "Soft restart: specified x_to_save but not rvec_to_save"
             assert nsamples_to_save is not None, "Soft restart: specified x_to_save but not nsamples_to_save"
-            self.model.save_point(x_in_abs_coords_to_save, rvec_to_save, nsamples_to_save, x_in_abs_coords=True)
-        self.model.save_point(self.model.xopt(abs_coordinates=True), self.model.ropt(),
+            self.model.save_point(x_in_abs_coords_to_save, rvec_to_save, nsamples_to_save, self.nx, x_in_abs_coords=True)
+        self.model.save_point(self.model.xopt(abs_coordinates=True), self.model.ropt(), self.nx,
                               self.model.nsamples[self.model.kopt], x_in_abs_coords=True)
 
         if self.do_logging:
-            module_logger.info("Soft restart [currently, f = %g after %g function evals]" % (self.model.
+            module_logger.info("Soft restart [currently, f = %g after %g function evals]" % (self.model.objopt(), self.nf))
         # Resetting method: reset delta and rho, then move the closest 'num_steps' points to xk to improve geometry
         # Note: closest points because we are suddenly increasing delta & rho, so we want to encourage spreading out points
         self.delta = self.rhobeg
@@ -724,17 +847,17 @@ class Controller(object):
         for i in range(num_pts_to_add):
             xnew = self.model.xopt() + dirns[i, :] # always base move around best value so far
             x = self.model.as_absolute_coordinates(xnew)
-            rvec_list,
+            rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
             # Handle exit conditions (f < min obj value or maxfun reached)
             if exit_info is not None:
                 if num_samples_run > 0:
-                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                           x_in_abs_coords=True)
                 return exit_info # return & quit
 
             # Otherwise, add new results
-            self.model.add_new_point(xnew, rvec_list[0, :]) # expect step, not absolute x
+            self.model.add_new_point(xnew, rvec_list[0, :], self.nx) # expect step, not absolute x
             for i in range(1, num_samples_run):
                 self.model.add_new_sample(self.model.npt() - 1, rvec_extra=rvec_list[i, :])
 
@@ -771,11 +894,11 @@ class Controller(object):
             add_noise = params("noise.scale_factor_for_quit") * params("noise.additive_noise_level")
             for k in range(self.model.npt()):
                 all_fvals_within_noise = all_fvals_within_noise and \
-                                         (self.model.
+                                         (self.model.objval[k] <= self.model.objopt() + add_noise / sqrt(self.model.nsamples[k]))
         else: # noise_level_multiplicative
             ratio = 1.0 + params("noise.scale_factor_for_quit") * params("noise.multiplicative_noise_level")
             for k in range(self.model.npt()):
-                this_ratio = self.model.
+                this_ratio = self.model.objval[k] / self.model.objopt() # fval_opt strictly positive (would have quit o/w)
                 all_fvals_within_noise = all_fvals_within_noise and (
                                          this_ratio <= ratio / sqrt(self.model.nsamples[k]))
         return all_fvals_within_noise
@@ -804,17 +927,17 @@ class Controller(object):
                 dirns[i, :] = -dirns[i, :]
             xnew = np.maximum(np.minimum(self.model.xopt() + dirns[i, :], self.model.su), self.model.sl)
             x = self.model.as_absolute_coordinates(xnew)
-            rvec_list,
+            rvec_list, obj_list, num_samples_run, exit_info = self.evaluate_objective(x, number_of_samples, params)
 
             # Handle exit conditions (f < min obj value or maxfun reached)
             if exit_info is not None:
                 if num_samples_run > 0:
-                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                    self.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, self.nx,
                                           x_in_abs_coords=True)
                 return exit_info # return & quit
 
             # Otherwise, add new results
-            self.model.change_point(knew, xnew, rvec_list[0, :]) # expect step, not absolute x
+            self.model.change_point(knew, xnew, rvec_list[0, :], self.nx) # expect step, not absolute x
             for i in range(1, num_samples_run):
                 self.model.add_new_sample(knew, rvec_extra=rvec_list[i, :])
         return None