DFO-LS 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of DFO-LS might be problematic; see the registry page for details.
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/METADATA +14 -34
- DFO_LS-1.5.1.dist-info/RECORD +14 -0
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/WHEEL +1 -1
- dfols/__init__.py +1 -1
- dfols/controller.py +236 -113
- dfols/model.py +61 -33
- dfols/params.py +18 -2
- dfols/solver.py +95 -61
- dfols/trust_region.py +86 -7
- dfols/util.py +20 -9
- DFO_LS-1.4.1.dist-info/RECORD +0 -14
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/LICENSE.txt +0 -0
- {DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/top_level.txt +0 -0
dfols/solver.py
CHANGED
@@ -48,10 +48,10 @@ module_logger = logging.getLogger(__name__)

 # A container for the results of the optimization routine
 class OptimResults(object):
-    def __init__(self, xmin, rmin,
+    def __init__(self, xmin, rmin, objmin, jacmin, nf, nx, nruns, exit_flag, exit_msg, xmin_eval_num, jacmin_eval_nums):
         self.x = xmin
         self.resid = rmin
-        self.
+        self.obj = objmin
         self.jacobian = jacmin
         self.nf = nf
         self.nx = nx
@@ -59,6 +59,8 @@ class OptimResults(object):
         self.flag = exit_flag
         self.msg = exit_msg
         self.diagnostic_info = None
+        self.xmin_eval_num = xmin_eval_num
+        self.jacmin_eval_nums = jacmin_eval_nums
         # Set standard names for exit flags
         self.EXIT_SLOW_WARNING = EXIT_SLOW_WARNING
         self.EXIT_MAXFUN_WARNING = EXIT_MAXFUN_WARNING
@@ -77,7 +79,7 @@ class OptimResults(object):
             output += "Residual vector = %s\n" % str(self.resid)
         else:
             output += "Not showing residual vector because it is too long; check self.resid\n"
-        output += "Objective value f(xmin) = %.10g\n" % self.
+        output += "Objective value f(xmin) = %.10g\n" % self.obj
         output += "Needed %g objective evaluations (at %g points)\n" % (self.nf, self.nx)
         if self.nruns > 1:
             output += "Did a total of %g runs\n" % self.nruns
@@ -89,14 +91,17 @@ class OptimResults(object):
             output += "Not showing approximate Jacobian because it is too long; check self.jacobian\n"
         if self.diagnostic_info is not None:
             output += "Diagnostic information available; check self.diagnostic_info\n"
+        output += "Solution xmin was evaluation point %g\n" % self.xmin_eval_num
+        if len(self.jacmin_eval_nums) < 100:
+            output += "Approximate Jacobian formed using evaluation points %s\n" % str(self.jacmin_eval_nums)
         output += "Exit flag = %g\n" % self.flag
         output += "%s\n" % self.msg
         output += "****************************\n"
         return output


-def solve_main(objfun, x0,
-               diagnostic_info, scaling_changes, r0_avg_old=None, r0_nsamples_old=None, default_growing_method_set_by_user=None,
+def solve_main(objfun, x0, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
+               diagnostic_info, scaling_changes, h=None, lh=None, argsh=(), prox_uh=None, argsprox=None, r0_avg_old=None, r0_nsamples_old=None, default_growing_method_set_by_user=None,
                do_logging=True, print_progress=False):
     # Evaluate at x0 (keep nf, nx correct and check for f < 1e-12)
     # The hard bit is determining what m = len(r0) should be, and allocating memory appropriately
@@ -105,18 +110,17 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
     # Evaluate the first time...
     nf = nf_so_far + 1
     nx = nx_so_far + 1
-    r0,
-
+    r0, obj0 = eval_least_squares_with_regularisation(objfun, remove_scaling(x0, scaling_changes), h,
+                                                      argsf=argsf, argsh=argsh, verbose=do_logging, eval_num=nf, pt_num=nx,
                                                       full_x_thresh=params("logging.n_to_print_whole_x_vector"),
-                                                      check_for_overflow=params("general.check_objfun_for_overflow")
-                                                      verbose=do_logging)
+                                                      check_for_overflow=params("general.check_objfun_for_overflow"))
     m = len(r0)

     # Now we have m, we can evaluate the rest of the times
     rvec_list = np.zeros((number_of_samples, m))
-
+    obj_list = np.zeros((number_of_samples,))
     rvec_list[0, :] = r0
-
+    obj_list[0] = obj0
     num_samples_run = 1
     exit_info = None

@@ -128,15 +132,20 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu

         nf += 1
         # Don't increment nx for x0 - we did this earlier
-        rvec_list[i, :],
+        rvec_list[i, :], obj_list[i] = eval_least_squares_with_regularisation(objfun, remove_scaling(x0, scaling_changes), h,
+                                                                              argsf=argsf, argsh=argsh, verbose=do_logging, eval_num=nf, pt_num=nx,
                                                                               full_x_thresh=params("logging.n_to_print_whole_x_vector"),
-                                                                              check_for_overflow=params("general.check_objfun_for_overflow")
-                                                                              verbose=do_logging)
+                                                                              check_for_overflow=params("general.check_objfun_for_overflow"))
         num_samples_run += 1

     r0_avg = np.mean(rvec_list[:num_samples_run, :], axis=0)
-
-
+    # NOTE: modify objvalue here
+    if h is None:
+        if sumsq(r0_avg) <= params("model.abs_tol"):
+            exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
+    else:
+        if sumsq(r0_avg) + h(remove_scaling(x0, scaling_changes), *argsh)<= params("model.abs_tol"):
+            exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")

     if exit_info is not None:
         return x0, r0_avg, sumsq(r0_avg), None, num_samples_run, nf, nx, nruns_so_far+1, exit_info, diagnostic_info
@@ -162,8 +171,8 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
         params('growing.delta_scale_new_dirns', new_value=0.1)

     # Initialise controller
-    control = Controller(objfun,
-                         params, scaling_changes, do_logging)
+    control = Controller(objfun, argsf, x0, r0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun,
+                         params, scaling_changes, do_logging, h=h, lh=lh, argsh=argsh, prox_uh=prox_uh, argsprox=argsprox)

     # Initialise interpolation set
     number_of_samples = max(nsamples(control.delta, control.rho, 0, nruns_so_far), 1)
@@ -178,8 +187,8 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
         module_logger.info("Initialising (coordinate directions)")
     exit_info = control.initialise_coordinate_directions(number_of_samples, num_directions, params)
     if exit_info is not None:
-        x, rvec,
-        return x, rvec,
+        x, rvec, obj, jacmin, nsamples, x_eval_num, jac_eval_nums = control.model.get_final_results()
+        return x, rvec, obj, None, nsamples, control.nf, control.nx, nruns_so_far + 1, exit_info, diagnostic_info, x_eval_num, jac_eval_nums

     finished_growing = (control.model.npt() >= control.model.num_pts)  # have we finished growing the initial set yet?

@@ -271,16 +280,30 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 nruns_so_far += 1
                 break  # quit

-
-
-
+        tau = 1.0  # ratio used in the safety phase
+        if h is None:
+            # Trust region step
+            d, gopt, H, gnew, crvmin = control.trust_region_step(params)
+        else:
+            # Calculate criticality measure
+            criticality_measure = control.evaluate_criticality_measure(params)
+            # Trust region step
+            d, gopt, H, gnew, crvmin = control.trust_region_step(params, criticality_measure)
+            try:
+                tau = min(criticality_measure/(LA.norm(gopt)+lh), 1.0)
+            except ValueError:
+                # In some instances, gopt can have nan/inf values -- this ultimately calls a safety step and is generally fine
+                # but we need to set a value for tau nonetheless
+                tau = 1.0
+
         if do_logging:
             module_logger.debug("Trust region step is d = " + str(d))
+
         xnew = control.model.xopt() + d
         dnorm = min(LA.norm(d), control.delta)

         if print_progress:
-            print("{:^5}{:^7}{:^10.2e}{:^10.2e}{:^10.2e}{:^10.2e}{:^7}".format(nruns_so_far+1, current_iter+1, control.model.
+            print("{:^5}{:^7}{:^10.2e}{:^10.2e}{:^10.2e}{:^10.2e}{:^7}".format(nruns_so_far+1, current_iter+1, control.model.objopt(), np.linalg.norm(gopt), control.delta, control.rho, control.nf))

         if params("logging.save_diagnostic_info"):
             diagnostic_info.save_info_from_control(control, nruns_so_far, current_iter,
@@ -289,7 +312,7 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             diagnostic_info.update_interpolation_information(interp_error, ls_interp_cond_num, linalg_resid,
                                                              sqrt(norm_J_error), LA.norm(gopt), LA.norm(d))

-        if dnorm < params("general.safety_step_thresh") * control.rho and not finished_growing and params("growing.safety.do_safety_step"):
+        if dnorm < tau * params("general.safety_step_thresh") * control.rho and not finished_growing and params("growing.safety.do_safety_step"):
             if do_logging:
                 module_logger.debug("Safety step during growing phase")

@@ -415,10 +438,10 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 if do_logging:
                     module_logger.info("New rho = %g after %i function evaluations" % (control.rho, control.nf))
                     if control.n() < params("logging.n_to_print_whole_x_vector"):
-                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.
+                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.objopt())
                                             + str(control.model.xopt(abs_coordinates=True)))
                     else:
-                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.
+                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.objopt()))
                 continue  # next iteration
             else:
                 # Quit on rho=rhoend
@@ -439,13 +462,14 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             else:
                 # Cannot reduce rho, so check xnew and quit
                 x = control.model.as_absolute_coordinates(xnew)
+                ##print("x from xnew", x)
                 number_of_samples = max(nsamples(control.delta, control.rho, current_iter, nruns_so_far), 1)
-                rvec_list,
+                rvec_list, obj_list, num_samples_run, exit_info = control.evaluate_objective(x, number_of_samples,
                                                                                              params)

                 if num_samples_run > 0:
                     control.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0),
-                                             num_samples_run, x_in_abs_coords=True)
+                                             num_samples_run, control.nx, x_in_abs_coords=True)

                 if exit_info is not None:
                     nruns_so_far += 1
@@ -514,8 +538,9 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu

         # Evaluate new point
         x = control.model.as_absolute_coordinates(xnew)
+        ##print("x from xnew again", x)
         number_of_samples = max(nsamples(control.delta, control.rho, current_iter, nruns_so_far), 1)
-        rvec_list,
+        rvec_list, obj_list, num_samples_run, exit_info = control.evaluate_objective(x, number_of_samples, params)
         if np.any(np.isnan(rvec_list)):
             # Just exit without saving the current point
             # We should be able to do a hard restart though, because it's unlikely
@@ -529,13 +554,13 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 break  # quit
         if exit_info is not None:
             if num_samples_run > 0:
-                control.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                control.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, control.nx,
                                          x_in_abs_coords=True)
             nruns_so_far += 1
             break  # quit

         # Estimate f in order to compute 'actual reduction'
-        ratio, exit_info = control.calculate_ratio(current_iter, rvec_list[:num_samples_run, :], d, gopt, H)
+        ratio, exit_info = control.calculate_ratio(control.model.xopt(abs_coordinates=True), current_iter, rvec_list[:num_samples_run, :], d, gopt, H)
         if exit_info is not None:
             if exit_info.able_to_do_restart() and params("restarts.use_restarts") and params(
                     "restarts.use_soft_restarts"):
@@ -565,9 +590,9 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             diagnostic_info.update_slow_iter(-1)  # n/a, unless otherwise update
         if ratio < params("tr_radius.eta1"):  # ratio < 0.1
             if finished_growing:
-                control.delta = min(params("tr_radius.gamma_dec") * control.delta, dnorm)
+                control.delta = min(params("tr_radius.gamma_dec") * control.delta, dnorm) / tau
             else:
-                control.delta = min(params("growing.gamma_dec") * control.delta, dnorm)  # different gamma_dec
+                control.delta = min(params("growing.gamma_dec") * control.delta, dnorm) / tau  # different gamma_dec
             if params("logging.save_diagnostic_info"):
                 diagnostic_info.update_iter_type(ITER_ACCEPTABLE_NO_GEOM if ratio > 0.0
                                                  else ITER_UNSUCCESSFUL_NO_GEOM)  # we flag geom update below
@@ -618,7 +643,7 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu

         if do_logging:
             module_logger.debug("Updating with knew = %i" % knew)
-        control.model.change_point(knew, xnew, rvec_list[0, :])  # expect step, not absolute x
+        control.model.change_point(knew, xnew, rvec_list[0, :], control.nx)  # expect step, not absolute x
         for i in range(1, num_samples_run):
             control.model.add_new_sample(knew, rvec_extra=rvec_list[i, :])

@@ -651,7 +676,7 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 break  # quit

             # Update list of successful steps
-            this_step_was_not_improvement = control.model.
+            this_step_was_not_improvement = control.model.objsave is not None and control.model.objopt() > control.model.objsave
             succ_steps_not_improvement.pop()  # remove last item
             succ_steps_not_improvement.insert(0, this_step_was_not_improvement)  # add at beginning
             # Terminate (not restart) if all are True
@@ -828,10 +853,10 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 if do_logging:
                     module_logger.info("New rho = %g after %i function evaluations" % (control.rho, control.nf))
                     if control.n() < params("logging.n_to_print_whole_x_vector"):
-                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.
+                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.objopt())
                                             + str(control.model.xopt(abs_coordinates=True)))
                     else:
-                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.
+                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.objopt()))
                 continue  # next iteration
             else:
                 # Quit on rho=rhoend
@@ -857,14 +882,14 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
     # (end main loop)

     # Quit & return the important information
-    x, rvec,
+    x, rvec, obj, jacmin, nsamples, x_eval_num, jac_eval_nums = control.model.get_final_results()
     if do_logging:
         module_logger.debug("At return from DFO-LS, number of function evals = %i" % nf)
-        module_logger.debug("Smallest objective value = %.15g at x = " %
-    return x, rvec,
+        module_logger.debug("Smallest objective value = %.15g at x = " % obj + str(x))
+    return x, rvec, obj, jacmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info, x_eval_num, jac_eval_nums


-def solve(objfun, x0,
+def solve(objfun, x0, h=None, lh=None, prox_uh=None, argsf=(), argsh=(), argsprox=(), bounds=None, projections=[], npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
           objfun_has_noise=False, scaling_within_bounds=False, do_logging=True, print_progress=False):
     x0 = x0.astype(float)
     n = len(x0)
@@ -934,13 +959,21 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non

     exit_info = None
     # Input & parameter checks
+    if exit_info is None and h is not None:
+        if prox_uh is None:
+            exit_info = ExitInformation(EXIT_INPUT_ERROR, "Must provide prox_uh input if h is not None")
+        elif lh is None:
+            exit_info = ExitInformation(EXIT_INPUT_ERROR, "Must provide lh input if h is not None")
+        elif lh <= 0.0:
+            exit_info = ExitInformation(EXIT_INPUT_ERROR, "lh must be strictly positive")
+
     if exit_info is None and npt < n + 1:
         exit_info = ExitInformation(EXIT_INPUT_ERROR, "npt must be >= n+1 for linear models with inexact interpolation")

-    if exit_info is None and rhobeg
+    if exit_info is None and rhobeg <= 0.0:
         exit_info = ExitInformation(EXIT_INPUT_ERROR, "rhobeg must be strictly positive")

-    if exit_info is None and rhoend
+    if exit_info is None and rhoend <= 0.0:
         exit_info = ExitInformation(EXIT_INPUT_ERROR, "rhoend must be strictly positive")

     if exit_info is None and rhobeg <= rhoend:
@@ -1013,12 +1046,12 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
         x0 = xp.copy()

     # Enforce lower & upper bounds on x0
-    idx = (x0
+    idx = (x0 < xl)
     if np.any(idx):
         warnings.warn("x0 below lower bound, adjusting", RuntimeWarning)
         x0[idx] = xl[idx]

-    idx = (x0
+    idx = (x0 > xu)
     if np.any(idx):
         warnings.warn("x0 above upper bound, adjusting", RuntimeWarning)
         x0[idx] = xu[idx]
@@ -1028,9 +1061,9 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
     nruns = 0
     nf = 0
     nx = 0
-    xmin, rmin,
-        solve_main(objfun, x0,
-                   diagnostic_info, scaling_changes, default_growing_method_set_by_user=default_growing_method_set_by_user,
+    xmin, rmin, objmin, jacmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info, xmin_eval_num, jacmin_eval_nums = \
+        solve_main(objfun, x0, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                   diagnostic_info, scaling_changes, h, lh, argsh, prox_uh, argsprox, default_growing_method_set_by_user=default_growing_method_set_by_user,
                    do_logging=do_logging, print_progress=print_progress)

     # Hard restarts loop
@@ -1045,27 +1078,28 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non

         if do_logging:
             module_logger.info("Restarting from finish point (f = %g) after %g function evals; using rhobeg = %g and rhoend = %g"
-                               % (
+                               % (objmin, nf, rhobeg, rhoend))
         if params("restarts.hard.use_old_rk"):
-            xmin2, rmin2,
-                solve_main(objfun, xmin,
-                           diagnostic_info, scaling_changes, r0_avg_old=rmin, r0_nsamples_old=nsamples_min,
+            xmin2, rmin2, objmin2, jacmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info, xmin_eval_num2, jacmin_eval_nums2 = \
+                solve_main(objfun, xmin, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                           diagnostic_info, scaling_changes, h, lh, argsh, prox_uh, argsprox, r0_avg_old=rmin, r0_nsamples_old=nsamples_min,
                            do_logging=do_logging, print_progress=print_progress)
         else:
-            xmin2, rmin2,
-                solve_main(objfun, xmin,
-                           diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)
+            xmin2, rmin2, objmin2, jacmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info, xmin_eval_num2, jacmin_eval_nums2 = \
+                solve_main(objfun, xmin, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                           diagnostic_info, scaling_changes, h, lh, argsh, prox_uh, argsprox, do_logging=do_logging, print_progress=print_progress)

-        if
+        if objmin2 < objmin or np.isnan(objmin):
             if do_logging:
-                module_logger.info("Successful run with new f = %s compared to old f = %s" % (
+                module_logger.info("Successful run with new f = %s compared to old f = %s" % (objmin2, objmin))
             last_successful_run = nruns
-            (xmin, rmin,
+            (xmin, rmin, objmin, nsamples_min, xmin_eval_num) = (xmin2, rmin2, objmin2, nsamples2, xmin_eval_num2)
             if jacmin2 is not None:  # may be None if finished during setup phase, in which case just use old Jacobian
                 jacmin = jacmin2
+                jacmin_eval_nums = jacmin_eval_nums2
         else:
             if do_logging:
-                module_logger.info("Unsuccessful run with new f = %s compared to old f = %s" % (
+                module_logger.info("Unsuccessful run with new f = %s compared to old f = %s" % (objmin2, objmin))

         if nruns - last_successful_run >= params("restarts.max_unsuccessful_restarts"):
             exit_info = ExitInformation(EXIT_SUCCESS, "Reached maximum number of unsuccessful restarts")
@@ -1077,7 +1111,7 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
     if scaling_changes is not None and jacmin is not None:
         for i in range(n):
             jacmin[:, i] = jacmin[:, i] / scaling_changes[1][i]
-    results = OptimResults(remove_scaling(xmin, scaling_changes), rmin,
+    results = OptimResults(remove_scaling(xmin, scaling_changes), rmin, objmin, jacmin, nf, nx, nruns, exit_flag, exit_msg, xmin_eval_num, jacmin_eval_nums)
     if params("logging.save_diagnostic_info"):
         df = diagnostic_info.to_dataframe(with_xk=params("logging.save_xk"), with_rk=params("logging.save_rk"))
         results.diagnostic_info = df
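The solver.py changes above thread an optional non-smooth regulariser through the whole pipeline: the objective becomes the least-squares term plus h(x), with lh its Lipschitz constant and prox_uh its proximal operator, and the input checks require prox_uh and a strictly positive lh whenever h is supplied. As a rough illustration only, the sketch below shows how these new keyword arguments of dfols.solve might be exercised with an L1 regulariser; the problem data, the value of lam, and the exact semantics of h/lh/prox_uh are assumptions inferred from this diff, not documented API behaviour.

    import numpy as np
    import dfols

    # Small illustrative least-squares problem (assumed data, not from the package)
    A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    b = np.array([1.0, -1.0, 2.0])

    def objfun(x):
        return A.dot(x) - b  # residual vector r(x); objective is ||r(x)||^2 (+ h(x) here)

    lam = 0.1
    def h(x):
        return lam * np.sum(np.abs(x))  # convex, non-smooth regulariser h(x) = lam*||x||_1

    # Lipschitz constant of h w.r.t. the Euclidean norm: lam*sqrt(n) for n=2 variables
    lh = lam * np.sqrt(2)

    def prox_uh(x, u):
        # prox_{u*h}(x) for the L1 norm is componentwise soft-thresholding at u*lam,
        # matching the call pattern prox_uh(x, u, *argsprox) seen in the diff
        return np.sign(x) * np.maximum(np.abs(x) - u * lam, 0.0)

    x0 = np.array([0.5, 0.5])
    soln = dfols.solve(objfun, x0, h=h, lh=lh, prox_uh=prox_uh)
    print(soln)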
dfols/trust_region.py
CHANGED
@@ -29,14 +29,14 @@ solves
        s.t. lower <= x <= upper
             ||x-xbase|| <= Delta
    With this value, the variable d=x-xbase solves the problem
-
+       min_d  abs(c + g' * d)
        s.t. lower <= xbase + d <= upper
             ||d|| <= delta
    Again, we have a version of this for handling arbitrary constraints
    The call
        x = ctrsbox_geometry(xbase, c, g, projections, Delta)
    Solves
-
+       min_d  abs(c + g' * d)
        s.t. xbase + d is feasible w.r.t. the constraint set C
             ||d|| <= delta

@@ -70,7 +70,7 @@ alternative licensing.
 # Ensure compatibility with Python 2
 from __future__ import absolute_import, division, print_function, unicode_literals

-from math import sqrt
+from math import sqrt, ceil
 import numpy as np
 try:
     import trustregion
@@ -79,13 +79,93 @@ except ImportError:
     # Fall back to Python implementation
     USE_FORTRAN = False

-from .util import dykstra, pball, pbox, sumsq, model_value
+from .util import dykstra, pball, pbox, sumsq, model_value, remove_scaling

-__all__ = ['
+__all__ = ['ctrsbox_sfista', 'ctrsbox_pgd', 'ctrsbox_geometry', 'trsbox', 'trsbox_geometry']

 ZERO_THRESH = 1e-14

-def
+def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), argsprox=(), func_tol=1e-3, max_iters=500, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN, scaling_changes=None, sfista_iters_scale=1.0):
+    n = xopt.size
+    assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
+    assert g.shape == (n,), "g and xopt have incompatible sizes"
+    assert len(H.shape) == 2, "H must be a matrix"
+    assert H.shape == (n,n), "H and xopt have incompatible sizes"
+    assert np.allclose(H, H.T), "H must be symmetric"
+    assert delta > 0.0, "delta must be strictly positive"
+
+    # Initialization
+    d = np.zeros(n)  # start with zero vector
+    y = np.zeros(n)
+    t = 1
+    k_H = np.linalg.norm(H, 2)
+    crvmin = -1.0
+
+    # Number of iterations & smoothing parameter, from Theorem 10.57 in
+    # [A. Beck. First-order methods in optimization, SIAM, 2017]
+    # We do not use the values of k and mu given in the theorem statement, but rather the intermediate
+    # results on p313 (K1 for number of iterations, and the immediate next line for mu)
+    # Note: in the book's notation, Gamma=delta^2, alpha=1, beta=L_h^2/2, Lf=k_H [alpha and beta from Thm 10.51]
+    try:
+        MAX_LOOP_ITERS = ceil(sfista_iters_scale * delta * (L_h+sqrt(L_h*L_h+2*k_H*func_tol)) / func_tol)
+        MAX_LOOP_ITERS = min(MAX_LOOP_ITERS, max_iters)
+    except ValueError:
+        MAX_LOOP_ITERS = max_iters
+    u = 2 * delta / (MAX_LOOP_ITERS * L_h)  # smoothing parameter
+    # u = 2 * func_tol / (L_h ** 2 + L_h * sqrt(L_h ** 2 + 2 * k_H * func_tol))  # the above choice works better in practice
+
+    def gradient_Fu(xopt, g, H, u, prox_uh, d):
+        # Calculate gradient_Fu,
+        # where Fu(d) := g(d) + h_u(d) and h_u(d) is a 1/u-smooth approximation of h.
+        # We assume that h is globally Lipschitz continous with constant L_h,
+        # then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.
+        return g + H @ d + (xopt + d - prox_uh(remove_scaling(xopt + d, scaling_changes), u, *argsprox)) / u
+
+    # Lipschitz constant of gradient_Fu
+    l = k_H + 1 / u
+
+    # trust region is a ball of radius delta around xopt
+    trproj = lambda w: pball(w, xopt, delta)
+
+    # combine trust region constraints with user-entered constraints
+    P = list(projections)  # make a copy of the projections list
+    P.append(trproj)
+    def proj(d0):
+        p = dykstra(P, xopt+d0, max_iter=d_max_iters, tol=d_tol)
+        # we want the step only, so we subtract xopt
+        # from the new point: proj(xk+d) - xk
+        return p - xopt
+
+    # general step
+    model_value_best = model_value(g, H, d, xopt, h, argsh, scaling_changes)
+    d_best = d.copy()
+    for k in range(MAX_LOOP_ITERS):
+        prev_d = d.copy()
+        prev_t = t
+        # gradient_Fu at y
+        g_Fu = gradient_Fu(xopt, g, H, u, prox_uh, d, *argsprox)
+
+        # main update step
+        d = proj(y - g_Fu / l)
+        new_model_value = model_value(g, H, d, xopt, h, argsh, scaling_changes)
+        if new_model_value < model_value_best:
+            d_best = d.copy()
+            model_value_best = new_model_value
+
+        # update true gradient
+        # gnew is the gradient of the smoothed function
+        gnew = gradient_Fu(xopt, g, H, u, prox_uh, d, *argsprox)
+
+        # update CRVMIN
+        crv = d.dot(H).dot(d)/sumsq(d) if sumsq(d) >= ZERO_THRESH else crvmin
+        crvmin = min(crvmin, crv) if crvmin != -1.0 else crv
+
+        # momentum update
+        t = (1 + sqrt(1 + 4*t*t)) / 2
+        y = d + (prev_t - 1) * (d - prev_d) / t
+    return d, gnew, crvmin
+
+def ctrsbox_pgd(xopt, g, H, projections, delta, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN):
     n = xopt.size
     assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
     assert g.shape == (n,), "g and xopt have incompatible sizes"
@@ -151,7 +231,6 @@ def ctrsbox(xopt, g, H, projections, delta, d_max_iters=100, d_tol=1e-10, use_fo

     return d, gnew, crvmin

-
 def trsbox(xopt, g, H, sl, su, delta, use_fortran=USE_FORTRAN):
     if use_fortran:
         return trustregion.solve(g, H, delta,
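The new ctrsbox_sfista routine handles the non-smooth term by replacing h with its Moreau envelope and, as its own comments state, using the gradient of that smoothed term, which for a Moreau envelope is (x - prox_{u*h}(x)) / u. The standalone sketch below (an L1 example with illustrative values of lam and u, not code from the package) numerically checks that identity against a finite-difference of the envelope; this is the same formula that gradient_Fu in the diff relies on.

    import numpy as np

    lam, u = 0.5, 0.01  # regulariser weight and smoothing parameter (illustrative values)

    def prox_uh(x, u):
        # prox_{u*lam*||.||_1}(x): componentwise soft-thresholding at u*lam
        return np.sign(x) * np.maximum(np.abs(x) - u * lam, 0.0)

    def moreau_envelope(x, u):
        # M_u(x) = min_z lam*||z||_1 + ||z - x||^2/(2u), attained at z = prox_uh(x, u)
        z = prox_uh(x, u)
        return lam * np.sum(np.abs(z)) + np.sum((z - x) ** 2) / (2 * u)

    def grad_moreau(x, u):
        # Gradient identity used by the smoothed step: (x - prox_{u*h}(x)) / u
        return (x - prox_uh(x, u)) / u

    x = np.array([0.3, -0.002, 1.5])
    eps = 1e-7
    fd = np.array([(moreau_envelope(x + eps * e, u) - moreau_envelope(x - eps * e, u)) / (2 * eps)
                   for e in np.eye(len(x))])
    print(np.allclose(grad_moreau(x, u), fd, atol=1e-5))  # expect True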
dfols/util.py
CHANGED
@@ -31,7 +31,7 @@ import scipy.linalg as LA
 import sys


-__all__ = ['sumsq', '
+__all__ = ['sumsq', 'eval_least_squares_with_regularisation', 'model_value', 'random_orthog_directions_within_bounds',
            'random_directions_within_bounds', 'apply_scaling', 'remove_scaling', 'pbox', 'pball', 'dykstra', 'qr_rank']

 module_logger = logging.getLogger(__name__)
@@ -47,9 +47,9 @@ def sumsq(x):
     return np.dot(x, x)


-def
+def eval_least_squares_with_regularisation(objfun, x, h=None, argsf=(), argsh=(), verbose=True, eval_num=0, pt_num=0, full_x_thresh=6, check_for_overflow=True):
     # Evaluate least squares function
-    fvec = objfun(x, *
+    fvec = objfun(x, *argsf)

     if check_for_overflow:
         try:
@@ -62,20 +62,31 @@ def eval_least_squares_objective(objfun, x, args=(), verbose=True, eval_num=0, p
     else:
         f = sumsq(fvec)

+    # objective = least-squares + regularisation
+    obj = f
+    if h is not None:
+        # Evaluate regularisation term
+        hvalue = h(x, *argsh)
+        obj = f + hvalue
+
     if verbose:
         if len(x) < full_x_thresh:
-            module_logger.info("Function eval %i at point %i has
+            module_logger.info("Function eval %i at point %i has obj = %.15g at x = " % (eval_num, pt_num, obj) + str(x))
         else:
-            module_logger.info("Function eval %i at point %i has
+            module_logger.info("Function eval %i at point %i has obj = %.15g at x = [...]" % (eval_num, pt_num, obj))

-    return fvec,
+    return fvec, obj


-def model_value(g, H, s):
-    # Calculate model value (s^T * g + 0.5* s^T * H * s) = s^T * (gopt + 0.5 * H*s)
+def model_value(g, H, s, xopt=(), h=None,argsh=(), scaling_changes=None):
+    # Calculate model value (s^T * g + 0.5* s^T * H * s) + h(xopt + s) = s^T * (gopt + 0.5 * H*s) + h(xopt + s)
     assert g.shape == s.shape, "g and s have incompatible sizes"
     Hs = H.dot(s)
-
+    rtn = np.dot(s, g + 0.5*Hs)
+    if h is not None:
+        hvalue = h(remove_scaling(xopt+s, scaling_changes), *argsh)
+        rtn += hvalue
+    return rtn


 def get_scale(dirn, delta, lower, upper):
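Conceptually, the util.py changes make both the true objective and the local model carry the extra regularisation term: the evaluated objective becomes ||r(x)||^2 + h(x), and the model value becomes s^T g + 0.5 s^T H s + h(xopt + s). A minimal standalone sketch of these two formulas follows; it does not call dfols.util, and the data and the example h are assumptions for illustration only.

    import numpy as np

    def h(x, lam=0.1):
        return lam * np.sum(np.abs(x))  # example regulariser

    def regularised_objective(r, x):
        # objective = least-squares part plus regularisation: ||r(x)||^2 + h(x)
        return np.dot(r, r) + h(x)

    def regularised_model_value(g, H, s, xopt):
        # model of the smooth part, s^T g + 0.5 s^T H s, plus h at the trial point xopt + s
        return np.dot(s, g + 0.5 * H.dot(s)) + h(xopt + s)

    xopt = np.array([1.0, -2.0])
    g = np.array([0.5, -0.3])
    H = np.array([[2.0, 0.1], [0.1, 1.0]])
    s = np.array([0.2, 0.1])
    print(regularised_objective(np.array([0.3, -0.1, 0.4]), xopt))
    print(regularised_model_value(g, H, s, xopt))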
DFO_LS-1.4.1.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-dfols/__init__.py,sha256=D-x5glfZFfJ8-bdjA-4k4JFTDu1Eylaz3EL4GSH28eI,1605
-dfols/controller.py,sha256=LSeHZoKaKUEYgB1_2subjKskHJ8mWccMbn-LOpxJ7LM,42769
-dfols/diagnostic_info.py,sha256=2kEUkL-MS4eDENUf1r2hOWsntP8OxMDKi_kyHmrC9V4,6081
-dfols/hessian.py,sha256=sExx4J4KoGwHItbthX2odosB2ONbQFvLdlcod7PIh4k,4262
-dfols/model.py,sha256=q70zuqocNtsaXzNjWHcTdrS209BdQt4uY0GNtp0qlI8,18809
-dfols/params.py,sha256=_Va1ybnQDIzWaXvImcSeH8xnNE_A2zpAfBgDG74sc5c,17557
-dfols/solver.py,sha256=IKg3xWPLYlOW_zuTc_-HY_3ZvdDEfkyxARerERUQHlU,61264
-dfols/trust_region.py,sha256=hRKQx0fpSxol7dLZO0yrT7O5IDptPPSnDvxKQNZ3r0M,24603
-dfols/util.py,sha256=ysdIHTkrkWwCRKuGffofehKl-t5dT3sD9dfy0muI4ZI,9852
-DFO_LS-1.4.1.dist-info/LICENSE.txt,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-DFO_LS-1.4.1.dist-info/METADATA,sha256=RR6KhJi4Ae_1PES8Bpzqm3AYK2w12V-2MyDyjaCDe80,8552
-DFO_LS-1.4.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-DFO_LS-1.4.1.dist-info/top_level.txt,sha256=UfxRhaDN8HQx2_l17KbrDrERJ90OCN7VKkDMpYYbRLU,6
-DFO_LS-1.4.1.dist-info/RECORD,,
{DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/LICENSE.txt
File without changes

{DFO_LS-1.4.1.dist-info → DFO_LS-1.5.1.dist-info}/top_level.txt
File without changes