DFO-LS 1.4.1-py3-none-any.whl → 1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


dfols/solver.py CHANGED
@@ -48,10 +48,10 @@ module_logger = logging.getLogger(__name__)
 
 # A container for the results of the optimization routine
 class OptimResults(object):
-    def __init__(self, xmin, rmin, fmin, jacmin, nf, nx, nruns, exit_flag, exit_msg):
+    def __init__(self, xmin, rmin, objmin, jacmin, nf, nx, nruns, exit_flag, exit_msg, xmin_eval_num, jacmin_eval_nums):
         self.x = xmin
         self.resid = rmin
-        self.f = fmin
+        self.obj = objmin
         self.jacobian = jacmin
         self.nf = nf
         self.nx = nx
@@ -59,6 +59,8 @@ class OptimResults(object):
         self.flag = exit_flag
         self.msg = exit_msg
         self.diagnostic_info = None
+        self.xmin_eval_num = xmin_eval_num
+        self.jacmin_eval_nums = jacmin_eval_nums
         # Set standard names for exit flags
         self.EXIT_SLOW_WARNING = EXIT_SLOW_WARNING
         self.EXIT_MAXFUN_WARNING = EXIT_MAXFUN_WARNING
@@ -77,7 +79,7 @@ class OptimResults(object):
             output += "Residual vector = %s\n" % str(self.resid)
         else:
             output += "Not showing residual vector because it is too long; check self.resid\n"
-        output += "Objective value f(xmin) = %.10g\n" % self.f
+        output += "Objective value f(xmin) = %.10g\n" % self.obj
         output += "Needed %g objective evaluations (at %g points)\n" % (self.nf, self.nx)
         if self.nruns > 1:
             output += "Did a total of %g runs\n" % self.nruns
@@ -89,14 +91,17 @@ class OptimResults(object):
             output += "Not showing approximate Jacobian because it is too long; check self.jacobian\n"
         if self.diagnostic_info is not None:
             output += "Diagnostic information available; check self.diagnostic_info\n"
+        output += "Solution xmin was evaluation point %g\n" % self.xmin_eval_num
+        if len(self.jacmin_eval_nums) < 100:
+            output += "Approximate Jacobian formed using evaluation points %s\n" % str(self.jacmin_eval_nums)
         output += "Exit flag = %g\n" % self.flag
         output += "%s\n" % self.msg
         output += "****************************\n"
         return output
 
 
-def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
-               diagnostic_info, scaling_changes, r0_avg_old=None, r0_nsamples_old=None, default_growing_method_set_by_user=None,
+def solve_main(objfun, x0, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns_so_far, nf_so_far, nx_so_far, nsamples, params,
+               diagnostic_info, scaling_changes, h=None, lh=None, argsh=(), prox_uh=None, argsprox=None, r0_avg_old=None, r0_nsamples_old=None, default_growing_method_set_by_user=None,
                do_logging=True, print_progress=False):
     # Evaluate at x0 (keep nf, nx correct and check for f < 1e-12)
     # The hard bit is determining what m = len(r0) should be, and allocating memory appropriately
@@ -105,18 +110,17 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
     # Evaluate the first time...
     nf = nf_so_far + 1
     nx = nx_so_far + 1
-    r0, f0 = eval_least_squares_objective(objfun, remove_scaling(x0, scaling_changes),
-                                          args=args, eval_num=nf, pt_num=nx,
+    r0, obj0 = eval_least_squares_with_regularisation(objfun, remove_scaling(x0, scaling_changes), h,
+                                          argsf=argsf, argsh=argsh, verbose=do_logging, eval_num=nf, pt_num=nx,
                                           full_x_thresh=params("logging.n_to_print_whole_x_vector"),
-                                          check_for_overflow=params("general.check_objfun_for_overflow"),
-                                          verbose=do_logging)
+                                          check_for_overflow=params("general.check_objfun_for_overflow"))
     m = len(r0)
 
     # Now we have m, we can evaluate the rest of the times
     rvec_list = np.zeros((number_of_samples, m))
-    f_list = np.zeros((number_of_samples,))
+    obj_list = np.zeros((number_of_samples,))
     rvec_list[0, :] = r0
-    f_list[0] = f0
+    obj_list[0] = obj0
     num_samples_run = 1
     exit_info = None
 
@@ -128,15 +132,20 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
 
         nf += 1
         # Don't increment nx for x0 - we did this earlier
-        rvec_list[i, :], f_list[i] = eval_least_squares_objective(objfun, remove_scaling(x0, scaling_changes), args=args, eval_num=nf, pt_num=nx,
+        rvec_list[i, :], obj_list[i] = eval_least_squares_with_regularisation(objfun, remove_scaling(x0, scaling_changes), h,
+                                                argsf=argsf, argsh=argsh, verbose=do_logging, eval_num=nf, pt_num=nx,
                                                 full_x_thresh=params("logging.n_to_print_whole_x_vector"),
-                                                check_for_overflow=params("general.check_objfun_for_overflow"),
-                                                verbose=do_logging)
+                                                check_for_overflow=params("general.check_objfun_for_overflow"))
         num_samples_run += 1
 
     r0_avg = np.mean(rvec_list[:num_samples_run, :], axis=0)
-    if sumsq(r0_avg) <= params("model.abs_tol"):
-        exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
+    # NOTE: modify objvalue here
+    if h is None:
+        if sumsq(r0_avg) <= params("model.abs_tol"):
+            exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
+    else:
+        if sumsq(r0_avg) + h(remove_scaling(x0, scaling_changes), *argsh) <= params("model.abs_tol"):
+            exit_info = ExitInformation(EXIT_SUCCESS, "Objective is sufficiently small")
 
     if exit_info is not None:
         return x0, r0_avg, sumsq(r0_avg), None, num_samples_run, nf, nx, nruns_so_far+1, exit_info, diagnostic_info
@@ -162,8 +171,8 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
         params('growing.delta_scale_new_dirns', new_value=0.1)
 
     # Initialise controller
-    control = Controller(objfun, args, x0, r0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun,
-                         params, scaling_changes, do_logging)
+    control = Controller(objfun, argsf, x0, r0_avg, num_samples_run, xl, xu, projections, npt, rhobeg, rhoend, nf, nx, maxfun,
+                         params, scaling_changes, do_logging, h=h, lh=lh, argsh=argsh, prox_uh=prox_uh, argsprox=argsprox)
 
     # Initialise interpolation set
     number_of_samples = max(nsamples(control.delta, control.rho, 0, nruns_so_far), 1)
@@ -178,8 +187,8 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             module_logger.info("Initialising (coordinate directions)")
         exit_info = control.initialise_coordinate_directions(number_of_samples, num_directions, params)
     if exit_info is not None:
-        x, rvec, f, jacmin, nsamples = control.model.get_final_results()
-        return x, rvec, f, None, nsamples, control.nf, control.nx, nruns_so_far + 1, exit_info, diagnostic_info
+        x, rvec, obj, jacmin, nsamples, x_eval_num, jac_eval_nums = control.model.get_final_results()
+        return x, rvec, obj, None, nsamples, control.nf, control.nx, nruns_so_far + 1, exit_info, diagnostic_info, x_eval_num, jac_eval_nums
 
     finished_growing = (control.model.npt() >= control.model.num_pts) # have we finished growing the initial set yet?
 
@@ -271,16 +280,30 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             nruns_so_far += 1
             break # quit
 
-
-        # Trust region step
-        d, gopt, H, gnew, crvmin = control.trust_region_step(params)
+        tau = 1.0 # ratio used in the safety phase
+        if h is None:
+            # Trust region step
+            d, gopt, H, gnew, crvmin = control.trust_region_step(params)
+        else:
+            # Calculate criticality measure
+            criticality_measure = control.evaluate_criticality_measure(params)
+            # Trust region step
+            d, gopt, H, gnew, crvmin = control.trust_region_step(params, criticality_measure)
+            try:
+                tau = min(criticality_measure/(LA.norm(gopt)+lh), 1.0)
+            except ValueError:
+                # In some instances, gopt can have nan/inf values -- this ultimately calls a safety step and is generally fine
+                # but we need to set a value for tau nonetheless
+                tau = 1.0
+
         if do_logging:
             module_logger.debug("Trust region step is d = " + str(d))
+
         xnew = control.model.xopt() + d
         dnorm = min(LA.norm(d), control.delta)
 
         if print_progress:
-            print("{:^5}{:^7}{:^10.2e}{:^10.2e}{:^10.2e}{:^10.2e}{:^7}".format(nruns_so_far+1, current_iter+1, control.model.fopt(), np.linalg.norm(gopt), control.delta, control.rho, control.nf))
+            print("{:^5}{:^7}{:^10.2e}{:^10.2e}{:^10.2e}{:^10.2e}{:^7}".format(nruns_so_far+1, current_iter+1, control.model.objopt(), np.linalg.norm(gopt), control.delta, control.rho, control.nf))
 
         if params("logging.save_diagnostic_info"):
             diagnostic_info.save_info_from_control(control, nruns_so_far, current_iter,
@@ -289,7 +312,7 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             diagnostic_info.update_interpolation_information(interp_error, ls_interp_cond_num, linalg_resid,
                                                              sqrt(norm_J_error), LA.norm(gopt), LA.norm(d))
 
-        if dnorm < params("general.safety_step_thresh") * control.rho and not finished_growing and params("growing.safety.do_safety_step"):
+        if dnorm < tau * params("general.safety_step_thresh") * control.rho and not finished_growing and params("growing.safety.do_safety_step"):
            if do_logging:
                module_logger.debug("Safety step during growing phase")
 
@@ -415,10 +438,10 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 if do_logging:
                     module_logger.info("New rho = %g after %i function evaluations" % (control.rho, control.nf))
                     if control.n() < params("logging.n_to_print_whole_x_vector"):
-                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.fopt())
+                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.objopt())
                                             + str(control.model.xopt(abs_coordinates=True)))
                     else:
-                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.fopt()))
+                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.objopt()))
                 continue # next iteration
             else:
                 # Quit on rho=rhoend
@@ -439,13 +462,14 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
             else:
                 # Cannot reduce rho, so check xnew and quit
                 x = control.model.as_absolute_coordinates(xnew)
+                ##print("x from xnew", x)
                 number_of_samples = max(nsamples(control.delta, control.rho, current_iter, nruns_so_far), 1)
-                rvec_list, f_list, num_samples_run, exit_info = control.evaluate_objective(x, number_of_samples,
+                rvec_list, obj_list, num_samples_run, exit_info = control.evaluate_objective(x, number_of_samples,
                                                                                            params)
 
                 if num_samples_run > 0:
                     control.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0),
-                                             num_samples_run, x_in_abs_coords=True)
+                                             num_samples_run, control.nx, x_in_abs_coords=True)
 
                 if exit_info is not None:
                     nruns_so_far += 1
@@ -514,8 +538,9 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
 
         # Evaluate new point
         x = control.model.as_absolute_coordinates(xnew)
+        ##print("x from xnew again", x)
        number_of_samples = max(nsamples(control.delta, control.rho, current_iter, nruns_so_far), 1)
-        rvec_list, f_list, num_samples_run, exit_info = control.evaluate_objective(x, number_of_samples, params)
+        rvec_list, obj_list, num_samples_run, exit_info = control.evaluate_objective(x, number_of_samples, params)
        if np.any(np.isnan(rvec_list)):
            # Just exit without saving the current point
            # We should be able to do a hard restart though, because it's unlikely
@@ -529,13 +554,13 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 break # quit
        if exit_info is not None:
            if num_samples_run > 0:
-                control.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run,
+                control.model.save_point(x, np.mean(rvec_list[:num_samples_run, :], axis=0), num_samples_run, control.nx,
                                          x_in_abs_coords=True)
            nruns_so_far += 1
            break # quit
 
        # Estimate f in order to compute 'actual reduction'
-        ratio, exit_info = control.calculate_ratio(current_iter, rvec_list[:num_samples_run, :], d, gopt, H)
+        ratio, exit_info = control.calculate_ratio(control.model.xopt(abs_coordinates=True), current_iter, rvec_list[:num_samples_run, :], d, gopt, H)
        if exit_info is not None:
            if exit_info.able_to_do_restart() and params("restarts.use_restarts") and params(
                    "restarts.use_soft_restarts"):
@@ -565,9 +590,9 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
            diagnostic_info.update_slow_iter(-1) # n/a, unless otherwise update
        if ratio < params("tr_radius.eta1"): # ratio < 0.1
            if finished_growing:
-                control.delta = min(params("tr_radius.gamma_dec") * control.delta, dnorm)
+                control.delta = min(params("tr_radius.gamma_dec") * control.delta, dnorm) / tau
            else:
-                control.delta = min(params("growing.gamma_dec") * control.delta, dnorm) # different gamma_dec
+                control.delta = min(params("growing.gamma_dec") * control.delta, dnorm) / tau # different gamma_dec
            if params("logging.save_diagnostic_info"):
                diagnostic_info.update_iter_type(ITER_ACCEPTABLE_NO_GEOM if ratio > 0.0
                                                 else ITER_UNSUCCESSFUL_NO_GEOM) # we flag geom update below
@@ -618,7 +643,7 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
 
        if do_logging:
            module_logger.debug("Updating with knew = %i" % knew)
-        control.model.change_point(knew, xnew, rvec_list[0, :]) # expect step, not absolute x
+        control.model.change_point(knew, xnew, rvec_list[0, :], control.nx) # expect step, not absolute x
        for i in range(1, num_samples_run):
            control.model.add_new_sample(knew, rvec_extra=rvec_list[i, :])
 
@@ -651,7 +676,7 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                break # quit
 
        # Update list of successful steps
-        this_step_was_not_improvement = control.model.fsave is not None and control.model.fopt() > control.model.fsave
+        this_step_was_not_improvement = control.model.objsave is not None and control.model.objopt() > control.model.objsave
        succ_steps_not_improvement.pop() # remove last item
        succ_steps_not_improvement.insert(0, this_step_was_not_improvement) # add at beginning
        # Terminate (not restart) if all are True
@@ -828,10 +853,10 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
                 if do_logging:
                     module_logger.info("New rho = %g after %i function evaluations" % (control.rho, control.nf))
                     if control.n() < params("logging.n_to_print_whole_x_vector"):
-                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.fopt())
+                        module_logger.debug("Best so far: f = %.15g at x = " % (control.model.objopt())
                                             + str(control.model.xopt(abs_coordinates=True)))
                     else:
-                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.fopt()))
+                        module_logger.debug("Best so far: f = %.15g at x = [...]" % (control.model.objopt()))
                 continue # next iteration
             else:
                 # Quit on rho=rhoend
@@ -857,14 +882,14 @@ def solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfu
    # (end main loop)
 
    # Quit & return the important information
-    x, rvec, f, jacmin, nsamples = control.model.get_final_results()
+    x, rvec, obj, jacmin, nsamples, x_eval_num, jac_eval_nums = control.model.get_final_results()
    if do_logging:
        module_logger.debug("At return from DFO-LS, number of function evals = %i" % nf)
-        module_logger.debug("Smallest objective value = %.15g at x = " % f + str(x))
-    return x, rvec, f, jacmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info
+        module_logger.debug("Smallest objective value = %.15g at x = " % obj + str(x))
+    return x, rvec, obj, jacmin, nsamples, control.nf, control.nx, nruns_so_far, exit_info, diagnostic_info, x_eval_num, jac_eval_nums
 
 
-def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
+def solve(objfun, x0, h=None, lh=None, prox_uh=None, argsf=(), argsh=(), argsprox=(), bounds=None, projections=[], npt=None, rhobeg=None, rhoend=1e-8, maxfun=None, nsamples=None, user_params=None,
           objfun_has_noise=False, scaling_within_bounds=False, do_logging=True, print_progress=False):
    x0 = x0.astype(float)
    n = len(x0)
@@ -934,13 +959,21 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
 
    exit_info = None
    # Input & parameter checks
+    if exit_info is None and h is not None:
+        if prox_uh is None:
+            exit_info = ExitInformation(EXIT_INPUT_ERROR, "Must provide prox_uh input if h is not None")
+        elif lh is None:
+            exit_info = ExitInformation(EXIT_INPUT_ERROR, "Must provide lh input if h is not None")
+        elif lh <= 0.0:
+            exit_info = ExitInformation(EXIT_INPUT_ERROR, "lh must be strictly positive")
+
    if exit_info is None and npt < n + 1:
        exit_info = ExitInformation(EXIT_INPUT_ERROR, "npt must be >= n+1 for linear models with inexact interpolation")
 
-    if exit_info is None and rhobeg < 0.0:
+    if exit_info is None and rhobeg <= 0.0:
        exit_info = ExitInformation(EXIT_INPUT_ERROR, "rhobeg must be strictly positive")
 
-    if exit_info is None and rhoend < 0.0:
+    if exit_info is None and rhoend <= 0.0:
        exit_info = ExitInformation(EXIT_INPUT_ERROR, "rhoend must be strictly positive")
 
    if exit_info is None and rhobeg <= rhoend:
@@ -1013,12 +1046,12 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
        x0 = xp.copy()
 
    # Enforce lower & upper bounds on x0
-    idx = (x0 <= xl)
+    idx = (x0 < xl)
    if np.any(idx):
        warnings.warn("x0 below lower bound, adjusting", RuntimeWarning)
        x0[idx] = xl[idx]
 
-    idx = (x0 >= xu)
+    idx = (x0 > xu)
    if np.any(idx):
        warnings.warn("x0 above upper bound, adjusting", RuntimeWarning)
        x0[idx] = xu[idx]
@@ -1028,9 +1061,9 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
    nruns = 0
    nf = 0
    nx = 0
-    xmin, rmin, fmin, jacmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info = \
-        solve_main(objfun, x0, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
-                   diagnostic_info, scaling_changes, default_growing_method_set_by_user=default_growing_method_set_by_user,
+    xmin, rmin, objmin, jacmin, nsamples_min, nf, nx, nruns, exit_info, diagnostic_info, xmin_eval_num, jacmin_eval_nums = \
+        solve_main(objfun, x0, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                   diagnostic_info, scaling_changes, h, lh, argsh, prox_uh, argsprox, default_growing_method_set_by_user=default_growing_method_set_by_user,
                   do_logging=do_logging, print_progress=print_progress)
 
    # Hard restarts loop
@@ -1045,27 +1078,28 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
 
        if do_logging:
            module_logger.info("Restarting from finish point (f = %g) after %g function evals; using rhobeg = %g and rhoend = %g"
-                               % (fmin, nf, rhobeg, rhoend))
+                               % (objmin, nf, rhobeg, rhoend))
        if params("restarts.hard.use_old_rk"):
-            xmin2, rmin2, fmin2, jacmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
-                solve_main(objfun, xmin, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
-                           diagnostic_info, scaling_changes, r0_avg_old=rmin, r0_nsamples_old=nsamples_min,
+            xmin2, rmin2, objmin2, jacmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info, xmin_eval_num2, jacmin_eval_nums2 = \
+                solve_main(objfun, xmin, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                           diagnostic_info, scaling_changes, h, lh, argsh, prox_uh, argsprox, r0_avg_old=rmin, r0_nsamples_old=nsamples_min,
                           do_logging=do_logging, print_progress=print_progress)
        else:
-            xmin2, rmin2, fmin2, jacmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info = \
-                solve_main(objfun, xmin, args, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
-                           diagnostic_info, scaling_changes, do_logging=do_logging, print_progress=print_progress)
+            xmin2, rmin2, objmin2, jacmin2, nsamples2, nf, nx, nruns, exit_info, diagnostic_info, xmin_eval_num2, jacmin_eval_nums2 = \
+                solve_main(objfun, xmin, argsf, xl, xu, projections, npt, rhobeg, rhoend, maxfun, nruns, nf, nx, nsamples, params,
+                           diagnostic_info, scaling_changes, h, lh, argsh, prox_uh, argsprox, do_logging=do_logging, print_progress=print_progress)
 
-        if fmin2 < fmin or np.isnan(fmin):
+        if objmin2 < objmin or np.isnan(objmin):
            if do_logging:
-                module_logger.info("Successful run with new f = %s compared to old f = %s" % (fmin2, fmin))
+                module_logger.info("Successful run with new f = %s compared to old f = %s" % (objmin2, objmin))
            last_successful_run = nruns
-            (xmin, rmin, fmin, nsamples_min) = (xmin2, rmin2, fmin2, nsamples2)
+            (xmin, rmin, objmin, nsamples_min, xmin_eval_num) = (xmin2, rmin2, objmin2, nsamples2, xmin_eval_num2)
            if jacmin2 is not None: # may be None if finished during setup phase, in which case just use old Jacobian
                jacmin = jacmin2
+                jacmin_eval_nums = jacmin_eval_nums2
        else:
            if do_logging:
-                module_logger.info("Unsuccessful run with new f = %s compared to old f = %s" % (fmin2, fmin))
+                module_logger.info("Unsuccessful run with new f = %s compared to old f = %s" % (objmin2, objmin))
 
            if nruns - last_successful_run >= params("restarts.max_unsuccessful_restarts"):
                exit_info = ExitInformation(EXIT_SUCCESS, "Reached maximum number of unsuccessful restarts")
@@ -1077,7 +1111,7 @@ def solve(objfun, x0, args=(), bounds=None, projections=[], npt=None, rhobeg=Non
    if scaling_changes is not None and jacmin is not None:
        for i in range(n):
            jacmin[:, i] = jacmin[:, i] / scaling_changes[1][i]
-    results = OptimResults(remove_scaling(xmin, scaling_changes), rmin, fmin, jacmin, nf, nx, nruns, exit_flag, exit_msg)
+    results = OptimResults(remove_scaling(xmin, scaling_changes), rmin, objmin, jacmin, nf, nx, nruns, exit_flag, exit_msg, xmin_eval_num, jacmin_eval_nums)
    if params("logging.save_diagnostic_info"):
        df = diagnostic_info.to_dataframe(with_xk=params("logging.save_xk"), with_rk=params("logging.save_rk"))
        results.diagnostic_info = df
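
The user-facing change in solver.py is that solve() now accepts an optional non-smooth regulariser h together with its Lipschitz constant lh and the proximal operator prox_uh (all three must be supplied together, and lh must be strictly positive), and that the result object exposes obj, xmin_eval_num and jacmin_eval_nums. Below is a minimal sketch of how the new arguments fit together; the keyword names and result attributes come from the diff above, but the residual function, the L1 choice of h and the value of lam are illustrative assumptions only.

import numpy as np
import dfols

def objfun(x):
    # Toy least-squares residuals (Rosenbrock-style); purely illustrative
    return np.array([10.0 * (x[1] - x[0] ** 2), 1.0 - x[0]])

lam = 1e-2  # assumed regularisation weight

def h(x):
    # h(x) = lam * ||x||_1, globally Lipschitz with constant lam * sqrt(len(x))
    return lam * np.sum(np.abs(x))

def prox_uh(x, u):
    # Prox of u*h: componentwise soft-thresholding with threshold u*lam
    return np.sign(x) * np.maximum(np.abs(x) - u * lam, 0.0)

x0 = np.array([-1.2, 1.0])
soln = dfols.solve(objfun, x0, h=h, lh=lam * np.sqrt(2), prox_uh=prox_uh)
print(soln.obj)            # renamed from soln.f in 1.4.1
print(soln.xmin_eval_num)  # new in 1.5.1: which evaluation produced the returned point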
dfols/trust_region.py CHANGED
@@ -29,14 +29,14 @@ solves
 s.t. lower <= x <= upper
 ||x-xbase|| <= Delta
 With this value, the variable d=x-xbase solves the problem
-min_s abs(c + g' * d)
+min_d abs(c + g' * d)
 s.t. lower <= xbase + d <= upper
 ||d|| <= delta
 Again, we have a version of this for handling arbitrary constraints
 The call
 x = ctrsbox_geometry(xbase, c, g, projections, Delta)
 Solves
-min_s abs(c + g' * d)
+min_d abs(c + g' * d)
 s.t. xbase + d is feasible w.r.t. the constraint set C
 ||d|| <= delta
 
@@ -70,7 +70,7 @@ alternative licensing.
 # Ensure compatibility with Python 2
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-from math import sqrt
+from math import sqrt, ceil
 import numpy as np
 try:
     import trustregion
@@ -79,13 +79,93 @@ except ImportError:
     # Fall back to Python implementation
     USE_FORTRAN = False
 
-from .util import dykstra, pball, pbox, sumsq, model_value
+from .util import dykstra, pball, pbox, sumsq, model_value, remove_scaling
 
-__all__ = ['ctrsbox', 'ctrsbox_geometry', 'trsbox', 'trsbox_geometry']
+__all__ = ['ctrsbox_sfista', 'ctrsbox_pgd', 'ctrsbox_geometry', 'trsbox', 'trsbox_geometry']
 
 ZERO_THRESH = 1e-14
 
-def ctrsbox(xopt, g, H, projections, delta, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN):
+def ctrsbox_sfista(xopt, g, H, projections, delta, h, L_h, prox_uh, argsh=(), argsprox=(), func_tol=1e-3, max_iters=500, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN, scaling_changes=None, sfista_iters_scale=1.0):
+    n = xopt.size
+    assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
+    assert g.shape == (n,), "g and xopt have incompatible sizes"
+    assert len(H.shape) == 2, "H must be a matrix"
+    assert H.shape == (n,n), "H and xopt have incompatible sizes"
+    assert np.allclose(H, H.T), "H must be symmetric"
+    assert delta > 0.0, "delta must be strictly positive"
+
+    # Initialization
+    d = np.zeros(n) # start with zero vector
+    y = np.zeros(n)
+    t = 1
+    k_H = np.linalg.norm(H, 2)
+    crvmin = -1.0
+
+    # Number of iterations & smoothing parameter, from Theorem 10.57 in
+    # [A. Beck. First-order methods in optimization, SIAM, 2017]
+    # We do not use the values of k and mu given in the theorem statement, but rather the intermediate
+    # results on p313 (K1 for number of iterations, and the immediate next line for mu)
+    # Note: in the book's notation, Gamma=delta^2, alpha=1, beta=L_h^2/2, Lf=k_H [alpha and beta from Thm 10.51]
+    try:
+        MAX_LOOP_ITERS = ceil(sfista_iters_scale * delta * (L_h+sqrt(L_h*L_h+2*k_H*func_tol)) / func_tol)
+        MAX_LOOP_ITERS = min(MAX_LOOP_ITERS, max_iters)
+    except ValueError:
+        MAX_LOOP_ITERS = max_iters
+    u = 2 * delta / (MAX_LOOP_ITERS * L_h) # smoothing parameter
+    # u = 2 * func_tol / (L_h ** 2 + L_h * sqrt(L_h ** 2 + 2 * k_H * func_tol)) # the above choice works better in practice
+
+    def gradient_Fu(xopt, g, H, u, prox_uh, d):
+        # Calculate gradient_Fu,
+        # where Fu(d) := g(d) + h_u(d) and h_u(d) is a 1/u-smooth approximation of h.
+        # We assume that h is globally Lipschitz continous with constant L_h,
+        # then we can let h_u(d) be the Moreau Envelope M_h_u(d) of h.
+        return g + H @ d + (xopt + d - prox_uh(remove_scaling(xopt + d, scaling_changes), u, *argsprox)) / u
+
+    # Lipschitz constant of gradient_Fu
+    l = k_H + 1 / u
+
+    # trust region is a ball of radius delta around xopt
+    trproj = lambda w: pball(w, xopt, delta)
+
+    # combine trust region constraints with user-entered constraints
+    P = list(projections) # make a copy of the projections list
+    P.append(trproj)
+    def proj(d0):
+        p = dykstra(P, xopt+d0, max_iter=d_max_iters, tol=d_tol)
+        # we want the step only, so we subtract xopt
+        # from the new point: proj(xk+d) - xk
+        return p - xopt
+
+    # general step
+    model_value_best = model_value(g, H, d, xopt, h, argsh, scaling_changes)
+    d_best = d.copy()
+    for k in range(MAX_LOOP_ITERS):
+        prev_d = d.copy()
+        prev_t = t
+        # gradient_Fu at y
+        g_Fu = gradient_Fu(xopt, g, H, u, prox_uh, d, *argsprox)
+
+        # main update step
+        d = proj(y - g_Fu / l)
+        new_model_value = model_value(g, H, d, xopt, h, argsh, scaling_changes)
+        if new_model_value < model_value_best:
+            d_best = d.copy()
+            model_value_best = new_model_value
+
+        # update true gradient
+        # gnew is the gradient of the smoothed function
+        gnew = gradient_Fu(xopt, g, H, u, prox_uh, d, *argsprox)
+
+        # update CRVMIN
+        crv = d.dot(H).dot(d)/sumsq(d) if sumsq(d) >= ZERO_THRESH else crvmin
+        crvmin = min(crvmin, crv) if crvmin != -1.0 else crv
+
+        # momentum update
+        t = (1 + sqrt(1 + 4*t*t)) / 2
+        y = d + (prev_t - 1) * (d - prev_d) / t
+    return d, gnew, crvmin
+
+def ctrsbox_pgd(xopt, g, H, projections, delta, d_max_iters=100, d_tol=1e-10, use_fortran=USE_FORTRAN):
     n = xopt.size
     assert xopt.shape == (n,), "xopt has wrong shape (should be vector)"
     assert g.shape == (n,), "g and xopt have incompatible sizes"
@@ -151,7 +231,6 @@ def ctrsbox(xopt, g, H, projections, delta, d_max_iters=100, d_tol=1e-10, use_fo
 
     return d, gnew, crvmin
 
-
 def trsbox(xopt, g, H, sl, su, delta, use_fortran=USE_FORTRAN):
     if use_fortran:
         return trustregion.solve(g, H, delta,
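
For background on the new ctrsbox_sfista routine above: gradient_Fu smooths the non-smooth term via the Moreau envelope (the Beck 2017 reference cited in its comments). The identities below are standard results, restated in LaTeX to match the code; they are not part of the diff itself.

M_h^u(x) = \min_w \left\{ h(w) + \tfrac{1}{2u}\|w - x\|^2 \right\}, \qquad \nabla M_h^u(x) = \tfrac{1}{u}\left(x - \operatorname{prox}_{uh}(x)\right)

so for the smoothed model F_u(d) = g^\top d + \tfrac{1}{2} d^\top H d + M_h^u(x_{\mathrm{opt}} + d),

\nabla F_u(d) = g + H d + \tfrac{1}{u}\left(x_{\mathrm{opt}} + d - \operatorname{prox}_{uh}(x_{\mathrm{opt}} + d)\right), \qquad \operatorname{Lip}(\nabla F_u) \le \|H\|_2 + \tfrac{1}{u},

which is exactly what gradient_Fu returns, with \|H\|_2 + 1/u being the step-length constant l in the code.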
dfols/util.py CHANGED
@@ -31,7 +31,7 @@ import scipy.linalg as LA
 import sys
 
 
-__all__ = ['sumsq', 'eval_least_squares_objective', 'model_value', 'random_orthog_directions_within_bounds',
+__all__ = ['sumsq', 'eval_least_squares_with_regularisation', 'model_value', 'random_orthog_directions_within_bounds',
           'random_directions_within_bounds', 'apply_scaling', 'remove_scaling', 'pbox', 'pball', 'dykstra', 'qr_rank']
 
 module_logger = logging.getLogger(__name__)
@@ -47,9 +47,9 @@ def sumsq(x):
     return np.dot(x, x)
 
 
-def eval_least_squares_objective(objfun, x, args=(), verbose=True, eval_num=0, pt_num=0, full_x_thresh=6, check_for_overflow=True):
+def eval_least_squares_with_regularisation(objfun, x, h=None, argsf=(), argsh=(), verbose=True, eval_num=0, pt_num=0, full_x_thresh=6, check_for_overflow=True):
     # Evaluate least squares function
-    fvec = objfun(x, *args)
+    fvec = objfun(x, *argsf)
 
     if check_for_overflow:
         try:
@@ -62,20 +62,31 @@ def eval_least_squares_objective(objfun, x, args=(), verbose=True, eval_num=0, p
     else:
         f = sumsq(fvec)
 
+    # objective = least-squares + regularisation
+    obj = f
+    if h is not None:
+        # Evaluate regularisation term
+        hvalue = h(x, *argsh)
+        obj = f + hvalue
+
     if verbose:
         if len(x) < full_x_thresh:
-            module_logger.info("Function eval %i at point %i has f = %.15g at x = " % (eval_num, pt_num, f) + str(x))
+            module_logger.info("Function eval %i at point %i has obj = %.15g at x = " % (eval_num, pt_num, obj) + str(x))
         else:
-            module_logger.info("Function eval %i at point %i has f = %.15g at x = [...]" % (eval_num, pt_num, f))
+            module_logger.info("Function eval %i at point %i has obj = %.15g at x = [...]" % (eval_num, pt_num, obj))
 
-    return fvec, f
+    return fvec, obj
 
 
-def model_value(g, H, s):
-    # Calculate model value (s^T * g + 0.5* s^T * H * s) = s^T * (gopt + 0.5 * H*s)
+def model_value(g, H, s, xopt=(), h=None, argsh=(), scaling_changes=None):
+    # Calculate model value (s^T * g + 0.5* s^T * H * s) + h(xopt + s) = s^T * (gopt + 0.5 * H*s) + h(xopt + s)
     assert g.shape == s.shape, "g and s have incompatible sizes"
     Hs = H.dot(s)
-    return np.dot(s, g + 0.5*Hs)
+    rtn = np.dot(s, g + 0.5*Hs)
+    if h is not None:
+        hvalue = h(remove_scaling(xopt+s, scaling_changes), *argsh)
+        rtn += hvalue
+    return rtn
 
 
 def get_scale(dirn, delta, lower, upper):
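
Stated as formulas, the two helpers changed above now compute the regularised quantities below (with r(x) denoting objfun(x); the h terms vanish when h is None, which reproduces the 1.4.1 behaviour):

\mathrm{obj}(x) = \|r(x)\|_2^2 + h(x) \quad \text{(eval\_least\_squares\_with\_regularisation)}

m(s) = s^\top g + \tfrac{1}{2} s^\top H s + h(x_{\mathrm{opt}} + s) \quad \text{(model\_value)}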
DFO_LS-1.4.1.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
-dfols/__init__.py,sha256=D-x5glfZFfJ8-bdjA-4k4JFTDu1Eylaz3EL4GSH28eI,1605
-dfols/controller.py,sha256=LSeHZoKaKUEYgB1_2subjKskHJ8mWccMbn-LOpxJ7LM,42769
-dfols/diagnostic_info.py,sha256=2kEUkL-MS4eDENUf1r2hOWsntP8OxMDKi_kyHmrC9V4,6081
-dfols/hessian.py,sha256=sExx4J4KoGwHItbthX2odosB2ONbQFvLdlcod7PIh4k,4262
-dfols/model.py,sha256=q70zuqocNtsaXzNjWHcTdrS209BdQt4uY0GNtp0qlI8,18809
-dfols/params.py,sha256=_Va1ybnQDIzWaXvImcSeH8xnNE_A2zpAfBgDG74sc5c,17557
-dfols/solver.py,sha256=IKg3xWPLYlOW_zuTc_-HY_3ZvdDEfkyxARerERUQHlU,61264
-dfols/trust_region.py,sha256=hRKQx0fpSxol7dLZO0yrT7O5IDptPPSnDvxKQNZ3r0M,24603
-dfols/util.py,sha256=ysdIHTkrkWwCRKuGffofehKl-t5dT3sD9dfy0muI4ZI,9852
-DFO_LS-1.4.1.dist-info/LICENSE.txt,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
-DFO_LS-1.4.1.dist-info/METADATA,sha256=RR6KhJi4Ae_1PES8Bpzqm3AYK2w12V-2MyDyjaCDe80,8552
-DFO_LS-1.4.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-DFO_LS-1.4.1.dist-info/top_level.txt,sha256=UfxRhaDN8HQx2_l17KbrDrERJ90OCN7VKkDMpYYbRLU,6
-DFO_LS-1.4.1.dist-info/RECORD,,