DFO-LS 1.2.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of DFO-LS might be problematic. Click here for more details.
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/METADATA +66 -60
- DFO_LS-1.5.0.dist-info/RECORD +14 -0
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/WHEEL +1 -1
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/top_level.txt +0 -0
- dfols/__init__.py +4 -5
- dfols/controller.py +274 -59
- dfols/hessian.py +1 -1
- dfols/model.py +64 -33
- dfols/params.py +32 -2
- dfols/solver.py +156 -91
- dfols/trust_region.py +237 -7
- dfols/util.py +71 -10
- DFO_LS-1.2.1.dist-info/RECORD +0 -16
- DFO_LS-1.2.1.dist-info/zip-safe +0 -1
- dfols/version.py +0 -25
- {DFO_LS-1.2.1.dist-info → DFO_LS-1.5.0.dist-info}/LICENSE.txt +0 -0
dfols/model.py
CHANGED
|
@@ -36,13 +36,16 @@ import numpy as np
|
|
|
36
36
|
import scipy.linalg as LA
|
|
37
37
|
|
|
38
38
|
from .trust_region import trsbox_geometry
|
|
39
|
-
from .util import sumsq
|
|
39
|
+
from .util import sumsq, dykstra, remove_scaling
|
|
40
40
|
|
|
41
41
|
__all__ = ['Model']
|
|
42
42
|
|
|
43
|
+
module_logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
43
46
|
class Model(object):
|
|
44
|
-
def __init__(self, npt, x0, r0, xl, xu, r0_nsamples, n=None, m=None, abs_tol=1e-12, rel_tol=1e-20, precondition=True,
|
|
45
|
-
do_logging=True):
|
|
47
|
+
def __init__(self, npt, x0, r0, xl, xu, projections, r0_nsamples, h=None, argsh=(), n=None, m=None, abs_tol=1e-12, rel_tol=1e-20, precondition=True,
|
|
48
|
+
do_logging=True, scaling_changes=None):
|
|
46
49
|
if n is None:
|
|
47
50
|
n = len(x0)
|
|
48
51
|
if m is None:
|
|
@@ -53,27 +56,35 @@ class Model(object):
|
|
|
53
56
|
assert xu.shape == (n,), "xu has wrong shape (got %s, expect (%g,))" % (str(xu.shape), n)
|
|
54
57
|
assert r0.shape == (m,), "r0 has wrong shape (got %s, expect (%g,))" % (str(r0.shape), m)
|
|
55
58
|
self.do_logging = do_logging
|
|
59
|
+
self.scaling_changes = scaling_changes
|
|
56
60
|
self.dim = n
|
|
57
61
|
self.resid_dim = m
|
|
58
62
|
self.num_pts = npt
|
|
59
63
|
self.npt_so_far = 1 # number of points added so far (with function values)
|
|
60
64
|
|
|
65
|
+
self.h = h
|
|
66
|
+
self.argsh = argsh
|
|
67
|
+
|
|
61
68
|
# Initialise to blank some useful stuff
|
|
62
69
|
# Interpolation points
|
|
63
70
|
self.xbase = x0.copy()
|
|
64
71
|
self.sl = xl - self.xbase # lower bound w.r.t. xbase (require xpt >= sl)
|
|
65
72
|
self.su = xu - self.xbase # upper bound w.r.t. xbase (require xpt <= su)
|
|
73
|
+
self.projections = projections
|
|
66
74
|
self.points = np.zeros((npt, n)) # interpolation points w.r.t. xbase
|
|
67
75
|
|
|
68
76
|
# Function values
|
|
69
77
|
self.fval_v = np.inf * np.ones((npt, m)) # residuals for each xpt
|
|
70
78
|
self.fval_v[0, :] = r0.copy()
|
|
71
|
-
|
|
72
|
-
self.
|
|
79
|
+
|
|
80
|
+
self.objval = np.inf * np.ones((npt, )) # overall objective value for each xpt
|
|
81
|
+
self.objval[0] = sumsq(r0)
|
|
82
|
+
if h is not None:
|
|
83
|
+
self.objval[0] += h(remove_scaling(x0, self.scaling_changes), *argsh)
|
|
73
84
|
self.kopt = 0 # index of current iterate (should be best value so far)
|
|
74
|
-
self.nsamples = np.zeros((npt,), dtype=
|
|
85
|
+
self.nsamples = np.zeros((npt,), dtype=int) # number of samples used to evaluate objective at each point
|
|
75
86
|
self.nsamples[0] = r0_nsamples
|
|
76
|
-
self.
|
|
87
|
+
self.objbeg = self.objval[0] # f(x0), saved to check for sufficient reduction
|
|
77
88
|
|
|
78
89
|
# Termination criteria
|
|
79
90
|
self.abs_tol = abs_tol
|
|
@@ -86,7 +97,7 @@ class Model(object):
|
|
|
86
97
|
# Saved point (in absolute coordinates) - always check this value before quitting solver
|
|
87
98
|
self.xsave = None
|
|
88
99
|
self.rsave = None
|
|
89
|
-
self.
|
|
100
|
+
self.objsave = None
|
|
90
101
|
self.jacsave = None
|
|
91
102
|
self.nsamples_save = None
|
|
92
103
|
|
|
@@ -114,8 +125,8 @@ class Model(object):
|
|
|
114
125
|
def ropt(self):
|
|
115
126
|
return self.fval_v[self.kopt, :] # residuals for current iterate
|
|
116
127
|
|
|
117
|
-
def
|
|
118
|
-
return self.
|
|
128
|
+
def objopt(self):
|
|
129
|
+
return self.objval[self.kopt]
|
|
119
130
|
|
|
120
131
|
def xpt(self, k, abs_coordinates=False):
|
|
121
132
|
assert 0 <= k < self.npt(), "Invalid index %g" % k
|
|
@@ -123,18 +134,22 @@ class Model(object):
|
|
|
123
134
|
return np.minimum(np.maximum(self.sl, self.points[k, :].copy()), self.su)
|
|
124
135
|
else:
|
|
125
136
|
# Apply bounds and convert back to absolute coordinates
|
|
137
|
+
if self.projections:
|
|
138
|
+
return dykstra(self.projections, self.xbase + self.points[k,:])
|
|
126
139
|
return self.xbase + np.minimum(np.maximum(self.sl, self.points[k, :]), self.su)
|
|
127
140
|
|
|
128
141
|
def rvec(self, k):
|
|
129
142
|
assert 0 <= k < self.npt(), "Invalid index %g" % k
|
|
130
143
|
return self.fval_v[k, :]
|
|
131
144
|
|
|
132
|
-
def
|
|
145
|
+
def objval(self, k):
|
|
133
146
|
assert 0 <= k < self.npt(), "Invalid index %g" % k
|
|
134
|
-
return self.
|
|
147
|
+
return self.objval[k]
|
|
135
148
|
|
|
136
|
-
def as_absolute_coordinates(self, x):
|
|
149
|
+
def as_absolute_coordinates(self, x, full_dykstra=False):
|
|
137
150
|
# If x were an interpolation point, get the absolute coordinates of x
|
|
151
|
+
if self.projections:
|
|
152
|
+
return dykstra(self.projections, self.xbase + x)
|
|
138
153
|
return self.xbase + np.minimum(np.maximum(self.sl, x), self.su)
|
|
139
154
|
|
|
140
155
|
def xpt_directions(self, include_kopt=True):
|
|
@@ -169,18 +184,20 @@ class Model(object):
|
|
|
169
184
|
|
|
170
185
|
self.points[k, :] = x.copy()
|
|
171
186
|
self.fval_v[k, :] = rvec.copy()
|
|
172
|
-
self.
|
|
187
|
+
self.objval[k] = sumsq(rvec)
|
|
188
|
+
if self.h is not None:
|
|
189
|
+
self.objval[k] += self.h(remove_scaling(self.xbase + x, self.scaling_changes), *self.argsh)
|
|
173
190
|
self.nsamples[k] = 1
|
|
174
191
|
self.factorisation_current = False
|
|
175
192
|
|
|
176
|
-
if allow_kopt_update and self.
|
|
193
|
+
if allow_kopt_update and self.objval[k] < self.objopt():
|
|
177
194
|
self.kopt = k
|
|
178
195
|
return
|
|
179
196
|
|
|
180
197
|
def swap_points(self, k1, k2):
|
|
181
198
|
self.points[[k1, k2], :] = self.points[[k2, k1], :]
|
|
182
199
|
self.fval_v[[k1, k2], :] = self.fval_v[[k2, k1], :]
|
|
183
|
-
self.
|
|
200
|
+
self.objval[[k1, k2]] = self.objval[[k2, k1]]
|
|
184
201
|
if self.kopt == k1:
|
|
185
202
|
self.kopt = k2
|
|
186
203
|
elif self.kopt == k2:
|
|
@@ -193,22 +210,27 @@ class Model(object):
|
|
|
193
210
|
assert 0 <= k < self.npt(), "Invalid index %g" % k
|
|
194
211
|
t = float(self.nsamples[k]) / float(self.nsamples[k] + 1)
|
|
195
212
|
self.fval_v[k, :] = t * self.fval_v[k, :] + (1 - t) * rvec_extra
|
|
196
|
-
|
|
213
|
+
# NOTE: how should we resample when h is present? Currently we still average the residuals at xpt(k), then add h(xpt(k)). Modify the test if this is incorrect!
|
|
214
|
+
self.objval[k] = sumsq(self.fval_v[k, :])
|
|
215
|
+
if self.h is not None:
|
|
216
|
+
self.objval[k] += self.h(remove_scaling(self.xbase + self.points[k, :], self.scaling_changes), *self.argsh)
|
|
197
217
|
self.nsamples[k] += 1
|
|
198
218
|
|
|
199
|
-
self.kopt = np.argmin(self.
|
|
219
|
+
self.kopt = np.argmin(self.objval[:self.npt()]) # make sure kopt is always the best value we have
|
|
200
220
|
return
|
|
201
221
|
|
|
202
222
|
def add_new_point(self, x, rvec):
|
|
203
223
|
self.points = np.append(self.points, x.reshape((1, self.n())), axis=0) # append row to xpt
|
|
204
224
|
self.fval_v = np.append(self.fval_v, rvec.reshape((1, self.m())), axis=0) # append row to fval_v
|
|
205
|
-
|
|
206
|
-
|
|
225
|
+
obj = sumsq(rvec)
|
|
226
|
+
if self.h is not None:
|
|
227
|
+
obj += self.h(remove_scaling(self.xbase + x, self.scaling_changes), *self.argsh)
|
|
228
|
+
self.objval = np.append(self.objval, obj) # append entry to objval
|
|
207
229
|
self.nsamples = np.append(self.nsamples, 1) # add new sample number
|
|
208
230
|
self.num_pts += 1 # make sure npt is updated
|
|
209
231
|
self.npt_so_far += 1
|
|
210
232
|
|
|
211
|
-
if
|
|
233
|
+
if obj < self.objopt():
|
|
212
234
|
self.kopt = self.npt() - 1
|
|
213
235
|
|
|
214
236
|
self.factorisation_current = False
|
|
@@ -228,11 +250,14 @@ class Model(object):
|
|
|
228
250
|
return
|
|
229
251
|
|
|
230
252
|
def save_point(self, x, rvec, nsamples, x_in_abs_coords=True):
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
253
|
+
xabs = x.copy() if x_in_abs_coords else self.as_absolute_coordinates(x)
|
|
254
|
+
obj = sumsq(rvec)
|
|
255
|
+
if self.h is not None:
|
|
256
|
+
obj += self.h(remove_scaling(xabs, self.scaling_changes), *self.argsh)
|
|
257
|
+
if self.objsave is None or obj <= self.objsave:
|
|
258
|
+
self.xsave = xabs
|
|
234
259
|
self.rsave = rvec.copy()
|
|
235
|
-
self.
|
|
260
|
+
self.objsave = obj
|
|
236
261
|
self.jacsave = self.model_jac.copy()
|
|
237
262
|
self.nsamples_save = nsamples
|
|
238
263
|
return True
|
|
@@ -240,15 +265,15 @@ class Model(object):
|
|
|
240
265
|
return False # this value is worse than what we have already - didn't save
|
|
241
266
|
|
|
242
267
|
def get_final_results(self):
|
|
243
|
-
# Return x and
|
|
244
|
-
if self.
|
|
245
|
-
return self.xopt(abs_coordinates=True).copy(), self.ropt().copy(), self.
|
|
268
|
+
# Return x, residuals, objval, Jacobian and nsamples for optimal point (either from xsave+objsave or kopt)
|
|
269
|
+
if self.objsave is None or self.objopt() <= self.objsave: # optimal has changed since xsave+objsave were last set
|
|
270
|
+
return self.xopt(abs_coordinates=True).copy(), self.ropt().copy(), self.objopt(), self.model_jac.copy(), self.nsamples[self.kopt]
|
|
246
271
|
else:
|
|
247
|
-
return self.xsave.copy(), self.rsave.copy(), self.
|
|
272
|
+
return self.xsave.copy(), self.rsave.copy(), self.objsave, self.jacsave, self.nsamples_save
|
|
248
273
|
|
|
249
274
|
def min_objective_value(self):
|
|
250
275
|
# Get termination criterion for f small: f <= abs_tol or f <= rel_tol * f0
|
|
251
|
-
return max(self.abs_tol, self.rel_tol * self.
|
|
276
|
+
return max(self.abs_tol, self.rel_tol * self.objbeg)
|
|
252
277
|
|
|
253
278
|
def model_value(self, d, d_based_at_xopt=True, with_const_term=False):
|
|
254
279
|
if d_based_at_xopt:
|
|
@@ -301,12 +326,12 @@ class Model(object):
|
|
|
301
326
|
return col_scale(LA.solve_triangular(self.R, Qb), self.right_scaling)
|
|
302
327
|
else:
|
|
303
328
|
if self.do_logging:
|
|
304
|
-
|
|
329
|
+
module_logger.warning("model.solve_geom_system not using factorisation")
|
|
305
330
|
W, left_scaling, right_scaling = self.interpolation_matrix()
|
|
306
331
|
return col_scale(LA.lstsq(W, col_scale(rhs * left_scaling))[0], right_scaling)
|
|
307
332
|
|
|
308
333
|
def interpolate_mini_models_svd(self, verbose=False, make_full_rank=False, min_sing_val=1e-6, sing_val_frac=1.0, max_jac_cond=1e8,
|
|
309
|
-
get_chg_J=False):
|
|
334
|
+
get_chg_J=False, throw_error_on_nans=False):
|
|
310
335
|
W, left_scaling, right_scaling = self.interpolation_matrix()
|
|
311
336
|
self.factorise_geom_system()
|
|
312
337
|
ls_interp_cond_num = np.linalg.cond(W) if verbose else 0.0 # scipy.linalg does not have condition number!
|
|
@@ -327,12 +352,18 @@ class Model(object):
|
|
|
327
352
|
self.model_jac = np.dot(self.model_jac, np.dot(Qhat, Qhat.T))
|
|
328
353
|
|
|
329
354
|
rhs = self.fval_v[fval_row_idx, :] # size npt * m
|
|
355
|
+
if np.any(np.isnan(rhs)) and throw_error_on_nans:
|
|
356
|
+
if self.do_logging:
|
|
357
|
+
module_logger.warning("model.interpolate_mini_models_svd: NaNs encountered in objective evaluations, raising error")
|
|
358
|
+
raise np.linalg.LinAlgError("NaN encountered in objective evaluations")
|
|
330
359
|
try:
|
|
331
360
|
dg = self.solve_geom_system(rhs) # size (n+1)*m
|
|
332
361
|
except LA.LinAlgError:
|
|
333
362
|
return False, None, None, None, None # flag error
|
|
334
363
|
except ValueError:
|
|
335
364
|
return False, None, None, None, None # flag error (e.g. inf or NaN encountered)
|
|
365
|
+
if not np.all(np.isfinite(dg)): # another check for inf or NaN
|
|
366
|
+
return False, None, None, None, None
|
|
336
367
|
J_old = self.model_jac.copy()
|
|
337
368
|
self.model_jac = dg[1:,:].T
|
|
338
369
|
self.model_const = dg[0,:] - np.dot(self.model_jac, xopt) # shift base to xbase
|
|
@@ -361,7 +392,7 @@ class Model(object):
|
|
|
361
392
|
return True, interp_error, sqrt(norm_J_error), linalg_resid, ls_interp_cond_num # flag ok
|
|
362
393
|
|
|
363
394
|
def build_full_model(self):
|
|
364
|
-
# Build full least squares
|
|
395
|
+
# Build full least squares model from mini-models
|
|
365
396
|
# Centred around xopt
|
|
366
397
|
r = self.model_const + np.dot(self.model_jac, self.xopt()) # constant term (for inexact interpolation)
|
|
367
398
|
J = self.model_jac
|
dfols/params.py
CHANGED
|
@@ -44,6 +44,7 @@ class ParameterList(object):
|
|
|
44
44
|
self.params["init.random_directions_make_orthogonal"] = True # although random > orthogonal, avoid for init
|
|
45
45
|
# Interpolation
|
|
46
46
|
self.params["interpolation.precondition"] = True
|
|
47
|
+
self.params["interpolation.throw_error_on_nans"] = False # throw numpy.linalg.LinAlgError if interpolating to nan data?
|
|
47
48
|
# Logging
|
|
48
49
|
self.params["logging.n_to_print_whole_x_vector"] = 6
|
|
49
50
|
self.params["logging.save_diagnostic_info"] = False
|
|
@@ -81,7 +82,7 @@ class ParameterList(object):
|
|
|
81
82
|
self.params["restarts.use_soft_restarts"] = True
|
|
82
83
|
self.params["restarts.soft.num_geom_steps"] = 3
|
|
83
84
|
self.params["restarts.soft.move_xk"] = True
|
|
84
|
-
self.params["restarts.soft.max_fake_successful_steps"] = maxfun # number ratio>0 steps below
|
|
85
|
+
self.params["restarts.soft.max_fake_successful_steps"] = maxfun # number ratio>0 steps below objsave allowed
|
|
85
86
|
self.params["restarts.hard.use_old_rk"] = True # recycle r(xk) from previous run?
|
|
86
87
|
self.params["restarts.increase_npt"] = False
|
|
87
88
|
self.params["restarts.increase_npt_amt"] = 1
|
|
@@ -108,7 +109,20 @@ class ParameterList(object):
|
|
|
108
109
|
self.params["growing.full_rank.min_sing_val"] = 1e-6 # absolute floor on singular values
|
|
109
110
|
self.params["growing.full_rank.svd_max_jac_cond"] = 1e8 # maximum condition number of Jacobian
|
|
110
111
|
self.params["growing.perturb_trust_region_step"] = False # add random direction onto TRS solution?
|
|
111
|
-
|
|
112
|
+
|
|
113
|
+
# Dykstra's algorithm
|
|
114
|
+
self.params["dykstra.d_tol"] = 1e-10
|
|
115
|
+
self.params["dykstra.max_iters"] = 100
|
|
116
|
+
|
|
117
|
+
# Matrix rank algorithm
|
|
118
|
+
self.params["matrix_rank.r_tol"] = 1e-18
|
|
119
|
+
|
|
120
|
+
# Function tolerance when applying S-FISTA method
|
|
121
|
+
self.params["func_tol.criticality_measure"] = 1e-3
|
|
122
|
+
self.params["func_tol.tr_step"] = 1-1e-1
|
|
123
|
+
self.params["func_tol.max_iters"] = 500
|
|
124
|
+
self.params["sfista.max_iters_scaling"] = 2.0
|
|
125
|
+
|
|
112
126
|
self.params_changed = {}
|
|
113
127
|
for p in self.params:
|
|
114
128
|
self.params_changed[p] = False
|
|
@@ -142,6 +156,8 @@ class ParameterList(object):
|
|
|
142
156
|
type_str, nonetype_ok, lower, upper = 'bool', False, None, None
|
|
143
157
|
elif key == "interpolation.precondition":
|
|
144
158
|
type_str, nonetype_ok, lower, upper = 'bool', False, None, None
|
|
159
|
+
elif key == "interpolation.throw_error_on_nans":
|
|
160
|
+
type_str, nonetype_ok, lower, upper = 'bool', False, None, None
|
|
145
161
|
elif key == "logging.n_to_print_whole_x_vector":
|
|
146
162
|
type_str, nonetype_ok, lower, upper = 'int', False, 0, None
|
|
147
163
|
elif key == "logging.save_diagnostic_info":
|
|
@@ -254,6 +270,20 @@ class ParameterList(object):
|
|
|
254
270
|
type_str, nonetype_ok, lower, upper = 'float', True, 1.0, None
|
|
255
271
|
elif key == "growing.perturb_trust_region_step":
|
|
256
272
|
type_str, nonetype_ok, lower, upper = 'bool', False, None, None
|
|
273
|
+
elif key == "dykstra.d_tol":
|
|
274
|
+
type_str, nonetype_ok, lower, upper = 'float', False, 0.0, None
|
|
275
|
+
elif key == "dykstra.max_iters":
|
|
276
|
+
type_str, nonetype_ok, lower, upper = 'int', False, 0, None
|
|
277
|
+
elif key == "matrix_rank.r_tol":
|
|
278
|
+
type_str, nonetype_ok, lower, upper = 'float', False, 0.0, None
|
|
279
|
+
elif key == "func_tol.criticality_measure":
|
|
280
|
+
type_str, nonetype_ok, lower, upper = 'float', False, 0.0, 1.0
|
|
281
|
+
elif key == "func_tol.tr_step":
|
|
282
|
+
type_str, nonetype_ok, lower, upper = 'float', False, 0.0, 1.0
|
|
283
|
+
elif key == "func_tol.max_iters":
|
|
284
|
+
type_str, nonetype_ok, lower, upper = 'int', False, 0, None
|
|
285
|
+
elif key == "sfista.max_iters_scaling":
|
|
286
|
+
type_str, nonetype_ok, lower, upper = 'float', False, 1.0, None
|
|
257
287
|
else:
|
|
258
288
|
assert False, "ParameterList.param_type() has unknown key: %s" % key
|
|
259
289
|
return type_str, nonetype_ok, lower, upper
|