pyGSTi 0.9.12__cp38-cp38-win_amd64.whl → 0.9.13__cp38-cp38-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyGSTi-0.9.13.dist-info/METADATA +185 -0
- {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/RECORD +211 -220
- {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/WHEEL +1 -1
- pygsti/_version.py +2 -2
- pygsti/algorithms/contract.py +1 -1
- pygsti/algorithms/core.py +62 -35
- pygsti/algorithms/fiducialpairreduction.py +95 -110
- pygsti/algorithms/fiducialselection.py +17 -8
- pygsti/algorithms/gaugeopt.py +2 -2
- pygsti/algorithms/germselection.py +87 -77
- pygsti/algorithms/mirroring.py +0 -388
- pygsti/algorithms/randomcircuit.py +165 -1333
- pygsti/algorithms/rbfit.py +0 -234
- pygsti/baseobjs/basis.py +94 -396
- pygsti/baseobjs/errorgenbasis.py +0 -132
- pygsti/baseobjs/errorgenspace.py +0 -10
- pygsti/baseobjs/label.py +52 -168
- pygsti/baseobjs/opcalc/fastopcalc.cp38-win_amd64.pyd +0 -0
- pygsti/baseobjs/opcalc/fastopcalc.pyx +2 -2
- pygsti/baseobjs/polynomial.py +13 -595
- pygsti/baseobjs/protectedarray.py +72 -132
- pygsti/baseobjs/statespace.py +1 -0
- pygsti/circuits/__init__.py +1 -1
- pygsti/circuits/circuit.py +753 -504
- pygsti/circuits/circuitconstruction.py +0 -4
- pygsti/circuits/circuitlist.py +47 -5
- pygsti/circuits/circuitparser/__init__.py +8 -8
- pygsti/circuits/circuitparser/fastcircuitparser.cp38-win_amd64.pyd +0 -0
- pygsti/circuits/circuitstructure.py +3 -3
- pygsti/circuits/cloudcircuitconstruction.py +27 -14
- pygsti/data/datacomparator.py +4 -9
- pygsti/data/dataset.py +51 -46
- pygsti/data/hypothesistest.py +0 -7
- pygsti/drivers/bootstrap.py +0 -49
- pygsti/drivers/longsequence.py +46 -10
- pygsti/evotypes/basereps_cython.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/chp/opreps.py +0 -61
- pygsti/evotypes/chp/statereps.py +0 -32
- pygsti/evotypes/densitymx/effectcreps.cpp +9 -10
- pygsti/evotypes/densitymx/effectreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/densitymx/effectreps.pyx +1 -1
- pygsti/evotypes/densitymx/opreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/densitymx/opreps.pyx +2 -2
- pygsti/evotypes/densitymx/statereps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/densitymx/statereps.pyx +1 -1
- pygsti/evotypes/densitymx_slow/effectreps.py +7 -23
- pygsti/evotypes/densitymx_slow/opreps.py +16 -23
- pygsti/evotypes/densitymx_slow/statereps.py +10 -3
- pygsti/evotypes/evotype.py +39 -2
- pygsti/evotypes/stabilizer/effectreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/stabilizer/effectreps.pyx +0 -4
- pygsti/evotypes/stabilizer/opreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/stabilizer/opreps.pyx +0 -4
- pygsti/evotypes/stabilizer/statereps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/stabilizer/statereps.pyx +1 -5
- pygsti/evotypes/stabilizer/termreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/stabilizer/termreps.pyx +0 -7
- pygsti/evotypes/stabilizer_slow/effectreps.py +0 -22
- pygsti/evotypes/stabilizer_slow/opreps.py +0 -4
- pygsti/evotypes/stabilizer_slow/statereps.py +0 -4
- pygsti/evotypes/statevec/effectreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/statevec/effectreps.pyx +1 -1
- pygsti/evotypes/statevec/opreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/statevec/opreps.pyx +2 -2
- pygsti/evotypes/statevec/statereps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/statevec/statereps.pyx +1 -1
- pygsti/evotypes/statevec/termreps.cp38-win_amd64.pyd +0 -0
- pygsti/evotypes/statevec/termreps.pyx +0 -7
- pygsti/evotypes/statevec_slow/effectreps.py +0 -3
- pygsti/evotypes/statevec_slow/opreps.py +0 -5
- pygsti/extras/__init__.py +0 -1
- pygsti/extras/drift/signal.py +1 -1
- pygsti/extras/drift/stabilityanalyzer.py +3 -1
- pygsti/extras/interpygate/__init__.py +12 -0
- pygsti/extras/interpygate/core.py +0 -36
- pygsti/extras/interpygate/process_tomography.py +44 -10
- pygsti/extras/rpe/rpeconstruction.py +0 -2
- pygsti/forwardsims/__init__.py +1 -0
- pygsti/forwardsims/forwardsim.py +50 -93
- pygsti/forwardsims/mapforwardsim.py +78 -20
- pygsti/forwardsims/mapforwardsim_calc_densitymx.cp38-win_amd64.pyd +0 -0
- pygsti/forwardsims/mapforwardsim_calc_densitymx.pyx +65 -66
- pygsti/forwardsims/mapforwardsim_calc_generic.py +91 -13
- pygsti/forwardsims/matrixforwardsim.py +72 -17
- pygsti/forwardsims/termforwardsim.py +9 -111
- pygsti/forwardsims/termforwardsim_calc_stabilizer.cp38-win_amd64.pyd +0 -0
- pygsti/forwardsims/termforwardsim_calc_statevec.cp38-win_amd64.pyd +0 -0
- pygsti/forwardsims/termforwardsim_calc_statevec.pyx +0 -651
- pygsti/forwardsims/torchfwdsim.py +265 -0
- pygsti/forwardsims/weakforwardsim.py +2 -2
- pygsti/io/__init__.py +1 -2
- pygsti/io/mongodb.py +0 -2
- pygsti/io/stdinput.py +6 -22
- pygsti/layouts/copalayout.py +10 -12
- pygsti/layouts/distlayout.py +0 -40
- pygsti/layouts/maplayout.py +103 -25
- pygsti/layouts/matrixlayout.py +99 -60
- pygsti/layouts/prefixtable.py +1534 -52
- pygsti/layouts/termlayout.py +1 -1
- pygsti/modelmembers/instruments/instrument.py +3 -3
- pygsti/modelmembers/instruments/tpinstrument.py +2 -2
- pygsti/modelmembers/modelmember.py +0 -17
- pygsti/modelmembers/operations/__init__.py +3 -4
- pygsti/modelmembers/operations/affineshiftop.py +206 -0
- pygsti/modelmembers/operations/composederrorgen.py +1 -1
- pygsti/modelmembers/operations/composedop.py +1 -24
- pygsti/modelmembers/operations/denseop.py +5 -5
- pygsti/modelmembers/operations/eigpdenseop.py +2 -2
- pygsti/modelmembers/operations/embeddederrorgen.py +1 -1
- pygsti/modelmembers/operations/embeddedop.py +0 -1
- pygsti/modelmembers/operations/experrorgenop.py +5 -2
- pygsti/modelmembers/operations/fullarbitraryop.py +1 -0
- pygsti/modelmembers/operations/fullcptpop.py +2 -2
- pygsti/modelmembers/operations/fulltpop.py +28 -6
- pygsti/modelmembers/operations/fullunitaryop.py +5 -4
- pygsti/modelmembers/operations/lindbladcoefficients.py +93 -78
- pygsti/modelmembers/operations/lindbladerrorgen.py +268 -441
- pygsti/modelmembers/operations/linearop.py +7 -27
- pygsti/modelmembers/operations/opfactory.py +1 -1
- pygsti/modelmembers/operations/repeatedop.py +1 -24
- pygsti/modelmembers/operations/staticstdop.py +1 -1
- pygsti/modelmembers/povms/__init__.py +3 -3
- pygsti/modelmembers/povms/basepovm.py +7 -36
- pygsti/modelmembers/povms/complementeffect.py +4 -9
- pygsti/modelmembers/povms/composedeffect.py +0 -320
- pygsti/modelmembers/povms/computationaleffect.py +1 -1
- pygsti/modelmembers/povms/computationalpovm.py +3 -1
- pygsti/modelmembers/povms/effect.py +3 -5
- pygsti/modelmembers/povms/marginalizedpovm.py +3 -81
- pygsti/modelmembers/povms/tppovm.py +74 -2
- pygsti/modelmembers/states/__init__.py +2 -5
- pygsti/modelmembers/states/composedstate.py +0 -317
- pygsti/modelmembers/states/computationalstate.py +3 -3
- pygsti/modelmembers/states/cptpstate.py +4 -4
- pygsti/modelmembers/states/densestate.py +10 -8
- pygsti/modelmembers/states/fullpurestate.py +0 -24
- pygsti/modelmembers/states/purestate.py +1 -1
- pygsti/modelmembers/states/state.py +5 -6
- pygsti/modelmembers/states/tpstate.py +28 -10
- pygsti/modelmembers/term.py +3 -6
- pygsti/modelmembers/torchable.py +50 -0
- pygsti/modelpacks/_modelpack.py +1 -1
- pygsti/modelpacks/smq1Q_ZN.py +3 -1
- pygsti/modelpacks/smq2Q_XXYYII.py +2 -1
- pygsti/modelpacks/smq2Q_XY.py +3 -3
- pygsti/modelpacks/smq2Q_XYI.py +2 -2
- pygsti/modelpacks/smq2Q_XYICNOT.py +3 -3
- pygsti/modelpacks/smq2Q_XYICPHASE.py +3 -3
- pygsti/modelpacks/smq2Q_XYXX.py +1 -1
- pygsti/modelpacks/smq2Q_XYZICNOT.py +3 -3
- pygsti/modelpacks/smq2Q_XYZZ.py +1 -1
- pygsti/modelpacks/stdtarget.py +0 -121
- pygsti/models/cloudnoisemodel.py +1 -2
- pygsti/models/explicitcalc.py +3 -3
- pygsti/models/explicitmodel.py +3 -13
- pygsti/models/fogistore.py +5 -3
- pygsti/models/localnoisemodel.py +1 -2
- pygsti/models/memberdict.py +0 -12
- pygsti/models/model.py +801 -68
- pygsti/models/modelconstruction.py +4 -4
- pygsti/models/modelnoise.py +2 -2
- pygsti/models/modelparaminterposer.py +1 -1
- pygsti/models/oplessmodel.py +1 -1
- pygsti/models/qutrit.py +15 -14
- pygsti/objectivefns/objectivefns.py +75 -140
- pygsti/objectivefns/wildcardbudget.py +2 -7
- pygsti/optimize/__init__.py +1 -0
- pygsti/optimize/arraysinterface.py +28 -0
- pygsti/optimize/customcg.py +0 -12
- pygsti/optimize/customlm.py +129 -323
- pygsti/optimize/customsolve.py +2 -2
- pygsti/optimize/optimize.py +0 -84
- pygsti/optimize/simplerlm.py +841 -0
- pygsti/optimize/wildcardopt.py +19 -598
- pygsti/protocols/confidenceregionfactory.py +28 -14
- pygsti/protocols/estimate.py +31 -14
- pygsti/protocols/gst.py +238 -142
- pygsti/protocols/modeltest.py +19 -12
- pygsti/protocols/protocol.py +9 -37
- pygsti/protocols/rb.py +450 -79
- pygsti/protocols/treenode.py +8 -2
- pygsti/protocols/vb.py +108 -206
- pygsti/protocols/vbdataframe.py +1 -1
- pygsti/report/factory.py +0 -15
- pygsti/report/fogidiagram.py +1 -17
- pygsti/report/modelfunction.py +12 -3
- pygsti/report/mpl_colormaps.py +1 -1
- pygsti/report/plothelpers.py +11 -3
- pygsti/report/report.py +16 -0
- pygsti/report/reportables.py +41 -37
- pygsti/report/templates/offline/pygsti_dashboard.css +6 -0
- pygsti/report/templates/offline/pygsti_dashboard.js +12 -0
- pygsti/report/workspace.py +2 -14
- pygsti/report/workspaceplots.py +328 -505
- pygsti/tools/basistools.py +9 -36
- pygsti/tools/edesigntools.py +124 -96
- pygsti/tools/fastcalc.cp38-win_amd64.pyd +0 -0
- pygsti/tools/fastcalc.pyx +35 -81
- pygsti/tools/internalgates.py +151 -15
- pygsti/tools/jamiolkowski.py +5 -5
- pygsti/tools/lindbladtools.py +19 -11
- pygsti/tools/listtools.py +0 -114
- pygsti/tools/matrixmod2.py +1 -1
- pygsti/tools/matrixtools.py +173 -339
- pygsti/tools/nameddict.py +1 -1
- pygsti/tools/optools.py +154 -88
- pygsti/tools/pdftools.py +0 -25
- pygsti/tools/rbtheory.py +3 -320
- pygsti/tools/slicetools.py +64 -12
- pyGSTi-0.9.12.dist-info/METADATA +0 -157
- pygsti/algorithms/directx.py +0 -711
- pygsti/evotypes/qibo/__init__.py +0 -33
- pygsti/evotypes/qibo/effectreps.py +0 -78
- pygsti/evotypes/qibo/opreps.py +0 -376
- pygsti/evotypes/qibo/povmreps.py +0 -98
- pygsti/evotypes/qibo/statereps.py +0 -174
- pygsti/extras/rb/__init__.py +0 -13
- pygsti/extras/rb/benchmarker.py +0 -957
- pygsti/extras/rb/dataset.py +0 -378
- pygsti/extras/rb/io.py +0 -814
- pygsti/extras/rb/simulate.py +0 -1020
- pygsti/io/legacyio.py +0 -385
- pygsti/modelmembers/povms/denseeffect.py +0 -142
- {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/LICENSE +0 -0
- {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,841 @@
|
|
1
|
+
"""
|
2
|
+
Custom implementation of the Levenberg-Marquardt Algorithm (but simpler than customlm.py)
|
3
|
+
"""
|
4
|
+
#***************************************************************************************************
|
5
|
+
# Copyright 2015, 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
6
|
+
# Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights
|
7
|
+
# in this software.
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
9
|
+
# in compliance with the License. You may obtain a copy of the License at
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory.
|
11
|
+
#***************************************************************************************************
|
12
|
+
|
13
|
+
import os as _os
|
14
|
+
import signal as _signal
|
15
|
+
import time as _time
|
16
|
+
|
17
|
+
import numpy as _np
|
18
|
+
import scipy as _scipy
|
19
|
+
|
20
|
+
from pygsti.optimize import arraysinterface as _ari
|
21
|
+
from pygsti.optimize.customsolve import custom_solve as _custom_solve
|
22
|
+
from pygsti.baseobjs.verbosityprinter import VerbosityPrinter as _VerbosityPrinter
|
23
|
+
from pygsti.baseobjs.resourceallocation import ResourceAllocation as _ResourceAllocation
|
24
|
+
from pygsti.baseobjs.nicelyserializable import NicelySerializable as _NicelySerializable
|
25
|
+
from pygsti.objectivefns.objectivefns import Chi2Function, TimeIndependentMDCObjectiveFunction
|
26
|
+
from typing import Callable
|
27
|
+
|
28
|
+
#Make sure SIGINT will generate a KeyboardInterrupt (even if we're launched in the background)
#This may be problematic for multithreaded parallelism above pyGSTi, e.g. Dask,
#so this can be turned off by setting the PYGSTI_NO_CUSTOMLM_SIGINT environment variable
#(only the *presence* of the variable is tested below; its value is ignored).
#NOTE: this runs at import time, so merely importing this module installs the handler.
if 'PYGSTI_NO_CUSTOMLM_SIGINT' not in _os.environ:
    _signal.signal(_signal.SIGINT, _signal.default_int_handler)

#constants
#NOTE(review): not referenced in the visible part of this module, and 1e-12 is much larger
# than float64 machine epsilon -- confirm where/how this threshold is meant to be used.
_MACH_PRECISION = 1e-12
|
36
|
+
|
37
|
+
|
38
|
+
class OptimizerResult(object):
    """
    Container for the outcome of an optimization.

    The constructor simply stores its arguments; no validation or copying
    is performed.

    Parameters
    ----------
    objective_func : ObjectiveFunction
        The objective function that was optimized.  Stored as `objective_func`.

    opt_x : numpy.ndarray
        The optimal argument (x) value, often a vector of parameters.
        Stored as `x`.

    opt_f : numpy.ndarray, optional
        The optimal objective function (f) value.  Often this is the
        least-squares vector of objective function values.  Stored as `f`.

    opt_jtj : numpy.ndarray, optional
        The optimal `dot(transpose(J),J)` value, where `J` is the Jacobian
        matrix.  This may be useful for computing approximate error bars.
        Stored as `jtj`.

    opt_unpenalized_f : numpy.ndarray, optional
        The optimal objective function (f) value with any penalty terms
        removed.  Stored as `f_no_penalties`.

    chi2_k_distributed_qty : float, optional
        A value that is supposed to be chi2_k distributed.

    optimizer_specific_qtys : dict, optional
        A dictionary of additional optimization parameters.
    """

    def __init__(self, objective_func, opt_x, opt_f=None, opt_jtj=None,
                 opt_unpenalized_f=None, chi2_k_distributed_qty=None,
                 optimizer_specific_qtys=None):
        # What was optimized, where the optimum lies and its value there.
        self.objective_func, self.x, self.f = objective_func, opt_x, opt_f
        # jacobian.T * jacobian at the optimum, and f without penalty terms.
        self.jtj, self.f_no_penalties = opt_jtj, opt_unpenalized_f
        # Free-form extras supplied by the particular optimizer.
        self.optimizer_specific_qtys = optimizer_specific_qtys
        self.chi2_k_distributed_qty = chi2_k_distributed_qty
|
79
|
+
|
80
|
+
|
81
|
+
class Optimizer(_NicelySerializable):
    """
    Base class for optimizers, i.e. objects that optimize an objective function.
    """

    @classmethod
    def cast(cls, obj):
        """
        Convert `obj` into an instance of this :class:`Optimizer` class.

        An object that is already an instance of `cls` is returned as-is.
        A falsy `obj` (e.g. `None` or an empty dict) yields a
        default-constructed instance.  Anything else is treated as a
        dictionary of constructor keyword arguments.

        Parameters
        ----------
        obj : Optimizer or dict
            The object to cast.

        Returns
        -------
        Optimizer
        """
        if isinstance(obj, cls):
            return obj
        if not obj:
            return cls()
        return cls(**obj)

    def __init__(self):
        super().__init__()
|
111
|
+
|
112
|
+
|
113
|
+
class SimplerLMOptimizer(Optimizer):
    """
    A Levenberg-Marquardt optimizer customized for GST-like problems.

    Parameters
    ----------
    maxiter : int, optional
        The maximum number of (outer) iterations.

    maxfev : int, optional
        The maximum function evaluations.  This value is stored and
        serialized, but :meth:`run` does not forward it to `simplish_leastsq`.

    tol : float or dict, optional
        The tolerance, specified as a single number or as a dict
        with keys `{'relx', 'relf', 'f', 'jac', 'maxdx'}`.  A single
        number sets the `'relf'` and `'jac'` elements and leaves
        the others at their default values.

    fditer : int, optional
        Internally compute the Jacobian using a finite-difference method
        for the first `fditer` iterations.  This is useful when the initial
        point lies at a special or singular point where the analytic Jacobian
        is misleading.

    first_fditer : int, optional
        Number of finite-difference iterations applied to the first
        stage of the optimization (only).  Unused.

    init_munu : tuple or "auto", optional
        Either the string `"auto"` or a (mu, nu) tuple of 2 floats giving
        the initial values for mu and nu.

    oob_check_interval : int, optional
        Every `oob_check_interval` outer iterations, the objective function
        (`obj_fn`) is called with a second argument 'oob_check', set to True.
        In this case, `obj_fn` can raise a ValueError exception to indicate
        that it is Out Of Bounds.  If `oob_check_interval` is 0 then this
        check is never performed; if 1 then it is always performed.

    oob_action : {"reject","stop"}
        What to do when the objective function indicates that it is out of
        bounds (by raising a ValueError as described above).  `"reject"`
        means the step is rejected but the optimization proceeds; `"stop"`
        means the optimization stops and returns as converged at the last
        known-in-bounds point.

    oob_check_mode : int, optional
        An advanced option, expert use only.  If 0 then the optimization is
        halted as soon as an *attempt* is made to evaluate the function out of bounds.
        If 1 then the optimization is halted only when a would-be *accepted* step
        is out of bounds.

    serial_solve_proc_threshold : int, optional
        When there are fewer than this many processors, the optimizer will solve linear
        systems serially, using SciPy on a single processor, rather than using a parallelized
        Gaussian Elimination (with partial pivoting) algorithm coded in Python.  Since SciPy's
        implementation is more efficient, it's not worth using the parallel version until there
        are many processors to spread the work among.

    lsvec_mode : {'normal', 'percircuit'}
        Whether the terms used in the least-squares optimization are the "elements" as computed
        by the objective function's `.terms()` and `.lsvec()` methods (`'normal'` mode) or the
        "per-circuit quantities" computed by the objective function's `.percircuit()` and
        `.lsvec_percircuit()` methods (`'percircuit'` mode).
    """

    @classmethod
    def cast(cls, obj):
        """
        Cast `obj` to an optimizer.

        An existing instance of `cls` is returned unchanged and a falsy `obj`
        gives a default-constructed instance.  Otherwise `obj` is used as a
        dictionary of constructor arguments; if this class cannot be built
        from them, a `CustomLMOptimizer` is built from the same arguments
        instead.

        Parameters
        ----------
        obj : SimplerLMOptimizer or dict
            The object to cast.

        Returns
        -------
        SimplerLMOptimizer or CustomLMOptimizer
        """
        if isinstance(obj, cls):
            return obj
        if obj:
            try:
                return cls(**obj)
            except Exception:
                # Deliberately *not* a bare `except:` so that KeyboardInterrupt and
                # SystemExit propagate instead of silently triggering the fallback.
                from pygsti.optimize.customlm import CustomLMOptimizer
                return CustomLMOptimizer(**obj)
        return cls()

    def __init__(self, maxiter=100, maxfev=100, tol=1e-6, fditer=0, first_fditer=0, init_munu="auto",
                 oob_check_interval=0, oob_action="reject", oob_check_mode=0,
                 serial_solve_proc_threshold=100, lsvec_mode="normal"):

        super().__init__()
        # Accept any real scalar (e.g. `tol=1` or a NumPy float32), not just `float`;
        # otherwise `self.tol` would stay a bare number and `run` would fail on `.get`.
        if isinstance(tol, (int, float, _np.floating)):
            tol = float(tol)
            tol = {'relx': 1e-8, 'relf': tol, 'f': 1.0, 'jac': tol, 'maxdx': 1.0}
        self.maxiter = maxiter
        self.maxfev = maxfev
        self.tol = tol
        self.fditer = fditer
        self.first_fditer = first_fditer
        self.init_munu = init_munu
        self.oob_check_interval = oob_check_interval
        self.oob_action = oob_action
        self.oob_check_mode = oob_check_mode
        self.array_types = 3 * ('p',) + ('e', 'ep')  # see simplish_leastsq fn "-type"s -need to add 'jtj' type
        self.called_objective_methods = ('lsvec', 'dlsvec')  # the objective function methods we use (for mem estimate)
        self.serial_solve_proc_threshold = serial_solve_proc_threshold
        self.lsvec_mode = lsvec_mode

    def _to_nice_serialization(self):
        """Return the base-class serialization dict extended with this optimizer's settings."""
        state = super()._to_nice_serialization()
        state.update({
            'maximum_iterations': self.maxiter,
            'maximum_function_evaluations': self.maxfev,
            'tolerance': self.tol,
            'number_of_finite_difference_iterations': self.fditer,
            'number_of_first_stage_finite_difference_iterations': self.first_fditer,
            'initial_mu_and_nu': self.init_munu,
            'out_of_bounds_check_interval': self.oob_check_interval,
            'out_of_bounds_action': self.oob_action,
            'out_of_bounds_check_mode': self.oob_check_mode,
            'array_types': self.array_types,
            'called_objective_function_methods': self.called_objective_methods,
            'serial_solve_number_of_processors_threshold': self.serial_solve_proc_threshold,
            'lsvec_mode': self.lsvec_mode
        })
        return state

    @classmethod
    def _from_nice_serialization(cls, state):
        """
        Rebuild an optimizer from a dict produced by `_to_nice_serialization`.

        `array_types` and `called_objective_function_methods` are not read
        back since the constructor sets them itself; a missing `lsvec_mode`
        entry defaults to `'normal'`.
        """
        return cls(maxiter=state['maximum_iterations'],
                   maxfev=state['maximum_function_evaluations'],
                   tol=state['tolerance'],
                   fditer=state['number_of_finite_difference_iterations'],
                   first_fditer=state['number_of_first_stage_finite_difference_iterations'],
                   init_munu=state['initial_mu_and_nu'],
                   oob_check_interval=state['out_of_bounds_check_interval'],
                   oob_action=state['out_of_bounds_action'],
                   oob_check_mode=state['out_of_bounds_check_mode'],
                   serial_solve_proc_threshold=state['serial_solve_number_of_processors_threshold'],
                   lsvec_mode=state.get('lsvec_mode', 'normal'))

    def run(self, objective: TimeIndependentMDCObjectiveFunction, profiler, printer):

        """
        Perform the optimization.

        Parameters
        ----------
        objective : ObjectiveFunction
            The objective function to optimize.

        profiler : Profiler
            A profiler to track resource usage.

        printer : VerbosityPrinter
            printer to use for sending output to stdout.

        Returns
        -------
        OptimizerResult
            Holds the optimal parameter vector, the squared norm of the
            least-squares vector (with and without the trailing "extra"
            penalty elements), the chi2_k-distributed quantity reported by
            `objective`, and a dict with keys `'msg'`, `'mu'`, `'nu'` and
            `'fvec'`.  The `jtj` field is left as `None`.

        Raises
        ------
        ValueError
            If `lsvec_mode` is neither `'normal'` nor `'percircuit'`.
        AssertionError
            If `simplish_leastsq` reports that it did not converge.
        """
        nExtra = objective.ex  # number of additional "extra" elements

        if self.lsvec_mode == 'normal':
            objective_func = objective.lsvec
            jacobian = objective.dlsvec
            nEls = objective.layout.num_elements + nExtra  # 'e' for array types
        elif self.lsvec_mode == 'percircuit':
            objective_func = objective.lsvec_percircuit
            jacobian = objective.dlsvec_percircuit
            nEls = objective.layout.num_circuits + nExtra  # 'e' for array types
        else:
            raise ValueError("Invalid `lsvec_mode`: %s" % str(self.lsvec_mode))

        x0 = objective.model.to_vector()
        x_limits = objective.model.parameter_bounds
        # x_limits should be a (num_params, 2)-shaped array, holding on each row the (min, max) values for the
        #  corresponding parameter (element of the "x" vector) or `None`.  If `None`, then no limits are imposed.

        # Check memory limit can handle what simplish_leastsq will "allocate"
        nP = len(x0)  # 'p' for array types
        objective.resource_alloc.check_can_allocate_memory(3 * nP + nEls + nEls * nP + nP * nP)  # see array_types above

        from ..layouts.distlayout import DistributableCOPALayout as _DL
        if isinstance(objective.layout, _DL):
            ari = _ari.DistributedArraysInterface(objective.layout, self.lsvec_mode, nExtra)
        else:
            ari = _ari.UndistributedArraysInterface(nEls, nP)

        opt_x, converged, msg, mu, nu, norm_f, f = simplish_leastsq(
            objective_func, jacobian, x0,
            max_iter=self.maxiter,
            num_fd_iters=self.fditer,
            f_norm2_tol=self.tol.get('f', 1.0),
            jac_norm_tol=self.tol.get('jac', 1e-6),
            rel_ftol=self.tol.get('relf', 1e-6),
            rel_xtol=self.tol.get('relx', 1e-8),
            max_dx_scale=self.tol.get('maxdx', 1.0),
            init_munu=self.init_munu,
            oob_check_interval=self.oob_check_interval,
            oob_action=self.oob_action,
            oob_check_mode=self.oob_check_mode,
            resource_alloc=objective.resource_alloc,
            arrays_interface=ari,
            serial_solve_proc_threshold=self.serial_solve_proc_threshold,
            x_limits=x_limits,
            verbosity=printer - 1, profiler=profiler)

        printer.log("Least squares message = %s" % msg, 2)
        assert(converged), "Failed to converge: %s" % msg
        current_v = objective.model.to_vector()
        if not _np.allclose(current_v, opt_x):  # ensure the last model evaluation was at opt_x
            objective_func(opt_x)
            #objective.model.from_vector(opt_x)  # performed within line above

        #DEBUG CHECK SYNC between procs (especially for shared mem) - could REMOVE
        # if objective.resource_alloc.comm is not None:
        #     comm = objective.resource_alloc.comm
        #     v_cmp = comm.bcast(objective.model.to_vector() if (comm.Get_rank() == 0) else None, root=0)
        #     v_matches_x = _np.allclose(objective.model.to_vector(), opt_x)
        #     same_as_root = _np.isclose(_np.linalg.norm(objective.model.to_vector() - v_cmp), 0.0)
        #     if not (v_matches_x and same_as_root):
        #         raise ValueError("Rank %d CUSTOMLM ERROR: END model vector-matches-x=%s and vector-is-same-as-root=%s"
        #                          % (comm.rank, str(v_matches_x), str(same_as_root)))
        #     comm.barrier()  # if we get past here, then *all* processors are OK
        #     if comm.rank == 0:
        #         print("OK - model vector == best_x and all vectors agree w/root proc's")

        # The "extra" (penalty) elements occupy the tail of f; strip them when present.
        unpenalized_f = f[0:-objective.ex] if (objective.ex > 0) else f
        unpenalized_normf = sum(unpenalized_f**2)  # objective function without penalty factors
        chi2k_qty = objective.chi2k_distributed_qty(norm_f)
        optimizer_specific_qtys = {'msg': msg, 'mu': mu, 'nu': nu, 'fvec': f}
        return OptimizerResult(objective, opt_x, norm_f, None, unpenalized_normf, chi2k_qty, optimizer_specific_qtys)
|
330
|
+
|
331
|
+
|
332
|
+
|
333
|
+
def damp_coeff_update(mu, nu, half_max_nu, reject_msg, printer):
    """
    Increase the damping coefficients after a rejected step.

    Called when either the linear solve failed or the error did not
    reduce; in both cases the increment is rejected and the damping grows.

    Parameters
    ----------
    mu : float
        Current damping coefficient; it is multiplied by `nu`.

    nu : float
        Current growth factor; it is doubled.

    half_max_nu : float
        If the *incoming* `nu` exceeds this value, an overflow message is
        returned so that the caller can stop iterating.

    reject_msg : str
        Text inserted into the log line right after "Rejected".

    printer : VerbosityPrinter
        Receives a single log line at verbosity level 2.

    Returns
    -------
    mu : float
        The updated damping coefficient (`mu * nu`).
    nu : float
        The updated growth factor (`2 * nu`).
    msg : str
        `"Stopping after nu overflow!"` when `nu` was too large, else `""`.
    """
    # The overflow test looks at nu *before* it is doubled.
    status = "Stopping after nu overflow!" if nu > half_max_nu else ""
    mu *= nu
    nu = 2 * nu
    log_line = " Rejected%s! mu => mu*nu = %g, nu => 2*nu = %g" % (reject_msg, mu, nu)
    printer.log(log_line, 2)
    return mu, nu, status
|
348
|
+
|
349
|
+
|
350
|
+
def jac_guarded(k: int, num_fd_iters: int, obj_fn: Callable, jac_fn: Callable, f, ari, global_x, fdJac_work):
    """
    Return the Jacobian for iteration `k`, analytic or finite-difference.

    For `k >= num_fd_iters` the analytic `jac_fn(global_x)` is returned.
    For earlier iterations a forward-difference Jacobian (step `1e-7`) is
    written into `fdJac_work`, which is then returned.

    Parameters
    ----------
    k : int
        Index of the current outer iteration.

    num_fd_iters : int
        Number of initial iterations that use finite differences.

    obj_fn : Callable
        Objective function, evaluated at perturbed copies of `global_x`.

    jac_fn : Callable
        Analytic Jacobian function.

    f : array
        Objective function value at `global_x`; copied before use because
        it may be memory that `obj_fn` overwrites on its next call.

    ari : ArraysInterface
        Queried via `jac_param_slice(only_if_leader=True)` for the range of
        parameter indices whose columns are stored in `fdJac_work`.

    global_x : array
        The full parameter vector (left unmodified).

    fdJac_work : array
        Work array receiving the finite-difference columns; unused in the
        analytic case.

    Returns
    -------
    array
        `jac_fn(global_x)` or `fdJac_work`.
    """
    if k >= num_fd_iters:
        # 'EP'-type, but doesn't actually allocate any more mem (!)
        return jac_fn(global_x)

    # `f` is a static part of the distributed array returned by obj_fn - MUST copy it.
    baseline = f.copy()
    owned = ari.jac_param_slice(only_if_leader=True)
    step = 1e-7

    # Loop over *all* parameters rather than just the owned slice: every
    # processor must make the same obj_fn calls to stay in sync.
    for col in range(len(global_x)):
        shifted = global_x.copy()
        shifted[col] += step
        slope = (obj_fn(shifted) - baseline) / step
        if owned.start <= col < owned.stop:
            # Only a subset of the derivative columns is held locally.
            fdJac_work[:, col - owned.start] = slope
    return fdJac_work
|
370
|
+
|
371
|
+
|
372
|
+
|
373
|
+
def simplish_leastsq(
|
374
|
+
obj_fn, jac_fn, x0, f_norm2_tol=1e-6, jac_norm_tol=1e-6,
|
375
|
+
rel_ftol=1e-6, rel_xtol=1e-6, max_iter=100, num_fd_iters=0, max_dx_scale=1.0,
|
376
|
+
init_munu="auto", oob_check_interval=0, oob_action="reject", oob_check_mode=0,
|
377
|
+
resource_alloc=None, arrays_interface=None, serial_solve_proc_threshold=100,
|
378
|
+
x_limits=None, verbosity=0, profiler=None
|
379
|
+
):
|
380
|
+
"""
|
381
|
+
An implementation of the Levenberg-Marquardt least-squares optimization algorithm customized for use within pyGSTi.
|
382
|
+
|
383
|
+
This general purpose routine mimic to a large extent the interface used by
|
384
|
+
`scipy.optimize.leastsq`, though it implements a newer (and more robust) version
|
385
|
+
of the algorithm.
|
386
|
+
|
387
|
+
Parameters
|
388
|
+
----------
|
389
|
+
obj_fn : function
|
390
|
+
The objective function. Must accept and return 1D numpy ndarrays of
|
391
|
+
length N and M respectively. Same form as scipy.optimize.leastsq.
|
392
|
+
|
393
|
+
jac_fn : function
|
394
|
+
The jacobian function (not optional!). Accepts a 1D array of length N
|
395
|
+
and returns an array of shape (M,N).
|
396
|
+
|
397
|
+
x0 : numpy.ndarray
|
398
|
+
Initial evaluation point.
|
399
|
+
|
400
|
+
f_norm2_tol : float, optional
|
401
|
+
Tolerace for `F^2` where `F = `norm( sum(obj_fn(x)**2) )` is the
|
402
|
+
least-squares residual. If `F**2 < f_norm2_tol`, then mark converged.
|
403
|
+
|
404
|
+
jac_norm_tol : float, optional
|
405
|
+
Tolerance for jacobian norm, namely if `infn(dot(J.T,f)) < jac_norm_tol`
|
406
|
+
then mark converged, where `infn` is the infinity-norm and
|
407
|
+
`f = obj_fn(x)`.
|
408
|
+
|
409
|
+
rel_ftol : float, optional
|
410
|
+
Tolerance on the relative reduction in `F^2`, that is, if
|
411
|
+
`d(F^2)/F^2 < rel_ftol` then mark converged.
|
412
|
+
|
413
|
+
rel_xtol : float, optional
|
414
|
+
Tolerance on the relative value of `|x|`, so that if
|
415
|
+
`d(|x|)/|x| < rel_xtol` then mark converged.
|
416
|
+
|
417
|
+
max_iter : int, optional
|
418
|
+
The maximum number of (outer) interations.
|
419
|
+
|
420
|
+
num_fd_iters : int optional
|
421
|
+
Internally compute the Jacobian using a finite-difference method
|
422
|
+
for the first `num_fd_iters` iterations. This is useful when `x0`
|
423
|
+
lies at a special or singular point where the analytic Jacobian is
|
424
|
+
misleading.
|
425
|
+
|
426
|
+
max_dx_scale : float, optional
|
427
|
+
If not None, impose a limit on the magnitude of the step, so that
|
428
|
+
`|dx|^2 < max_dx_scale^2 * len(dx)` (so elements of `dx` should be,
|
429
|
+
roughly, less than `max_dx_scale`).
|
430
|
+
|
431
|
+
init_munu : tuple, optional
|
432
|
+
If not None, a (mu, nu) tuple of 2 floats giving the initial values
|
433
|
+
for mu and nu.
|
434
|
+
|
435
|
+
oob_check_interval : int, optional
|
436
|
+
Every `oob_check_interval` outer iterations, the objective function
|
437
|
+
(`obj_fn`) is called with a second argument 'oob_check', set to True.
|
438
|
+
In this case, `obj_fn` can raise a ValueError exception to indicate
|
439
|
+
that it is Out Of Bounds. If `oob_check_interval` is 0 then this
|
440
|
+
check is never performed; if 1 then it is always performed.
|
441
|
+
|
442
|
+
oob_action : {"reject","stop"}
|
443
|
+
What to do when the objective function indicates (by raising a ValueError
|
444
|
+
as described above). `"reject"` means the step is rejected but the
|
445
|
+
optimization proceeds; `"stop"` means the optimization stops and returns
|
446
|
+
as converged at the last known-in-bounds point.
|
447
|
+
|
448
|
+
oob_check_mode : int, optional
|
449
|
+
An advanced option, expert use only. If 0 then the optimization is
|
450
|
+
halted as soon as an *attempt* is made to evaluate the function out of bounds.
|
451
|
+
If 1 then the optimization is halted only when a would-be *accepted* step
|
452
|
+
is out of bounds.
|
453
|
+
|
454
|
+
resource_alloc : ResourceAllocation, optional
|
455
|
+
When not None, an resource allocation object used for distributing the computation
|
456
|
+
across multiple processors.
|
457
|
+
|
458
|
+
arrays_interface : ArraysInterface
|
459
|
+
An object that provides an interface for creating and manipulating data arrays.
|
460
|
+
|
461
|
+
serial_solve_proc_threshold : int optional
|
462
|
+
When there are fewer than this many processors, the optimizer will solve linear
|
463
|
+
systems serially, using SciPy on a single processor, rather than using a parallelized
|
464
|
+
Gaussian Elimination (with partial pivoting) algorithm coded in Python. Since SciPy's
|
465
|
+
implementation is more efficient, it's not worth using the parallel version until there
|
466
|
+
are many processors to spread the work among.
|
467
|
+
|
468
|
+
x_limits : numpy.ndarray, optional
|
469
|
+
A (num_params, 2)-shaped array, holding on each row the (min, max) values for the corresponding
|
470
|
+
parameter (element of the "x" vector). If `None`, then no limits are imposed.
|
471
|
+
|
472
|
+
verbosity : int, optional
|
473
|
+
Amount of detail to print to stdout.
|
474
|
+
|
475
|
+
profiler : Profiler, optional
|
476
|
+
A profiler object used for to track timing and memory usage.
|
477
|
+
|
478
|
+
Returns
|
479
|
+
-------
|
480
|
+
x : numpy.ndarray
|
481
|
+
The optimal solution.
|
482
|
+
converged : bool
|
483
|
+
Whether the solution converged.
|
484
|
+
msg : str
|
485
|
+
A message indicating why the solution converged (or didn't).
|
486
|
+
"""
|
487
|
+
resource_alloc = _ResourceAllocation.cast(resource_alloc)
|
488
|
+
comm = resource_alloc.comm
|
489
|
+
printer = _VerbosityPrinter.create_printer(verbosity, comm)
|
490
|
+
ari = arrays_interface # shorthand
|
491
|
+
|
492
|
+
msg = ""
|
493
|
+
converged = False
|
494
|
+
half_max_nu = 2**62 # what should this be??
|
495
|
+
tau = 1e-3
|
496
|
+
|
497
|
+
#Allocate potentially shared memory used in loop
|
498
|
+
JTJ = ari.allocate_jtj()
|
499
|
+
minus_JTf = ari.allocate_jtf()
|
500
|
+
x = ari.allocate_jtf()
|
501
|
+
best_x = ari.allocate_jtf()
|
502
|
+
dx = ari.allocate_jtf()
|
503
|
+
new_x = ari.allocate_jtf()
|
504
|
+
optional_jtj_buff = ari.allocate_jtj_shared_mem_buf()
|
505
|
+
fdJac = ari.allocate_jac() if num_fd_iters > 0 else None
|
506
|
+
|
507
|
+
global_x = x0.copy()
|
508
|
+
ari.allscatter_x(global_x, x)
|
509
|
+
global_new_x = global_x.copy()
|
510
|
+
best_x[:] = x[:]
|
511
|
+
# ^ like x.copy() -the x-value corresponding to min_norm_f ('P'-type)
|
512
|
+
|
513
|
+
if x_limits is not None:
|
514
|
+
x_lower_limits = ari.allocate_jtf()
|
515
|
+
x_upper_limits = ari.allocate_jtf()
|
516
|
+
ari.allscatter_x(x_limits[:, 0], x_lower_limits)
|
517
|
+
ari.allscatter_x(x_limits[:, 1], x_upper_limits)
|
518
|
+
max_norm_dx = (max_dx_scale**2) * len(global_x) if max_dx_scale else None
|
519
|
+
# ^ don't let any component change by more than ~max_dx_scale
|
520
|
+
|
521
|
+
|
522
|
+
f = obj_fn(global_x) # 'E'-type array
|
523
|
+
norm_f = ari.norm2_f(f)
|
524
|
+
if not _np.isfinite(norm_f):
|
525
|
+
msg = "Infinite norm of objective function at initial point!"
|
526
|
+
|
527
|
+
if len(global_x) == 0: # a model with 0 parameters - nothing to optimize
|
528
|
+
msg = "No parameters to optimize"
|
529
|
+
converged = True
|
530
|
+
|
531
|
+
mu, nu = (1, 2) if init_munu == 'auto' else init_munu
|
532
|
+
# ^ We have to set *some* values in case we exit at the start of the first
|
533
|
+
# iteration. mu will almost certainly be overwritten before being read.
|
534
|
+
min_norm_f = 1e100 # sentinel
|
535
|
+
best_x_state = (mu, nu, norm_f, f.copy())
|
536
|
+
# ^ here and elsewhere, need f.copy() b/c f is objfn mem
|
537
|
+
|
538
|
+
try:
|
539
|
+
|
540
|
+
for k in range(max_iter): # outer loop
|
541
|
+
# assume global_x, x, f, fnorm hold valid values
|
542
|
+
|
543
|
+
if len(msg) > 0:
|
544
|
+
break # exit outer loop if an exit-message has been set
|
545
|
+
|
546
|
+
if norm_f < f_norm2_tol:
|
547
|
+
if oob_check_interval <= 1:
|
548
|
+
msg = "Sum of squares is at most %g" % f_norm2_tol
|
549
|
+
converged = True
|
550
|
+
break
|
551
|
+
else:
|
552
|
+
printer.log(("** Converged with out-of-bounds with check interval=%d, reverting to last know in-bounds point and setting interval=1 **") % oob_check_interval, 2)
|
553
|
+
oob_check_interval = 1
|
554
|
+
x[:] = best_x[:]
|
555
|
+
mu, nu, norm_f, f[:] = best_x_state
|
556
|
+
continue
|
557
|
+
|
558
|
+
if profiler: profiler.memory_check("simplish_leastsq: begin outer iter")
|
559
|
+
|
560
|
+
Jac = jac_guarded(k, num_fd_iters, obj_fn, jac_fn, f, ari, global_x, fdJac)
|
561
|
+
|
562
|
+
if profiler:
|
563
|
+
jac_gb = Jac.nbytes/(1024.0**3) if hasattr(Jac, 'nbytes') else _np.NaN
|
564
|
+
vals = ((f.size, global_x.size), jac_gb)
|
565
|
+
profiler.memory_check("simplish_leastsq: after jacobian: shape=%s, GB=%.2f" % vals)
|
566
|
+
|
567
|
+
Jnorm = _np.sqrt(ari.norm2_jac(Jac))
|
568
|
+
xnorm = _np.sqrt(ari.norm2_x(x))
|
569
|
+
printer.log("--- Outer Iter %d: norm_f = %g, mu=%g, |x|=%g, |J|=%g" % (k, norm_f, mu, xnorm, Jnorm))
|
570
|
+
|
571
|
+
tm = _time.time()
|
572
|
+
|
573
|
+
# Riley note: fill_JTJ is the first place where we try to access J as a dense matrix.
|
574
|
+
ari.fill_jtj(Jac, JTJ, optional_jtj_buff)
|
575
|
+
ari.fill_jtf(Jac, f, minus_JTf) # 'P'-type
|
576
|
+
minus_JTf *= -1
|
577
|
+
|
578
|
+
if profiler: profiler.add_time("simplish_leastsq: dotprods", tm)
|
579
|
+
|
580
|
+
norm_JTf = ari.infnorm_x(minus_JTf)
|
581
|
+
norm_x = ari.norm2_x(x)
|
582
|
+
pre_reg_data = ari.jtj_pre_regularization_data(JTJ)
|
583
|
+
|
584
|
+
if norm_JTf < jac_norm_tol:
|
585
|
+
if oob_check_interval <= 1:
|
586
|
+
msg = "norm(jacobian) is at most %g" % jac_norm_tol
|
587
|
+
converged = True
|
588
|
+
break
|
589
|
+
else:
|
590
|
+
printer.log(("** Converged with out-of-bounds with check interval=%d, reverting to last know in-bounds point and setting interval=1 **") % oob_check_interval, 2)
|
591
|
+
oob_check_interval = 1
|
592
|
+
x[:] = best_x[:]
|
593
|
+
mu, nu, norm_f, f[:] = best_x_state
|
594
|
+
continue
|
595
|
+
|
596
|
+
if k == 0:
|
597
|
+
max_jtj_diag = ari.jtj_max_diagonal_element(JTJ)
|
598
|
+
mu, nu = (tau * max_jtj_diag, 2) if init_munu == 'auto' else init_munu
|
599
|
+
best_x_state = (mu, nu, norm_f, f.copy())
|
600
|
+
|
601
|
+
#determine increment using adaptive damping
|
602
|
+
while True: # inner loop
|
603
|
+
|
604
|
+
if profiler: profiler.memory_check("simplish_leastsq: begin inner iter")
|
605
|
+
|
606
|
+
# ok if assume fine-param-proc.size == 1 (otherwise need to sync setting local JTJ)
|
607
|
+
ari.jtj_update_regularization(JTJ, pre_reg_data, mu)
|
608
|
+
|
609
|
+
#assert(_np.isfinite(JTJ).all()), "Non-finite JTJ (inner)!" # NaNs tracking
|
610
|
+
#assert(_np.isfinite(minus_JTf).all()), "Non-finite minus_JTf (inner)!" # NaNs tracking
|
611
|
+
|
612
|
+
try:
|
613
|
+
if profiler: profiler.memory_check("simplish_leastsq: before linsolve")
|
614
|
+
tm = _time.time()
|
615
|
+
_custom_solve(JTJ, minus_JTf, dx, ari, resource_alloc, serial_solve_proc_threshold)
|
616
|
+
if profiler: profiler.add_time("simplish_leastsq: linsolve", tm)
|
617
|
+
except _scipy.linalg.LinAlgError:
|
618
|
+
reject_msg = " (LinSolve Failure)"
|
619
|
+
mu, nu, msg = damp_coeff_update(mu, nu, half_max_nu, reject_msg, printer)
|
620
|
+
if len(msg) == 0:
|
621
|
+
continue
|
622
|
+
else:
|
623
|
+
break
|
624
|
+
|
625
|
+
reject_msg = ""
|
626
|
+
if profiler: profiler.memory_check("simplish_leastsq: after linsolve")
|
627
|
+
|
628
|
+
new_x[:] = x + dx
|
629
|
+
norm_dx = ari.norm2_x(dx)
|
630
|
+
|
631
|
+
#ensure dx isn't too large - don't let any component change by more than ~max_dx_scale
|
632
|
+
if max_norm_dx and norm_dx > max_norm_dx:
|
633
|
+
dx *= _np.sqrt(max_norm_dx / norm_dx)
|
634
|
+
new_x[:] = x + dx
|
635
|
+
norm_dx = ari.norm2_x(dx)
|
636
|
+
|
637
|
+
#apply x limits (bounds)
|
638
|
+
if x_limits is not None:
|
639
|
+
# Approach 1: project x into valid space by simply clipping out-of-bounds values
|
640
|
+
for i, (x_el, lower, upper) in enumerate(zip(x, x_lower_limits, x_upper_limits)):
|
641
|
+
if new_x[i] < lower:
|
642
|
+
new_x[i] = lower
|
643
|
+
dx[i] = lower - x_el
|
644
|
+
elif new_x[i] > upper:
|
645
|
+
new_x[i] = upper
|
646
|
+
dx[i] = upper - x_el
|
647
|
+
norm_dx = ari.norm2_x(dx)
|
648
|
+
|
649
|
+
printer.log(" - Inner Loop: mu=%g, norm_dx=%g" % (mu, norm_dx), 2)
|
650
|
+
|
651
|
+
if norm_dx < (rel_xtol**2) * norm_x:
|
652
|
+
if oob_check_interval <= 1:
|
653
|
+
msg = "Relative change, |dx|/|x|, is at most %g" % rel_xtol
|
654
|
+
converged = True
|
655
|
+
break
|
656
|
+
else:
|
657
|
+
printer.log(("** Converged with out-of-bounds with check interval=%d, reverting to last know in-bounds point and setting interval=1 **") % oob_check_interval, 2)
|
658
|
+
oob_check_interval = 1
|
659
|
+
x[:] = best_x[:]
|
660
|
+
mu, nu, norm_f, f[:] = best_x_state
|
661
|
+
break
|
662
|
+
elif (norm_x + rel_xtol) < norm_dx * (_MACH_PRECISION**2):
|
663
|
+
msg = "(near-)singular linear system"
|
664
|
+
break
|
665
|
+
|
666
|
+
if oob_check_mode == 0 and oob_check_interval > 0:
|
667
|
+
if k % oob_check_interval == 0:
|
668
|
+
#Check to see if objective function is out of bounds
|
669
|
+
|
670
|
+
in_bounds = []
|
671
|
+
ari.allgather_x(new_x, global_new_x)
|
672
|
+
try:
|
673
|
+
new_f = obj_fn(global_new_x, oob_check=True)
|
674
|
+
except ValueError: # Use this to mean - "not allowed, but don't stop"
|
675
|
+
in_bounds.append(False)
|
676
|
+
else:
|
677
|
+
in_bounds.append(True)
|
678
|
+
|
679
|
+
if any(in_bounds): # In adaptive mode, proceed if *any* cases are in-bounds
|
680
|
+
new_x_is_known_inbounds = True
|
681
|
+
else:
|
682
|
+
MIN_STOP_ITER = 1 # the minimum iteration where an OOB objective stops the optimization
|
683
|
+
if oob_action == "reject" or k < MIN_STOP_ITER:
|
684
|
+
reject_msg = " (out-of-bounds)"
|
685
|
+
mu, nu, msg = damp_coeff_update(mu, nu, half_max_nu, reject_msg, printer)
|
686
|
+
if len(msg) == 0:
|
687
|
+
continue
|
688
|
+
else:
|
689
|
+
break
|
690
|
+
elif oob_action == "stop":
|
691
|
+
if oob_check_interval == 1:
|
692
|
+
msg = "Objective function out-of-bounds! STOP"
|
693
|
+
converged = True
|
694
|
+
break
|
695
|
+
else: # reset to last know in-bounds point and not do oob check every step
|
696
|
+
printer.log(("** Hit out-of-bounds with check interval=%d, reverting to last know in-bounds point and setting interval=1 **") % oob_check_interval, 2)
|
697
|
+
oob_check_interval = 1
|
698
|
+
x[:] = best_x[:]
|
699
|
+
mu, nu, norm_f, f[:] = best_x_state
|
700
|
+
break # restart next outer loop
|
701
|
+
else:
|
702
|
+
raise ValueError("Invalid `oob_action`: '%s'" % oob_action)
|
703
|
+
else: # don't check this time
|
704
|
+
ari.allgather_x(new_x, global_new_x)
|
705
|
+
new_f = obj_fn(global_new_x, oob_check=False)
|
706
|
+
new_x_is_known_inbounds = False
|
707
|
+
else:
|
708
|
+
#Just evaluate objective function normally; never check for in-bounds condition
|
709
|
+
ari.allgather_x(new_x, global_new_x)
|
710
|
+
new_f = obj_fn(global_new_x)
|
711
|
+
new_x_is_known_inbounds = oob_check_interval == 0
|
712
|
+
# ^ assume in bounds if we have no out-of-bounds checks.
|
713
|
+
|
714
|
+
norm_new_f = ari.norm2_f(new_f)
|
715
|
+
if not _np.isfinite(norm_new_f): # avoid infinite loop...
|
716
|
+
msg = "Infinite norm of objective function!"
|
717
|
+
break
|
718
|
+
|
719
|
+
# dL = expected decrease in ||F||^2 from linear model
|
720
|
+
dL = ari.dot_x(dx, mu * dx + minus_JTf)
|
721
|
+
dF = norm_f - norm_new_f # actual decrease in ||F||^2
|
722
|
+
|
723
|
+
printer.log(" (cont): norm_new_f=%g, dL=%g, dF=%g, reldL=%g, reldF=%g" % (norm_new_f, dL, dF, dL / norm_f, dF / norm_f), 2)
|
724
|
+
|
725
|
+
if dL / norm_f < rel_ftol and dF >= 0 and dF / norm_f < rel_ftol and dF / dL < 2.0:
|
726
|
+
if oob_check_interval <= 1: # (if 0 then no oob checking is done)
|
727
|
+
msg = "Both actual and predicted relative reductions in the sum of squares are at most %g" % rel_ftol
|
728
|
+
converged = True
|
729
|
+
break
|
730
|
+
else:
|
731
|
+
printer.log(("** Converged with out-of-bounds with check interval=%d, reverting to last know in-bounds point and setting interval=1 **") % oob_check_interval, 2)
|
732
|
+
oob_check_interval = 1
|
733
|
+
x[:] = best_x[:]
|
734
|
+
mu, nu, norm_f, f[:] = best_x_state
|
735
|
+
break
|
736
|
+
|
737
|
+
if (dL <= 0 or dF <= 0):
|
738
|
+
reject_msg = " (out-of-bounds)"
|
739
|
+
mu, nu, msg = damp_coeff_update(mu, nu, half_max_nu, reject_msg, printer)
|
740
|
+
if len(msg) == 0:
|
741
|
+
continue
|
742
|
+
else:
|
743
|
+
break
|
744
|
+
|
745
|
+
#Check whether an otherwise acceptable solution is in-bounds
|
746
|
+
if oob_check_mode == 1 and oob_check_interval > 0 and k % oob_check_interval == 0:
|
747
|
+
#Check to see if objective function is out of bounds
|
748
|
+
try:
|
749
|
+
obj_fn(global_new_x, oob_check=True) # don't actually need return val (== new_f)
|
750
|
+
new_x_is_known_inbounds = True
|
751
|
+
except ValueError: # Use this to mean - "not allowed, but don't stop"
|
752
|
+
MIN_STOP_ITER = 1 # the minimum iteration where an OOB objective can stops the opt.
|
753
|
+
if oob_action == "reject" or k < MIN_STOP_ITER:
|
754
|
+
reject_msg = " (out-of-bounds)"
|
755
|
+
mu, nu, msg = damp_coeff_update(mu, nu, half_max_nu, reject_msg, printer)
|
756
|
+
if len(msg) == 0:
|
757
|
+
continue
|
758
|
+
else:
|
759
|
+
break
|
760
|
+
elif oob_action == "stop":
|
761
|
+
if oob_check_interval == 1:
|
762
|
+
msg = "Objective function out-of-bounds! STOP"
|
763
|
+
converged = True
|
764
|
+
break
|
765
|
+
else: # reset to last know in-bounds point and not do oob check every step
|
766
|
+
printer.log(("** Hit out-of-bounds with check interval=%d, reverting to last know in-bounds point and setting interval=1 **") % oob_check_interval, 2)
|
767
|
+
oob_check_interval = 1
|
768
|
+
x[:] = best_x[:]
|
769
|
+
mu, nu, norm_f, f[:] = best_x_state
|
770
|
+
break # restart next outer loop
|
771
|
+
else:
|
772
|
+
raise ValueError("Invalid `oob_action`: '%s'" % oob_action)
|
773
|
+
|
774
|
+
# reduction in error: increment accepted!
|
775
|
+
# ^ Note: if we ever reach this line, then we know that we'll be breaking from the loop.
|
776
|
+
t = 1.0 - (2 * dF / dL - 1.0)**3 # dF/dL == gain ratio
|
777
|
+
# always reduce mu for accepted step when |dx| is small
|
778
|
+
mu_factor = max(t, 1.0 / 3.0) if norm_dx > 1e-8 else 0.3
|
779
|
+
mu *= mu_factor
|
780
|
+
nu = 2
|
781
|
+
x[:] = new_x[:]
|
782
|
+
f[:] = new_f[:]
|
783
|
+
norm_f = norm_new_f
|
784
|
+
global_x[:] = global_new_x[:]
|
785
|
+
printer.log(" Accepted%s! gain ratio=%g mu * %g => %g" % ("", dF / dL, mu_factor, mu), 2)
|
786
|
+
if new_x_is_known_inbounds and norm_f < min_norm_f:
|
787
|
+
min_norm_f = norm_f
|
788
|
+
best_x[:] = x[:]
|
789
|
+
best_x_state = (mu, nu, norm_f, f.copy())
|
790
|
+
|
791
|
+
#assert(_np.isfinite(x).all()), "Non-finite x!" # NaNs tracking
|
792
|
+
#assert(_np.isfinite(f).all()), "Non-finite f!" # NaNs tracking
|
793
|
+
|
794
|
+
break
|
795
|
+
# ^ exit inner loop normally ...
|
796
|
+
# end of inner loop
|
797
|
+
# end of outer loop
|
798
|
+
else:
|
799
|
+
#if no break stmt hit, then we've exceeded max_iter
|
800
|
+
msg = "Maximum iterations (%d) exceeded" % max_iter
|
801
|
+
converged = True # call result "converged" even in this case, but issue warning:
|
802
|
+
printer.warning("Treating result as *converged* after maximum iterations (%d) were exceeded." % max_iter)
|
803
|
+
|
804
|
+
except KeyboardInterrupt:
|
805
|
+
if comm is not None:
|
806
|
+
# ensure all procs agree on what best_x is (in case the interrupt occurred around x being updated)
|
807
|
+
comm.Bcast(best_x, root=0)
|
808
|
+
printer.log("Rank %d caught keyboard interrupt! Returning the current solution as being *converged*."
|
809
|
+
% comm.Get_rank())
|
810
|
+
else:
|
811
|
+
printer.log("Caught keyboard interrupt! Returning the current solution as being *converged*.")
|
812
|
+
msg = "Keyboard interrupt!"
|
813
|
+
converged = True
|
814
|
+
|
815
|
+
if comm is not None:
|
816
|
+
comm.barrier() # Just to be safe, so procs stay synchronized and we don't free anything too soon
|
817
|
+
|
818
|
+
ari.deallocate_jtj(JTJ)
|
819
|
+
ari.deallocate_jtf(minus_JTf)
|
820
|
+
ari.deallocate_jtf(x)
|
821
|
+
ari.deallocate_jtj_shared_mem_buf(optional_jtj_buff)
|
822
|
+
|
823
|
+
if x_limits is not None:
|
824
|
+
ari.deallocate_jtf(x_lower_limits)
|
825
|
+
ari.deallocate_jtf(x_upper_limits)
|
826
|
+
|
827
|
+
ari.deallocate_jtf(dx)
|
828
|
+
ari.deallocate_jtf(new_x)
|
829
|
+
|
830
|
+
if fdJac is not None:
|
831
|
+
ari.deallocate_jac(fdJac)
|
832
|
+
|
833
|
+
ari.allgather_x(best_x, global_x)
|
834
|
+
ari.deallocate_jtf(best_x)
|
835
|
+
|
836
|
+
mu, nu, norm_f, f[:] = best_x_state
|
837
|
+
|
838
|
+
global_f = _np.empty(ari.global_num_elements(), 'd')
|
839
|
+
ari.allgather_f(f, global_f)
|
840
|
+
|
841
|
+
return global_x, converged, msg, mu, nu, norm_f, global_f
|