pyGSTi 0.9.12.1__cp38-cp38-win32.whl → 0.9.13__cp38-cp38-win32.whl
Sign up to get free protection for your applications and to get access to all the features.
- pyGSTi-0.9.13.dist-info/METADATA +185 -0
- {pyGSTi-0.9.12.1.dist-info → pyGSTi-0.9.13.dist-info}/RECORD +207 -217
- {pyGSTi-0.9.12.1.dist-info → pyGSTi-0.9.13.dist-info}/WHEEL +1 -1
- pygsti/_version.py +2 -2
- pygsti/algorithms/contract.py +1 -1
- pygsti/algorithms/core.py +42 -28
- pygsti/algorithms/fiducialselection.py +17 -8
- pygsti/algorithms/gaugeopt.py +2 -2
- pygsti/algorithms/germselection.py +87 -77
- pygsti/algorithms/mirroring.py +0 -388
- pygsti/algorithms/randomcircuit.py +165 -1333
- pygsti/algorithms/rbfit.py +0 -234
- pygsti/baseobjs/basis.py +94 -396
- pygsti/baseobjs/errorgenbasis.py +0 -132
- pygsti/baseobjs/errorgenspace.py +0 -10
- pygsti/baseobjs/label.py +52 -168
- pygsti/baseobjs/opcalc/fastopcalc.cp38-win32.pyd +0 -0
- pygsti/baseobjs/opcalc/fastopcalc.pyx +2 -2
- pygsti/baseobjs/polynomial.py +13 -595
- pygsti/baseobjs/statespace.py +1 -0
- pygsti/circuits/__init__.py +1 -1
- pygsti/circuits/circuit.py +682 -505
- pygsti/circuits/circuitconstruction.py +0 -4
- pygsti/circuits/circuitlist.py +47 -5
- pygsti/circuits/circuitparser/__init__.py +8 -8
- pygsti/circuits/circuitparser/fastcircuitparser.cp38-win32.pyd +0 -0
- pygsti/circuits/circuitstructure.py +3 -3
- pygsti/circuits/cloudcircuitconstruction.py +1 -1
- pygsti/data/datacomparator.py +2 -7
- pygsti/data/dataset.py +46 -44
- pygsti/data/hypothesistest.py +0 -7
- pygsti/drivers/bootstrap.py +0 -49
- pygsti/drivers/longsequence.py +2 -1
- pygsti/evotypes/basereps_cython.cp38-win32.pyd +0 -0
- pygsti/evotypes/chp/opreps.py +0 -61
- pygsti/evotypes/chp/statereps.py +0 -32
- pygsti/evotypes/densitymx/effectcreps.cpp +9 -10
- pygsti/evotypes/densitymx/effectreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/densitymx/effectreps.pyx +1 -1
- pygsti/evotypes/densitymx/opreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/densitymx/opreps.pyx +2 -2
- pygsti/evotypes/densitymx/statereps.cp38-win32.pyd +0 -0
- pygsti/evotypes/densitymx/statereps.pyx +1 -1
- pygsti/evotypes/densitymx_slow/effectreps.py +7 -23
- pygsti/evotypes/densitymx_slow/opreps.py +16 -23
- pygsti/evotypes/densitymx_slow/statereps.py +10 -3
- pygsti/evotypes/evotype.py +39 -2
- pygsti/evotypes/stabilizer/effectreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/stabilizer/effectreps.pyx +0 -4
- pygsti/evotypes/stabilizer/opreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/stabilizer/opreps.pyx +0 -4
- pygsti/evotypes/stabilizer/statereps.cp38-win32.pyd +0 -0
- pygsti/evotypes/stabilizer/statereps.pyx +1 -5
- pygsti/evotypes/stabilizer/termreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/stabilizer/termreps.pyx +0 -7
- pygsti/evotypes/stabilizer_slow/effectreps.py +0 -22
- pygsti/evotypes/stabilizer_slow/opreps.py +0 -4
- pygsti/evotypes/stabilizer_slow/statereps.py +0 -4
- pygsti/evotypes/statevec/effectreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/statevec/effectreps.pyx +1 -1
- pygsti/evotypes/statevec/opreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/statevec/opreps.pyx +2 -2
- pygsti/evotypes/statevec/statereps.cp38-win32.pyd +0 -0
- pygsti/evotypes/statevec/statereps.pyx +1 -1
- pygsti/evotypes/statevec/termreps.cp38-win32.pyd +0 -0
- pygsti/evotypes/statevec/termreps.pyx +0 -7
- pygsti/evotypes/statevec_slow/effectreps.py +0 -3
- pygsti/evotypes/statevec_slow/opreps.py +0 -5
- pygsti/extras/__init__.py +0 -1
- pygsti/extras/drift/stabilityanalyzer.py +3 -1
- pygsti/extras/interpygate/__init__.py +12 -0
- pygsti/extras/interpygate/core.py +0 -36
- pygsti/extras/interpygate/process_tomography.py +44 -10
- pygsti/extras/rpe/rpeconstruction.py +0 -2
- pygsti/forwardsims/__init__.py +1 -0
- pygsti/forwardsims/forwardsim.py +14 -55
- pygsti/forwardsims/mapforwardsim.py +69 -18
- pygsti/forwardsims/mapforwardsim_calc_densitymx.cp38-win32.pyd +0 -0
- pygsti/forwardsims/mapforwardsim_calc_densitymx.pyx +65 -66
- pygsti/forwardsims/mapforwardsim_calc_generic.py +91 -13
- pygsti/forwardsims/matrixforwardsim.py +63 -15
- pygsti/forwardsims/termforwardsim.py +8 -110
- pygsti/forwardsims/termforwardsim_calc_stabilizer.cp38-win32.pyd +0 -0
- pygsti/forwardsims/termforwardsim_calc_statevec.cp38-win32.pyd +0 -0
- pygsti/forwardsims/termforwardsim_calc_statevec.pyx +0 -651
- pygsti/forwardsims/torchfwdsim.py +265 -0
- pygsti/forwardsims/weakforwardsim.py +2 -2
- pygsti/io/__init__.py +1 -2
- pygsti/io/mongodb.py +0 -2
- pygsti/io/stdinput.py +6 -22
- pygsti/layouts/copalayout.py +10 -12
- pygsti/layouts/distlayout.py +0 -40
- pygsti/layouts/maplayout.py +103 -25
- pygsti/layouts/matrixlayout.py +99 -60
- pygsti/layouts/prefixtable.py +1534 -52
- pygsti/layouts/termlayout.py +1 -1
- pygsti/modelmembers/instruments/instrument.py +3 -3
- pygsti/modelmembers/instruments/tpinstrument.py +2 -2
- pygsti/modelmembers/modelmember.py +0 -17
- pygsti/modelmembers/operations/__init__.py +2 -4
- pygsti/modelmembers/operations/affineshiftop.py +1 -0
- pygsti/modelmembers/operations/composederrorgen.py +1 -1
- pygsti/modelmembers/operations/composedop.py +1 -24
- pygsti/modelmembers/operations/denseop.py +5 -5
- pygsti/modelmembers/operations/eigpdenseop.py +2 -2
- pygsti/modelmembers/operations/embeddederrorgen.py +1 -1
- pygsti/modelmembers/operations/embeddedop.py +0 -1
- pygsti/modelmembers/operations/experrorgenop.py +2 -2
- pygsti/modelmembers/operations/fullarbitraryop.py +1 -0
- pygsti/modelmembers/operations/fullcptpop.py +2 -2
- pygsti/modelmembers/operations/fulltpop.py +28 -6
- pygsti/modelmembers/operations/fullunitaryop.py +5 -4
- pygsti/modelmembers/operations/lindbladcoefficients.py +93 -78
- pygsti/modelmembers/operations/lindbladerrorgen.py +268 -441
- pygsti/modelmembers/operations/linearop.py +7 -27
- pygsti/modelmembers/operations/opfactory.py +1 -1
- pygsti/modelmembers/operations/repeatedop.py +1 -24
- pygsti/modelmembers/operations/staticstdop.py +1 -1
- pygsti/modelmembers/povms/__init__.py +3 -3
- pygsti/modelmembers/povms/basepovm.py +7 -36
- pygsti/modelmembers/povms/complementeffect.py +4 -9
- pygsti/modelmembers/povms/composedeffect.py +0 -320
- pygsti/modelmembers/povms/computationaleffect.py +1 -1
- pygsti/modelmembers/povms/computationalpovm.py +3 -1
- pygsti/modelmembers/povms/effect.py +3 -5
- pygsti/modelmembers/povms/marginalizedpovm.py +0 -79
- pygsti/modelmembers/povms/tppovm.py +74 -2
- pygsti/modelmembers/states/__init__.py +2 -5
- pygsti/modelmembers/states/composedstate.py +0 -317
- pygsti/modelmembers/states/computationalstate.py +3 -3
- pygsti/modelmembers/states/cptpstate.py +4 -4
- pygsti/modelmembers/states/densestate.py +6 -4
- pygsti/modelmembers/states/fullpurestate.py +0 -24
- pygsti/modelmembers/states/purestate.py +1 -1
- pygsti/modelmembers/states/state.py +5 -6
- pygsti/modelmembers/states/tpstate.py +28 -10
- pygsti/modelmembers/term.py +3 -6
- pygsti/modelmembers/torchable.py +50 -0
- pygsti/modelpacks/_modelpack.py +1 -1
- pygsti/modelpacks/smq1Q_ZN.py +3 -1
- pygsti/modelpacks/smq2Q_XXYYII.py +2 -1
- pygsti/modelpacks/smq2Q_XY.py +3 -3
- pygsti/modelpacks/smq2Q_XYI.py +2 -2
- pygsti/modelpacks/smq2Q_XYICNOT.py +3 -3
- pygsti/modelpacks/smq2Q_XYICPHASE.py +3 -3
- pygsti/modelpacks/smq2Q_XYXX.py +1 -1
- pygsti/modelpacks/smq2Q_XYZICNOT.py +3 -3
- pygsti/modelpacks/smq2Q_XYZZ.py +1 -1
- pygsti/modelpacks/stdtarget.py +0 -121
- pygsti/models/cloudnoisemodel.py +1 -2
- pygsti/models/explicitcalc.py +3 -3
- pygsti/models/explicitmodel.py +3 -13
- pygsti/models/fogistore.py +5 -3
- pygsti/models/localnoisemodel.py +1 -2
- pygsti/models/memberdict.py +0 -12
- pygsti/models/model.py +800 -65
- pygsti/models/modelconstruction.py +4 -4
- pygsti/models/modelnoise.py +2 -2
- pygsti/models/modelparaminterposer.py +1 -1
- pygsti/models/oplessmodel.py +1 -1
- pygsti/models/qutrit.py +15 -14
- pygsti/objectivefns/objectivefns.py +73 -138
- pygsti/objectivefns/wildcardbudget.py +2 -7
- pygsti/optimize/__init__.py +1 -0
- pygsti/optimize/arraysinterface.py +28 -0
- pygsti/optimize/customcg.py +0 -12
- pygsti/optimize/customlm.py +129 -323
- pygsti/optimize/customsolve.py +2 -2
- pygsti/optimize/optimize.py +0 -84
- pygsti/optimize/simplerlm.py +841 -0
- pygsti/optimize/wildcardopt.py +19 -598
- pygsti/protocols/confidenceregionfactory.py +28 -14
- pygsti/protocols/estimate.py +31 -14
- pygsti/protocols/gst.py +142 -68
- pygsti/protocols/modeltest.py +6 -10
- pygsti/protocols/protocol.py +9 -37
- pygsti/protocols/rb.py +450 -79
- pygsti/protocols/treenode.py +8 -2
- pygsti/protocols/vb.py +108 -206
- pygsti/protocols/vbdataframe.py +1 -1
- pygsti/report/factory.py +0 -15
- pygsti/report/fogidiagram.py +1 -17
- pygsti/report/modelfunction.py +12 -3
- pygsti/report/mpl_colormaps.py +1 -1
- pygsti/report/plothelpers.py +8 -2
- pygsti/report/reportables.py +41 -37
- pygsti/report/templates/offline/pygsti_dashboard.css +6 -0
- pygsti/report/templates/offline/pygsti_dashboard.js +12 -0
- pygsti/report/workspace.py +2 -14
- pygsti/report/workspaceplots.py +326 -504
- pygsti/tools/basistools.py +9 -36
- pygsti/tools/edesigntools.py +124 -96
- pygsti/tools/fastcalc.cp38-win32.pyd +0 -0
- pygsti/tools/fastcalc.pyx +35 -81
- pygsti/tools/internalgates.py +151 -15
- pygsti/tools/jamiolkowski.py +5 -5
- pygsti/tools/lindbladtools.py +19 -11
- pygsti/tools/listtools.py +0 -114
- pygsti/tools/matrixmod2.py +1 -1
- pygsti/tools/matrixtools.py +173 -339
- pygsti/tools/nameddict.py +1 -1
- pygsti/tools/optools.py +154 -88
- pygsti/tools/pdftools.py +0 -25
- pygsti/tools/rbtheory.py +3 -320
- pygsti/tools/slicetools.py +64 -12
- pyGSTi-0.9.12.1.dist-info/METADATA +0 -155
- pygsti/algorithms/directx.py +0 -711
- pygsti/evotypes/qibo/__init__.py +0 -33
- pygsti/evotypes/qibo/effectreps.py +0 -78
- pygsti/evotypes/qibo/opreps.py +0 -376
- pygsti/evotypes/qibo/povmreps.py +0 -98
- pygsti/evotypes/qibo/statereps.py +0 -174
- pygsti/extras/rb/__init__.py +0 -13
- pygsti/extras/rb/benchmarker.py +0 -957
- pygsti/extras/rb/dataset.py +0 -378
- pygsti/extras/rb/io.py +0 -814
- pygsti/extras/rb/simulate.py +0 -1020
- pygsti/io/legacyio.py +0 -385
- pygsti/modelmembers/povms/denseeffect.py +0 -142
- {pyGSTi-0.9.12.1.dist-info → pyGSTi-0.9.13.dist-info}/LICENSE +0 -0
- {pyGSTi-0.9.12.1.dist-info → pyGSTi-0.9.13.dist-info}/top_level.txt +0 -0
pygsti/optimize/wildcardopt.py
CHANGED
@@ -17,6 +17,25 @@ import numpy as _np
|
|
17
17
|
from pygsti.objectivefns.wildcardbudget import update_circuit_probs as _update_circuit_probs
|
18
18
|
from pygsti.optimize.optimize import minimize as _minimize
|
19
19
|
|
20
|
+
"""Developer notes
|
21
|
+
|
22
|
+
Removed functions
|
23
|
+
-----------------
|
24
|
+
|
25
|
+
This file used to have three algorithms for optimizing wildcard budgets that relied on
|
26
|
+
CVXOPT's nonlinear optimization interface. In June 2024 we investigated whether these
|
27
|
+
algorithms could be re-implemented to rely only on CVXPY's modeling capabilities. We
|
28
|
+
came to the conclusion that while that may have been possible, it would have involved
|
29
|
+
an inordinate amount of work, and that for the sake of maintainability it was better to
|
30
|
+
remove these CVXOPT-based algorithms from pyGSTi altogether.
|
31
|
+
|
32
|
+
Here's a hash for one of the last commits on pyGSTi's develop branch that had these
|
33
|
+
algorithms: 723cd24aec3b90d28b0fcd9b31145b920c256acf.
|
34
|
+
|
35
|
+
See https://github.com/sandialabs/pyGSTi/pull/444 for more information.
|
36
|
+
|
37
|
+
"""
|
38
|
+
|
20
39
|
|
21
40
|
def optimize_wildcard_budget_neldermead(budget, L1weights, wildcard_objfn, two_dlogl_threshold,
|
22
41
|
redbox_threshold, printer, smart_init=True, max_outer_iters=10,
|
@@ -48,19 +67,6 @@ def optimize_wildcard_budget_neldermead(budget, L1weights, wildcard_objfn, two_d
|
|
48
67
|
|
49
68
|
return max(0, two_dlogl - two_dlogl_threshold) + percircuit_penalty
|
50
69
|
|
51
|
-
##For debugging wildcard (see below for suggested insertion point)
|
52
|
-
#def _wildcard_fit_criteria_debug(wv):
|
53
|
-
# dlogl_elements = logl_wildcard_fn.lsvec(wv)**2 # b/c WC fn only has sqrt of terms implemented now
|
54
|
-
# for i in range(num_circuits):
|
55
|
-
# dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
|
56
|
-
# two_dlogl_percircuit = 2 * dlogl_percircuit
|
57
|
-
# two_dlogl = sum(two_dlogl_percircuit)
|
58
|
-
# print("Aggregate penalty = ", two_dlogl, "-", two_dlogl_threshold, "=", two_dlogl - two_dlogl_threshold)
|
59
|
-
# print("Per-circuit (redbox) penalty = ", sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None)))
|
60
|
-
# print(" per-circuit threshold = ", redbox_threshold, " highest violators = ")
|
61
|
-
# sorted_percircuit = sorted(enumerate(two_dlogl_percircuit), key=lambda x: x[1], reverse=True)
|
62
|
-
# print('\n'.join(["(%d) %s: %g" % (i, layout.circuits[i].str, val) for i, val in sorted_percircuit[0:10]]))
|
63
|
-
|
64
70
|
num_iters = 0
|
65
71
|
wvec_init = budget.to_vector()
|
66
72
|
|
@@ -220,44 +226,6 @@ def _get_critical_circuit_budgets(objfn, redbox_threshold):
|
|
220
226
|
return global_critical_percircuit_budgets
|
221
227
|
|
222
228
|
|
223
|
-
# Aggregate 2-delta-logl criteria (for cvxopt call below, as we want this function to be <= 0)
|
224
|
-
# - for each circuit, we have the sum of -2Nf*logl(p) + const. terms
|
225
|
-
# - the derivatives taken below are complicated because they're derivatives with respect to
|
226
|
-
# the circuit's *wildcard budget*, which is effectively w.r.t `p` except all the p's must
|
227
|
-
# sum to 1. We compute these derivatives as follows:
|
228
|
-
#
|
229
|
-
# - 1st deriv: the first derivative of each term is -Nf/p and N is common to all the terms of
|
230
|
-
# a single circuit so this is dictated by chi = f/p >= 0. All these terms are positive (the
|
231
|
-
# deriv is negative), and we want to move probability from the terms with smallest chi to
|
232
|
-
# largest chi. Note here that positive `p` means *more* wildcard budget and so the largest-chi
|
233
|
-
# terms have their p_i increase (dp_i = dp) whereas the smallest-chi terms have p_i decrease
|
234
|
-
# (dp_i = -dp). When multiple terms have the same chi then we split the total dp
|
235
|
-
# (delta-probability) according to 1 / 2nd-deriv = p**2/Nf. This is so that if
|
236
|
-
# chi1 = f1/p1 = chi2 = f2/p2 and we want the chi's to remain equal after
|
237
|
-
# p1 -> p1 + lambda1*dp, p2 -> p2 + lambda2*dp then we get:
|
238
|
-
# (p1 + lambda1*dp) / f1 = 1/chi1 + lambda1/f1 * dp = 1/chi2 + lambda2/f2 * dp, so
|
239
|
-
# lambda1/f1 = lambda2/f2 => lambda1/lambda2 = f1/f2. Since lambda1 + lambda2 = 1,
|
240
|
-
# we get lambda1 (1 + f2/f1) = 1 => lambda1 = f1 / (f1 + f2)
|
241
|
-
# In general, lambda_i = f_i / sum_fs_with_max_chi.
|
242
|
-
# Note: f1/p1 = f2/p2 => f1/f2 = p1/p2 so lambda_i also could be = p_i / sum_ps_with_max_chi
|
243
|
-
# We could also derive by wanting the derivs wrt chi be equal:
|
244
|
-
# d(chi1)/dp = d(chi2)/dp => -f1/p1**2 * lambda_1 = -f2/p2**2 * lambda_2
|
245
|
-
# => lambda1/lambda2 = p1/p2 as before (recall dp1 = lambda1 * dp)
|
246
|
-
# Note that this also means the lambdas could be weighted by the full 2nd deriv: Nf/p**2
|
247
|
-
# ** IN SUMMARY, the total derivative is:
|
248
|
-
# -2N * (sum_max_chi(f_i/p_i * lambda_i) - sum_min_chi(f_i/p_i * lambda_i))
|
249
|
-
# = -2N * (max_chi - min_chi)
|
250
|
-
#
|
251
|
-
# - 2nd deriv: same as above, but now different lambda_i matter:
|
252
|
-
# = 2N * (sum_max_chi(f_i/p_i**2 * lambda_i**2) - sum_min_chi(f_i/p_i**2 * lambda_i**2))
|
253
|
-
# (where we take the lambda_i as given by the frequencies, so they aren't diff'd)
|
254
|
-
# If we took lambda_i = p_i / sum_of_ps then we'd get:
|
255
|
-
# d/dp (f_i/p_i * lambda_i) = -f_i/p_i**2 * lambda_i**2 + f_i/p_i * dlambda_i/dp
|
256
|
-
# = -f_i/p_i**2 * lambda_i**2 (see below)
|
257
|
-
# Note dlambda_i/dp = lambda_i / sum_of_ps - p_i / (sum_ps)**2 * sum(lambda_i) = 0
|
258
|
-
# So we get the same result.
|
259
|
-
|
260
|
-
|
261
229
|
def _agg_dlogl(current_probs, objfn, two_dlogl_threshold):
|
262
230
|
#Note: current_probs is a *local* quantity
|
263
231
|
p, f, n, N = current_probs, objfn.freqs, objfn.counts, objfn.total_counts
|
@@ -369,60 +337,6 @@ def _agg_dlogl_hessian(current_probs, objfn, percircuit_budget_deriv, probs_deri
|
|
369
337
|
return objfn.layout.allsum_local_quantity('c', local_H, use_shared_mem=False)
|
370
338
|
|
371
339
|
|
372
|
-
def _proxy_agg_dlogl(x, tvds, fn0s, percircuit_budget_deriv, two_dlogl_threshold):
|
373
|
-
# expects percircuit_budget_deriv to be for all (*global*) circuits
|
374
|
-
percircuit_budgets = _np.dot(percircuit_budget_deriv, x)
|
375
|
-
num_circuits = percircuit_budgets.shape[0]
|
376
|
-
a = 4; b = 2 # fit params: must be same in all proxy fns
|
377
|
-
|
378
|
-
f = 0
|
379
|
-
for i in range(num_circuits):
|
380
|
-
fn0 = fn0s[i]; tvd = tvds[i]; x = percircuit_budgets[i]
|
381
|
-
f += (fn0 / _np.exp(a)) * _np.exp(a - b * (x / tvd)**2 - _np.sqrt(2 * b) * (x / tvd))
|
382
|
-
return f - two_dlogl_threshold
|
383
|
-
|
384
|
-
|
385
|
-
def _proxy_agg_dlogl_deriv(x, tvds, fn0s, percircuit_budget_deriv):
|
386
|
-
# expects percircuit_budget_deriv to be for all (*global*) circuits
|
387
|
-
percircuit_budgets = _np.dot(percircuit_budget_deriv, x)
|
388
|
-
num_circuits = percircuit_budgets.shape[0]
|
389
|
-
a = 4; b = 2 # fit params: must be same in all proxy fns
|
390
|
-
|
391
|
-
agg_dlogl_deriv_wrt_percircuit_budgets = _np.zeros(num_circuits, 'd')
|
392
|
-
for i in range(num_circuits):
|
393
|
-
fn0 = fn0s[i]; tvd = tvds[i]; x = percircuit_budgets[i]
|
394
|
-
agg_dlogl_deriv_wrt_percircuit_budgets[i] = \
|
395
|
-
(fn0 / _np.exp(a)) * _np.exp(a - b * (x / tvd)**2
|
396
|
-
- _np.sqrt(2 * b) * (x / tvd)) * (-2 * b * x / tvd**2
|
397
|
-
- _np.sqrt(2 * b) / tvd)
|
398
|
-
#This isn't always true in "proxy" case - maybe clip to 0?
|
399
|
-
#assert(_np.all(agg_dlogl_deriv_wrt_percircuit_budgets <= 0)), \
|
400
|
-
# "Derivative of aggregate LLR wrt any circuit budget should be negative"
|
401
|
-
return _np.dot(agg_dlogl_deriv_wrt_percircuit_budgets, percircuit_budget_deriv)
|
402
|
-
|
403
|
-
|
404
|
-
def _proxy_agg_dlogl_hessian(x, tvds, fn0s, percircuit_budget_deriv):
|
405
|
-
# expects percircuit_budget_deriv to be for all (*global*) circuits
|
406
|
-
percircuit_budgets = _np.dot(percircuit_budget_deriv, x)
|
407
|
-
num_circuits = percircuit_budgets.shape[0]
|
408
|
-
a = 4; b = 2 # fit params: must be same in all proxy fns
|
409
|
-
|
410
|
-
agg_dlogl_hessian_wrt_percircuit_budgets = _np.zeros(num_circuits)
|
411
|
-
for i in range(num_circuits):
|
412
|
-
fn0 = fn0s[i]; tvd = tvds[i]; x = percircuit_budgets[i]
|
413
|
-
agg_dlogl_hessian_wrt_percircuit_budgets[i] = \
|
414
|
-
(fn0 / _np.exp(a)) * _np.exp(a - b * (x / tvd)**2 - _np.sqrt(2 * b) * (x / tvd)) * (
|
415
|
-
(-2 * b * x / tvd**2 - _np.sqrt(2 * b) / tvd)**2 - 2 * b / tvd**2)
|
416
|
-
assert(_np.all(agg_dlogl_hessian_wrt_percircuit_budgets >= -1e-8)), \
|
417
|
-
"Hessian of aggregate LLR wrt any circuit budget should be positive"
|
418
|
-
H = _np.dot(percircuit_budget_deriv.T,
|
419
|
-
_np.dot(_np.diag(agg_dlogl_hessian_wrt_percircuit_budgets),
|
420
|
-
percircuit_budget_deriv)) # (nW, nC)(nC)(nC, nW)
|
421
|
-
#evals = _np.linalg.eigvals(H)
|
422
|
-
#assert(_np.all(evals >= -1e-8))
|
423
|
-
return H
|
424
|
-
|
425
|
-
|
426
340
|
def _get_percircuit_budget_deriv(budget, layout):
|
427
341
|
""" Returns local_percircuit_budget_deriv, global_percircuit_budget_deriv """
|
428
342
|
percircuit_budget_deriv = budget.precompute_for_same_circuits(layout.circuits) # for *local* circuits
|
@@ -492,168 +406,6 @@ def optimize_wildcard_bisect_alpha(budget, objfn, two_dlogl_threshold, redbox_th
|
|
492
406
|
return
|
493
407
|
|
494
408
|
|
495
|
-
def optimize_wildcard_budget_cvxopt(budget, L1weights, objfn, two_dlogl_threshold, redbox_threshold,
|
496
|
-
printer, abs_tol=1e-5, rel_tol=1e-5, max_iters=50):
|
497
|
-
"""Uses CVXOPT to optimize the wildcard budget. Includes both aggregate and per-circuit constraints."""
|
498
|
-
#Use cvxopt
|
499
|
-
import cvxopt as _cvxopt
|
500
|
-
# Minimize f_0(wv) = |wv|_1 (perhaps weighted) subject to the constraints:
|
501
|
-
# dot(percircuit_budget_deriv, wv) >= critical_percircuit_budgets
|
502
|
-
# 2 * aggregate_dlogl <= two_dlogl_threshold => f_1(wv) = 2 * aggregate_dlogl(wv) - threshold <= 0
|
503
|
-
|
504
|
-
layout = objfn.layout
|
505
|
-
wv = budget.to_vector().copy()
|
506
|
-
n = len(wv)
|
507
|
-
x0 = wv.reshape((n, 1)) # TODO - better guess?
|
508
|
-
|
509
|
-
initial_probs = objfn.probs.copy() # *local*
|
510
|
-
current_probs = initial_probs.copy()
|
511
|
-
percircuit_budget_deriv, global_percircuit_budget_deriv = _get_percircuit_budget_deriv(budget, layout)
|
512
|
-
|
513
|
-
critical_percircuit_budgets = _get_critical_circuit_budgets(objfn, redbox_threshold) # for *global* circuits
|
514
|
-
critical_percircuit_budgets.shape = (len(critical_percircuit_budgets), 1)
|
515
|
-
|
516
|
-
_cvxopt.solvers.options['abstol'] = abs_tol
|
517
|
-
_cvxopt.solvers.options['reltol'] = rel_tol
|
518
|
-
_cvxopt.solvers.options['maxiters'] = max_iters
|
519
|
-
|
520
|
-
def F(x=None, z=None, debug=True):
|
521
|
-
if z is None and x is None:
|
522
|
-
# (m, x0) where m is number of nonlinear constraints and x0 is in domain of f
|
523
|
-
return (1, _cvxopt.matrix(x0))
|
524
|
-
|
525
|
-
if min(x) < 0.0:
|
526
|
-
return None # don't allow negative wildcard vector components
|
527
|
-
|
528
|
-
budget.from_vector(_np.array(x))
|
529
|
-
p_deriv = budget.update_probs(initial_probs, current_probs, objfn.freqs, layout, percircuit_budget_deriv,
|
530
|
-
return_deriv=True)
|
531
|
-
|
532
|
-
#Evaluate F(x) => return (f, Df)
|
533
|
-
f = _cvxopt.matrix(_np.array([_agg_dlogl(current_probs, objfn,
|
534
|
-
two_dlogl_threshold)]).reshape((1, 1))) # shape (m,1)
|
535
|
-
Df = _cvxopt.matrix(_np.empty((1, n), 'd')) # shape (m, n)
|
536
|
-
Df[0, :] = _agg_dlogl_deriv(current_probs, objfn, percircuit_budget_deriv, p_deriv)
|
537
|
-
|
538
|
-
if z is None:
|
539
|
-
return f, Df
|
540
|
-
|
541
|
-
# additionally, compute H = z_0 * Hessian(f_0)(wv)
|
542
|
-
H = _cvxopt.matrix(z[0] * _agg_dlogl_hessian(current_probs, objfn, percircuit_budget_deriv, p_deriv))
|
543
|
-
evals = _np.linalg.eigvals(H)
|
544
|
-
assert(_np.all(evals >= -1e-8)) # tests *global* H
|
545
|
-
return f, Df, H
|
546
|
-
|
547
|
-
#check_fd([0.0001] * n, True)
|
548
|
-
|
549
|
-
#CVXOPT
|
550
|
-
printer.log("Beginning cvxopt.cpl solve...")
|
551
|
-
c = _cvxopt.matrix(L1weights.reshape((n, 1)))
|
552
|
-
G = -_cvxopt.matrix(_np.concatenate((global_percircuit_budget_deriv, _np.identity(n, 'd')), axis=0))
|
553
|
-
h = -_cvxopt.matrix(_np.concatenate((critical_percircuit_budgets, _np.zeros((n, 1), 'd')), axis=0))
|
554
|
-
#result = _cvxopt.solvers.cpl(c, F) # kktsolver='ldl2'
|
555
|
-
result = _cvxopt.solvers.cpl(c, F, G, h) # kktsolver='ldl2'
|
556
|
-
|
557
|
-
#This didn't seem to help much:
|
558
|
-
#print("Attempting restart...")
|
559
|
-
#x0[:,0] = list(result['x'])
|
560
|
-
#result = _cvxopt.solvers.cpl(c, F) # kktsolver='ldl2'
|
561
|
-
|
562
|
-
printer.log("CVXOPT result = " + str(result))
|
563
|
-
printer.log("x = " + str(list(result['x'])))
|
564
|
-
printer.log("y = " + str(list(result['y'])))
|
565
|
-
printer.log("znl = " + str(list(result['znl'])))
|
566
|
-
printer.log("snl = " + str(list(result['snl'])))
|
567
|
-
budget.from_vector(result['x'])
|
568
|
-
return
|
569
|
-
|
570
|
-
|
571
|
-
def optimize_wildcard_budget_cvxopt_zeroreg(budget, L1weights, objfn, two_dlogl_threshold, redbox_threshold,
|
572
|
-
printer, abs_tol=1e-5, rel_tol=1e-5, max_iters=50, small=1e-6):
|
573
|
-
"""Adds regularization of the L1 term around zero values of the budget. This doesn't seem to help much."""
|
574
|
-
#Use cvxopt
|
575
|
-
import cvxopt as _cvxopt
|
576
|
-
# Minimize f_0(wv) = |wv|_1 (perhaps weighted) subject to the constraints:
|
577
|
-
# dot(percircuit_budget_deriv, wv) >= critical_percircuit_budgets
|
578
|
-
# 2 * aggregate_dlogl <= two_dlogl_threshold => f_1(wv) = 2 * aggregate_dlogl(wv) - threshold <= 0
|
579
|
-
|
580
|
-
layout = objfn.layout
|
581
|
-
wv = budget.to_vector().copy()
|
582
|
-
n = len(wv)
|
583
|
-
x0 = wv.reshape((n, 1))
|
584
|
-
c = L1weights.reshape((n, 1))
|
585
|
-
SMALL2 = small**2
|
586
|
-
|
587
|
-
initial_probs = objfn.probs.copy()
|
588
|
-
current_probs = initial_probs.copy()
|
589
|
-
percircuit_budget_deriv, global_percircuit_budget_deriv = _get_percircuit_budget_deriv(budget, layout)
|
590
|
-
|
591
|
-
critical_percircuit_budgets = _get_critical_circuit_budgets(objfn, redbox_threshold)
|
592
|
-
critical_percircuit_budgets.shape = (len(critical_percircuit_budgets), 1)
|
593
|
-
assert(_np.all(critical_percircuit_budgets >= 0))
|
594
|
-
assert(_np.all(percircuit_budget_deriv >= 0))
|
595
|
-
|
596
|
-
_cvxopt.solvers.options['abstol'] = abs_tol
|
597
|
-
_cvxopt.solvers.options['reltol'] = rel_tol
|
598
|
-
_cvxopt.solvers.options['maxiters'] = max_iters
|
599
|
-
|
600
|
-
def F(x=None, z=None):
|
601
|
-
if z is None and x is None:
|
602
|
-
# (m, x0) where m is number of nonlinear constraints and x0 is in domain of f
|
603
|
-
return (1, _cvxopt.matrix(x0))
|
604
|
-
|
605
|
-
if min(x) < 0.0:
|
606
|
-
return None # don't allow negative wildcard vector components
|
607
|
-
|
608
|
-
budget.from_vector(x)
|
609
|
-
p_deriv = budget.update_probs(initial_probs, current_probs, objfn.freqs, layout, percircuit_budget_deriv,
|
610
|
-
return_deriv=True)
|
611
|
-
|
612
|
-
#Evaluate F(x) => return (f, Df)
|
613
|
-
sqrtVec = _np.sqrt((c * x)**2 + SMALL2)
|
614
|
-
f = _cvxopt.matrix(_np.array([float(_np.sum(sqrtVec)),
|
615
|
-
_agg_dlogl(current_probs, objfn,
|
616
|
-
two_dlogl_threshold)]).reshape((2, 1))) # shape (m+1,1)
|
617
|
-
|
618
|
-
L1term_grad = c if SMALL2 == 0.0 else c**2 * x / sqrtVec
|
619
|
-
Df = _cvxopt.matrix(_np.empty((2, n), 'd')) # shape (m+1, n)
|
620
|
-
Df[0, :] = L1term_grad[:, 0]
|
621
|
-
Df[1, :] = _agg_dlogl_deriv(current_probs, objfn, percircuit_budget_deriv, p_deriv)
|
622
|
-
#print("rank Df=", _np.linalg.matrix_rank(Df))
|
623
|
-
if z is None:
|
624
|
-
return f, Df
|
625
|
-
|
626
|
-
# additionally, compute H = z_0 * Hessian(f_0)(wv) + z_1 * Hessian(f_1)(wv)
|
627
|
-
L1_term_hess = _np.zeros((n, n), 'd') if SMALL2 == 0.0 else \
|
628
|
-
_np.diag(-1.0 / (sqrtVec**3) * (c**2 * x)**2 + c**2 / sqrtVec)
|
629
|
-
Hf = _cvxopt.matrix(z[0] * L1_term_hess + z[1] * _agg_dlogl_hessian(current_probs, objfn,
|
630
|
-
percircuit_budget_deriv, p_deriv))
|
631
|
-
#print("rank Hf=", _np.linalg.matrix_rank(Hf), " z[1]=",z[1])
|
632
|
-
return f, Df, Hf
|
633
|
-
|
634
|
-
#CVXOPT
|
635
|
-
printer.log("Beginning cvxopt.cp solve...")
|
636
|
-
#print("Rank G = ",_np.linalg.matrix_rank(percircuit_budget_deriv))
|
637
|
-
#result = _cvxopt.solvers.cp(F)
|
638
|
-
# Condition is Gx <= h => -Gx >= -h
|
639
|
-
G = -_cvxopt.matrix(_np.concatenate((global_percircuit_budget_deriv, _np.identity(n, 'd')), axis=0))
|
640
|
-
h = -_cvxopt.matrix(_np.concatenate((critical_percircuit_budgets, _np.zeros((n, 1), 'd')), axis=0))
|
641
|
-
result = _cvxopt.solvers.cp(F, G, h)
|
642
|
-
|
643
|
-
#This didn't seem to help much:
|
644
|
-
#print("Attempting restart...")
|
645
|
-
#x0[:,0] = list(result['x'])
|
646
|
-
#result = _cvxopt.solvers.cpl(c, F) # kktsolver='ldl2'
|
647
|
-
|
648
|
-
printer.log("CVXOPT result = " + str(result))
|
649
|
-
printer.log("x = " + str(list(result['x'])))
|
650
|
-
printer.log("y = " + str(list(result['y'])))
|
651
|
-
printer.log("znl = " + str(list(result['znl'])))
|
652
|
-
printer.log("snl = " + str(list(result['snl'])))
|
653
|
-
budget.from_vector(result['x'])
|
654
|
-
return
|
655
|
-
|
656
|
-
|
657
409
|
def optimize_wildcard_budget_barrier(budget, L1weights, objfn, two_dlogl_threshold,
|
658
410
|
redbox_threshold, printer, tol=1e-7, max_iters=50, num_steps=3,
|
659
411
|
save_debugplot_data=False):
|
@@ -776,13 +528,6 @@ def optimize_wildcard_budget_barrier(budget, L1weights, objfn, two_dlogl_thresho
|
|
776
528
|
Hobj = t * _np.diag(-1.0 / (sqrtVec**3) * (c**2 * x)**2 + c**2 / sqrtVec) + Hbarrier
|
777
529
|
return obj, Dobj, Hobj
|
778
530
|
|
779
|
-
#import scipy.optimize
|
780
|
-
#def barrier_obj(x):
|
781
|
-
# x = _np.clip(x, 1e-10, None)
|
782
|
-
# return t * _np.dot(c.T, x) - _np.log(-barrierF(x, False))
|
783
|
-
#result = scipy.optimize.minimize(barrier_obj, x, method="CG")
|
784
|
-
#x = _np.clip(result.x, 0, None)
|
785
|
-
|
786
531
|
x, debug_x_list = NewtonSolve(x, NewtonObjective, NewtonObjective_derivs, tol, max_iters, printer - 1)
|
787
532
|
#x, debug_x_list = NewtonSolve(x, NewtonObjective, None, tol, max_iters, printer - 1) # use finite-diff derivs
|
788
533
|
|
@@ -924,86 +669,6 @@ def NewtonSolve(initial_x, fn, fn_with_derivs=None, dx_tol=1e-6, max_iters=20, p
|
|
924
669
|
return x, x_list
|
925
670
|
|
926
671
|
|
927
|
-
def optimize_wildcard_budget_cvxopt_smoothed(budget, L1weights, objfn, two_dlogl_threshold, redbox_threshold,
|
928
|
-
printer, abs_tol=1e-5, rel_tol=1e-5, max_iters=50):
|
929
|
-
"""
|
930
|
-
Uses a smoothed version of the objective function. Doesn't seem to help much.
|
931
|
-
|
932
|
-
The thinking here was to eliminate the 2nd derivative discontinuities of the original problem.
|
933
|
-
"""
|
934
|
-
import cvxopt as _cvxopt
|
935
|
-
|
936
|
-
layout = objfn.layout
|
937
|
-
wv = budget.to_vector().copy()
|
938
|
-
n = len(wv)
|
939
|
-
x0 = wv.reshape((n, 1)) # TODO - better guess?
|
940
|
-
|
941
|
-
#initial_probs = objfn.probs.copy()
|
942
|
-
#current_probs = initial_probs.copy()
|
943
|
-
percircuit_budget_deriv, global_percircuit_budget_deriv = _get_percircuit_budget_deriv(budget, layout)
|
944
|
-
critical_percircuit_budgets = _get_critical_circuit_budgets(objfn, redbox_threshold)
|
945
|
-
critical_percircuit_budgets.shape = (len(critical_percircuit_budgets), 1)
|
946
|
-
num_circuits = len(layout.circuits)
|
947
|
-
|
948
|
-
_cvxopt.solvers.options['abstol'] = abs_tol
|
949
|
-
_cvxopt.solvers.options['reltol'] = rel_tol
|
950
|
-
_cvxopt.solvers.options['maxiters'] = max_iters
|
951
|
-
|
952
|
-
#Prepare for proxy_barrierF evaluations
|
953
|
-
local_tvds = _np.zeros(num_circuits, 'd')
|
954
|
-
local_fn0s = _np.zeros(num_circuits, 'd')
|
955
|
-
for i in range(num_circuits):
|
956
|
-
p = objfn.probs[layout.indices_for_index(i)]
|
957
|
-
f = objfn.freqs[layout.indices_for_index(i)]
|
958
|
-
nn = objfn.counts[layout.indices_for_index(i)] # don't re-use 'n' variable!
|
959
|
-
N = objfn.total_counts[layout.indices_for_index(i)]
|
960
|
-
dlogl_elements = objfn.raw_objfn.terms(p, nn, N, f) # N * f * _np.log(f / p)
|
961
|
-
local_fn0s[i] = 2 * _np.sum(dlogl_elements)
|
962
|
-
local_tvds[i] = 0.5 * _np.sum(_np.abs(p - f))
|
963
|
-
tvds = layout.allgather_local_array('c', local_tvds)
|
964
|
-
fn0s = layout.allgather_local_array('c', local_fn0s)
|
965
|
-
|
966
|
-
def F(x=None, z=None, debug=True):
|
967
|
-
if z is None and x is None:
|
968
|
-
# (m, x0) where m is number of nonlinear constraints and x0 is in domain of f
|
969
|
-
return (1, _cvxopt.matrix(x0))
|
970
|
-
|
971
|
-
if min(x) < 0.0:
|
972
|
-
return None # don't allow negative wildcard vector components
|
973
|
-
|
974
|
-
#budget.from_vector(_np.array(x))
|
975
|
-
#budget.update_probs(initial_probs, current_probs, objfn.freqs, layout, percircuit_budget_deriv)
|
976
|
-
|
977
|
-
#Evaluate F(x) => return (f, Df)
|
978
|
-
f = _cvxopt.matrix(_np.array([_proxy_agg_dlogl(x, tvds, fn0s, global_percircuit_budget_deriv,
|
979
|
-
two_dlogl_threshold)]).reshape((1, 1))) # shape (m,1)
|
980
|
-
Df = _cvxopt.matrix(_np.empty((1, n), 'd')) # shape (m, n)
|
981
|
-
Df[0, :] = _proxy_agg_dlogl_deriv(x, tvds, fn0s, global_percircuit_budget_deriv)
|
982
|
-
|
983
|
-
if z is None:
|
984
|
-
return f, Df
|
985
|
-
|
986
|
-
# additionally, compute H = z_0 * Hessian(f_0)(wv)
|
987
|
-
H = _cvxopt.matrix(z[0] * _proxy_agg_dlogl_hessian(x, tvds, fn0s, global_percircuit_budget_deriv))
|
988
|
-
evals = _np.linalg.eigvals(H)
|
989
|
-
assert(_np.all(evals >= -1e-8))
|
990
|
-
return f, Df, H
|
991
|
-
|
992
|
-
printer.log("Beginning cvxopt.cpl solve with smoothed (proxy) fn...")
|
993
|
-
c = _cvxopt.matrix(L1weights.reshape((n, 1)))
|
994
|
-
G = -_cvxopt.matrix(_np.concatenate((global_percircuit_budget_deriv, _np.identity(n, 'd')), axis=0))
|
995
|
-
h = -_cvxopt.matrix(_np.concatenate((critical_percircuit_budgets, _np.zeros((n, 1), 'd')), axis=0))
|
996
|
-
result = _cvxopt.solvers.cpl(c, F, G, h) # kktsolver='ldl2'
|
997
|
-
|
998
|
-
printer.log("CVXOPT result = " + str(result))
|
999
|
-
printer.log("x = " + str(list(result['x'])))
|
1000
|
-
printer.log("y = " + str(list(result['y'])))
|
1001
|
-
printer.log("znl = " + str(list(result['znl'])))
|
1002
|
-
printer.log("snl = " + str(list(result['snl'])))
|
1003
|
-
budget.from_vector(result['x'])
|
1004
|
-
return
|
1005
|
-
|
1006
|
-
|
1007
672
|
def _compute_fd(x, fn, compute_hessian=True, eps=1e-7):
|
1008
673
|
x_len = len(x)
|
1009
674
|
grad = _np.zeros(x_len, 'd')
|
@@ -1026,247 +691,3 @@ def _compute_fd(x, fn, compute_hessian=True, eps=1e-7):
|
|
1026
691
|
f_eps_kl = fn(x_eps_kl)
|
1027
692
|
hess[k, l] = (f_eps_kl - f_eps_k - f_eps_l + f0) / eps**2
|
1028
693
|
return grad, hess
|
1029
|
-
|
1030
|
-
|
1031
|
-
#DEBUG: check with finite diff derivatives:
|
1032
|
-
#def _check_fd(wv_base, chk_hessian=False):
|
1033
|
-
# wv_base = _np.array(wv_base, 'd') # [0.0001]*3
|
1034
|
-
# wv_len = len(wv_base)
|
1035
|
-
# grad = _np.zeros(wv_len, 'd')
|
1036
|
-
# f0, grad_chk = F(wv_base, debug=False)
|
1037
|
-
# eps = 1e-7
|
1038
|
-
# for k in range(len(wv_base)):
|
1039
|
-
# wv_eps = wv_base.copy(); wv_eps[k] += eps
|
1040
|
-
# f_eps, _ = F(wv_eps, debug=False)
|
1041
|
-
# grad[k] = (f_eps[0] - f0[0]) / eps
|
1042
|
-
# rel_diff_norm = _np.linalg.norm(grad - grad_chk) / _np.linalg.norm(grad)
|
1043
|
-
# #print("GRAD CHECK:")
|
1044
|
-
# #print(grad)
|
1045
|
-
# #print(grad_chk)
|
1046
|
-
# #print(" diff = ",grad - grad_chk, " rel_diff_norm=", rel_diff_norm)
|
1047
|
-
# print("GRAD CHK ", rel_diff_norm)
|
1048
|
-
# assert(rel_diff_norm < 1e-3)
|
1049
|
-
# if chk_hessian is False: return
|
1050
|
-
#
|
1051
|
-
# hess = _np.zeros((wv_len, wv_len), 'd')
|
1052
|
-
# f0, _, H_chk = F(wv_base, [1.0], debug=False)
|
1053
|
-
# eps = 1e-7
|
1054
|
-
# for k in range(wv_len):
|
1055
|
-
# wv_eps_k = wv_base.copy(); wv_eps_k[k] += eps
|
1056
|
-
# f_eps_k, _ = F(wv_eps_k, debug=False)
|
1057
|
-
# for l in range(wv_len):
|
1058
|
-
# wv_eps_l = wv_base.copy(); wv_eps_l[l] += eps
|
1059
|
-
# f_eps_l, _ = F(wv_eps_l, debug=False)
|
1060
|
-
# wv_eps_kl = wv_eps_k.copy(); wv_eps_kl[l] += eps
|
1061
|
-
# f_eps_kl, _ = F(wv_eps_kl, debug=False)
|
1062
|
-
# hess[k, l] = (f_eps_kl[0] - f_eps_k[0] - f_eps_l[0] + f0[0]) / eps**2
|
1063
|
-
# rel_diff_norm = _np.linalg.norm(hess - H_chk) / _np.linalg.norm(hess)
|
1064
|
-
# #print("HESSIAN CHECK:")
|
1065
|
-
# #print(hess)
|
1066
|
-
# #print(H_chk)
|
1067
|
-
# #print(" diff = ",hess - H_chk, " rel_diff_norm=", rel_diff_norm)
|
1068
|
-
# print("HESS CHK ", rel_diff_norm)
|
1069
|
-
# #assert(rel_diff_norm < 5e-2)
|
1070
|
-
|
1071
|
-
|
1072
|
-
#UNUSED?
|
1073
|
-
#def _wildcard_objective_firstterms(current_probs):
|
1074
|
-
# dlogl_elements = objfn.raw_objfn.terms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
|
1075
|
-
# for i in range(num_circuits):
|
1076
|
-
# dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
|
1077
|
-
#
|
1078
|
-
# two_dlogl_percircuit = 2 * dlogl_percircuit
|
1079
|
-
# two_dlogl = sum(two_dlogl_percircuit)
|
1080
|
-
# return max(0, two_dlogl - two_dlogl_threshold) \
|
1081
|
-
# + sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None))
|
1082
|
-
#
|
1083
|
-
#def _advance_probs(layout, current_probs, dlogl_percircuit, dlogl_delements, delta_percircuit_budgets):
|
1084
|
-
# num_circuits = len(layout.circuits)
|
1085
|
-
# delta_probs = _np.zeros(len(current_probs), 'd')
|
1086
|
-
# for i in range(num_circuits):
|
1087
|
-
# #if 2 * dlogl_percircuit[i] <= redbox_threshold and global_criteria_met: continue
|
1088
|
-
#
|
1089
|
-
# step = delta_percircuit_budgets[i]
|
1090
|
-
# #p = current_probs[layout.indices_for_index(i)]
|
1091
|
-
# chis = dlogl_delements[layout.indices_for_index(i)]
|
1092
|
-
# maxes = _np.array(_np.abs(chis - _np.max(chis)) < 1.e-4, dtype=int)
|
1093
|
-
# mins = _np.array(_np.abs(chis - _np.min(chis)) < 1.e-4, dtype=int)
|
1094
|
-
# add_to = step * mins / sum(mins)
|
1095
|
-
# take_from = step * maxes / sum(maxes)
|
1096
|
-
# delta_probs[layout.indices_for_index(i)] = add_to - take_from
|
1097
|
-
# return delta_probs
|
1098
|
-
#
|
1099
|
-
#
|
1100
|
-
#def wildcard_probs_propagation(budget, initial_wv, final_wv, objfn, layout, num_steps=10):
|
1101
|
-
# #Begin with a zero budget
|
1102
|
-
# current_probs = objfn.probs.copy()
|
1103
|
-
#
|
1104
|
-
# percircuit_budget_deriv = budget.precompute_for_same_circuits(layout.circuits)
|
1105
|
-
# dlogl_percircuit = objfn.percircuit()
|
1106
|
-
#
|
1107
|
-
# num_circuits = len(layout.circuits)
|
1108
|
-
# assert(len(dlogl_percircuit) == num_circuits)
|
1109
|
-
#
|
1110
|
-
# delta_wv = (final_wv - initial_wv) / num_steps
|
1111
|
-
# wv = initial_wv.copy()
|
1112
|
-
# for i in range(nSteps):
|
1113
|
-
# wv += delta_wv
|
1114
|
-
# dlogl_elements = objfn.raw_objfn.terms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
|
1115
|
-
# for i in range(num_circuits):
|
1116
|
-
# dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
|
1117
|
-
# dlogl_delements = objfn.raw_objfn.dterms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
|
1118
|
-
#
|
1119
|
-
# two_dlogl = sum(2 * dlogl_percircuit)
|
1120
|
-
# perbox_residual = sum(_np.clip(2 * dlogl_percircuit - redbox_threshold, 0, None))
|
1121
|
-
# print("Advance: global=", two_dlogl - two_dlogl_threshold, " percircuit=", perbox_residual)
|
1122
|
-
# print(" wv=", wv)
|
1123
|
-
#
|
1124
|
-
# delta_percircuit_budgets = _np.dot(percircuit_budget_deriv, delta_wv)
|
1125
|
-
# delta_probs = _advance_probs(layout, current_probs, dlogl_percircuit,
|
1126
|
-
# dlogl_delements, delta_percircuit_budgets) # updates current_probs
|
1127
|
-
# print("|delta probs| = ", _np.linalg.norm(delta_probs))
|
1128
|
-
# current_probs += delta_probs
|
1129
|
-
# return currrent_probs
|
1130
|
-
#def wildcard_opt_by_propagation() #TODO
|
1131
|
-
# # Time-evolution approach: Walk downhill in steps until constraints ("firstterms") are satisfied
|
1132
|
-
# #wv = budget.to_vector().copy()
|
1133
|
-
#
|
1134
|
-
# def _criteria_deriv(current_probs, dlogl_percircuit, dlogl_delements, mode, global_criteria_met):
|
1135
|
-
# # derivative of firstterms wrt per-circuit wildcard budgets - namely if that budget goes up how to most
|
1136
|
-
# # efficiently reduce firstterms
|
1137
|
-
# # in doing so, this computes how the per-circuit budget should be allocated to probabilities
|
1138
|
-
# # (i.e. how probs should be updated) to achieve this decrease in firstterms
|
1139
|
-
# ret = _np.zeros(num_circuits)
|
1140
|
-
# max_delta = _np.zeros(num_circuits) # maximum amount of change in per-circuit budget before hitting a
|
1141
|
-
# # discontinuity in 2nd deriv
|
1142
|
-
# for i in range(num_circuits):
|
1143
|
-
# if mode == "percircuit" and 2 * dlogl_percircuit[i] <= redbox_threshold:
|
1144
|
-
# continue # don't include this circuit's contribution
|
1145
|
-
# elif mode == "aggregate": # all circuits contribute
|
1146
|
-
# prefactor = 1.0
|
1147
|
-
# else: # mode == "both"
|
1148
|
-
# prefactor = 2.0 # contributes twice: once for per-circuit and once for aggregate
|
1149
|
-
# if 2 * dlogl_percircuit[i] <= redbox_threshold:
|
1150
|
-
# if global_criteria_met: continue # no contribution at all
|
1151
|
-
# else: prefactor = 1.0
|
1152
|
-
#
|
1153
|
-
# chis = dlogl_delements[layout.indices_for_index(i)] # ~ f/p (deriv of f*log(p))
|
1154
|
-
# highest_chi, lowest_chi = _np.max(chis), _np.min(chis)
|
1155
|
-
# bmaxes = _np.array(_np.abs(chis - highest_chi) < 1.e-4, dtype=bool)
|
1156
|
-
# bmins = _np.array(_np.abs(chis - lowest_chi) < 1.e-4, dtype=bool)
|
1157
|
-
# maxes = _np.array(_np.abs(chis - _np.max(chis)) < 1.e-4, dtype=int)
|
1158
|
-
# mins = _np.array(_np.abs(chis - _np.min(chis)) < 1.e-4, dtype=int)
|
1159
|
-
#
|
1160
|
-
# next_chis = chis.copy(); next_chis[bmaxes] = 1.0; next_chis[bmins] = 1.0
|
1161
|
-
# #p = current_probs[layout.indices_for_index(i)]
|
1162
|
-
# f = objfn.freqs[layout.indices_for_index(i)]
|
1163
|
-
# next_highest_chi = _np.max(next_chis) # 2nd highest chi value (may be duplicated)
|
1164
|
-
# next_lowest_chi = _np.min(next_chis) # 2nd lowest chi value (may be duplicated)
|
1165
|
-
#
|
1166
|
-
# # 1/chi = p/f, (1/chi'-1/chi) = dp/f => dp = f(chi - chi')/(chi chi')
|
1167
|
-
# delta_p = _np.zeros(chis.shape, 'd')
|
1168
|
-
# delta_p[bmaxes] = f[bmaxes] * (1. / chis[bmaxes] - 1 / next_highest_chi)
|
1169
|
-
# delta_p[bmins] = f[bmins] * (1. / chis[bmins] - 1 / next_lowest_chi)
|
1170
|
-
# max_delta[i] = _np.max(_np.abs(delta_p))
|
1171
|
-
#
|
1172
|
-
# ret[i] = prefactor * _np.sum(chis * (mins / sum(mins) - maxes / sum(maxes)))
|
1173
|
-
# return ret, max_delta
|
1174
|
-
#
|
1175
|
-
#
|
1176
|
-
# for mode in (): #("both",): #("percircuit", "aggregate"): # how many & which criteria to enforce on each pass.
|
1177
|
-
# print("Stage w/mode = ",mode)
|
1178
|
-
# step = 0.01
|
1179
|
-
# itr = 0
|
1180
|
-
# L1grad = L1weights
|
1181
|
-
# imax = None
|
1182
|
-
# last_objfn_value = None; last_probs = None # DEBUG
|
1183
|
-
# last_dlogl_percircuit = last_dlogl_elements = None # DEBUG
|
1184
|
-
# while True:
|
1185
|
-
#
|
1186
|
-
# #Compute current log-likelihood values and derivatives wrt probabilities
|
1187
|
-
# dlogl_elements = objfn.raw_objfn.terms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
|
1188
|
-
# for i in range(num_circuits):
|
1189
|
-
# dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
|
1190
|
-
# dlogl_delements = objfn.raw_objfn.dterms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
|
1191
|
-
# two_dlogl_percircuit = 2 * dlogl_percircuit
|
1192
|
-
# two_dlogl = sum(two_dlogl_percircuit)
|
1193
|
-
# global_criteria_met = two_dlogl < two_dlogl_threshold
|
1194
|
-
#
|
1195
|
-
# # check aggregate and per-circuit criteria - exit if met
|
1196
|
-
# if mode == "aggregate":
|
1197
|
-
# objfn_value = max(two_dlogl - two_dlogl_threshold, 0)
|
1198
|
-
# elif mode == "percircuit":
|
1199
|
-
# perbox_residual = sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None))
|
1200
|
-
# objfn_value = perbox_residual
|
1201
|
-
# elif mode == "both":
|
1202
|
-
# objfn_value = max(two_dlogl - two_dlogl_threshold, 0) \
|
1203
|
-
# + sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None))
|
1204
|
-
#
|
1205
|
-
# print("Iter ", itr, ": mode=", mode, " objfn=", objfn_value, " moved in", imax)
|
1206
|
-
# print(" wv=", wv); itr += 1
|
1207
|
-
# if objfn_value < 1e-10: # if global_criteria_met and perbox_residual < 1e-10:
|
1208
|
-
# break # DONE!
|
1209
|
-
# if last_objfn_value is not None and last_objfn_value < objfn_value:
|
1210
|
-
# iproblem = _np.argmax(dlogl_percircuit - last_dlogl_percircuit)
|
1211
|
-
# print("Circuit ",iproblem," dlogl=", last_dlogl_percircuit[iproblem], " => ",
|
1212
|
-
# dlogl_percircuit[iproblem])
|
1213
|
-
# print(" probs: ",last_probs[layout.indices_for_index(iproblem)], " => ",
|
1214
|
-
# current_probs[layout.indices_for_index(iproblem)])
|
1215
|
-
# print(" freqs: ",objfn.freqs[layout.indices_for_index(iproblem)])
|
1216
|
-
# import bpdb; bpdb.set_trace()
|
1217
|
-
# assert(False), "Objective function should be monotonic!!!"
|
1218
|
-
# last_objfn_value = objfn_value
|
1219
|
-
# last_probs = current_probs.copy()
|
1220
|
-
# last_dlogl_percircuit = dlogl_percircuit.copy()
|
1221
|
-
# last_dlogl_elements = dlogl_elements.copy()
|
1222
|
-
#
|
1223
|
-
# #import bpdb; bpdb.set_trace()
|
1224
|
-
# criteria_deriv_wrt_percircuit_budgets, maximum_percircuit_budget_delta = \
|
1225
|
-
# _criteria_deriv(current_probs, dlogl_percircuit, dlogl_delements, mode, global_criteria_met)
|
1226
|
-
# wv_grad = _np.dot(criteria_deriv_wrt_percircuit_budgets, percircuit_budget_deriv) #+ L1grad
|
1227
|
-
# grad_norm = _np.linalg.norm(wv_grad)
|
1228
|
-
# assert(grad_norm > 1e-6), \
|
1229
|
-
# "Gradient norm == 0! - cannot reduce constraint residuals with more wildcard!"
|
1230
|
-
#
|
1231
|
-
# imax = _np.argmax(_np.abs(wv_grad / L1grad)); sgn = _np.sign(wv_grad[imax])
|
1232
|
-
# wv_grad[:] = 0; wv_grad[imax] = sgn
|
1233
|
-
# downhill_direction = (-wv_grad / _np.linalg.norm(wv_grad))
|
1234
|
-
#
|
1235
|
-
# #Constant step:
|
1236
|
-
# #step = 1e-5
|
1237
|
-
# # Variable step: expected reduction = df/dw * dw, so set |dw| = 0.01 * current_f / |df/dw|
|
1238
|
-
# #step = (0.01 * objfn_value / grad_norm)
|
1239
|
-
#
|
1240
|
-
# #Step based on next discontinuity ("breakpoint")
|
1241
|
-
# # require _np.dot(percircuit_budget_deriv, step * downhill_direction) < maximum_percircuit_budget_delta
|
1242
|
-
# step = _np.min(maximum_percircuit_budget_delta / _np.dot(percircuit_budget_deriv, downhill_direction))
|
1243
|
-
# assert(step > 0)
|
1244
|
-
# step = min(step, 1e-5) # don't allow too large of a step...
|
1245
|
-
#
|
1246
|
-
# delta_wv = downhill_direction * step
|
1247
|
-
# wv += delta_wv
|
1248
|
-
#
|
1249
|
-
# delta_percircuit_budgets = _np.dot(percircuit_budget_deriv, delta_wv)
|
1250
|
-
# #assert(_np.all(delta_percircuit_budgets >= 0))
|
1251
|
-
# if not _np.all(delta_percircuit_budgets >= 0):
|
1252
|
-
# import bpdb; bpdb.set_trace()
|
1253
|
-
# pass
|
1254
|
-
#
|
1255
|
-
# delta_probs = _advance_probs(layout, current_probs, dlogl_percircuit, dlogl_delements,
|
1256
|
-
# delta_percircuit_budgets) #, global_criteria_met) # updates current_probs
|
1257
|
-
# print("|delta probs| = ", _np.linalg.norm(delta_probs))
|
1258
|
-
# current_probs += delta_probs
|
1259
|
-
#
|
1260
|
-
# #assert(False), "STOP"
|
1261
|
-
# wv_new = wv
|
1262
|
-
# print("NEW TEST - final wildcard is ", wv_new)
|
1263
|
-
#
|
1264
|
-
#This didn't work well:
|
1265
|
-
##Experiment with "soft" min and max functions to see if that fixes cvxopt getting stuck
|
1266
|
-
## so far, this hasn't helped.
|
1267
|
-
#
|
1268
|
-
#def _softmax(ar):
|
1269
|
-
# return _np.log(_np.sum([_np.exp(x) for x in ar]))
|
1270
|
-
#
|
1271
|
-
#def _softmin(ar):
|
1272
|
-
# return -_np.log(_np.sum([_np.exp(-x) for x in ar]))
|