pyGSTi 0.9.12__cp39-cp39-win_amd64.whl → 0.9.13__cp39-cp39-win_amd64.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (225) hide show
  1. pyGSTi-0.9.13.dist-info/METADATA +197 -0
  2. {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/RECORD +211 -220
  3. {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/WHEEL +1 -1
  4. pygsti/_version.py +2 -2
  5. pygsti/algorithms/contract.py +1 -1
  6. pygsti/algorithms/core.py +62 -35
  7. pygsti/algorithms/fiducialpairreduction.py +95 -110
  8. pygsti/algorithms/fiducialselection.py +17 -8
  9. pygsti/algorithms/gaugeopt.py +2 -2
  10. pygsti/algorithms/germselection.py +87 -77
  11. pygsti/algorithms/mirroring.py +0 -388
  12. pygsti/algorithms/randomcircuit.py +165 -1333
  13. pygsti/algorithms/rbfit.py +0 -234
  14. pygsti/baseobjs/basis.py +94 -396
  15. pygsti/baseobjs/errorgenbasis.py +0 -132
  16. pygsti/baseobjs/errorgenspace.py +0 -10
  17. pygsti/baseobjs/label.py +52 -168
  18. pygsti/baseobjs/opcalc/fastopcalc.cp39-win_amd64.pyd +0 -0
  19. pygsti/baseobjs/opcalc/fastopcalc.pyx +2 -2
  20. pygsti/baseobjs/polynomial.py +13 -595
  21. pygsti/baseobjs/protectedarray.py +72 -132
  22. pygsti/baseobjs/statespace.py +1 -0
  23. pygsti/circuits/__init__.py +1 -1
  24. pygsti/circuits/circuit.py +753 -504
  25. pygsti/circuits/circuitconstruction.py +0 -4
  26. pygsti/circuits/circuitlist.py +47 -5
  27. pygsti/circuits/circuitparser/__init__.py +8 -8
  28. pygsti/circuits/circuitparser/fastcircuitparser.cp39-win_amd64.pyd +0 -0
  29. pygsti/circuits/circuitstructure.py +3 -3
  30. pygsti/circuits/cloudcircuitconstruction.py +27 -14
  31. pygsti/data/datacomparator.py +4 -9
  32. pygsti/data/dataset.py +51 -46
  33. pygsti/data/hypothesistest.py +0 -7
  34. pygsti/drivers/bootstrap.py +0 -49
  35. pygsti/drivers/longsequence.py +46 -10
  36. pygsti/evotypes/basereps_cython.cp39-win_amd64.pyd +0 -0
  37. pygsti/evotypes/chp/opreps.py +0 -61
  38. pygsti/evotypes/chp/statereps.py +0 -32
  39. pygsti/evotypes/densitymx/effectcreps.cpp +9 -10
  40. pygsti/evotypes/densitymx/effectreps.cp39-win_amd64.pyd +0 -0
  41. pygsti/evotypes/densitymx/effectreps.pyx +1 -1
  42. pygsti/evotypes/densitymx/opreps.cp39-win_amd64.pyd +0 -0
  43. pygsti/evotypes/densitymx/opreps.pyx +2 -2
  44. pygsti/evotypes/densitymx/statereps.cp39-win_amd64.pyd +0 -0
  45. pygsti/evotypes/densitymx/statereps.pyx +1 -1
  46. pygsti/evotypes/densitymx_slow/effectreps.py +7 -23
  47. pygsti/evotypes/densitymx_slow/opreps.py +16 -23
  48. pygsti/evotypes/densitymx_slow/statereps.py +10 -3
  49. pygsti/evotypes/evotype.py +39 -2
  50. pygsti/evotypes/stabilizer/effectreps.cp39-win_amd64.pyd +0 -0
  51. pygsti/evotypes/stabilizer/effectreps.pyx +0 -4
  52. pygsti/evotypes/stabilizer/opreps.cp39-win_amd64.pyd +0 -0
  53. pygsti/evotypes/stabilizer/opreps.pyx +0 -4
  54. pygsti/evotypes/stabilizer/statereps.cp39-win_amd64.pyd +0 -0
  55. pygsti/evotypes/stabilizer/statereps.pyx +1 -5
  56. pygsti/evotypes/stabilizer/termreps.cp39-win_amd64.pyd +0 -0
  57. pygsti/evotypes/stabilizer/termreps.pyx +0 -7
  58. pygsti/evotypes/stabilizer_slow/effectreps.py +0 -22
  59. pygsti/evotypes/stabilizer_slow/opreps.py +0 -4
  60. pygsti/evotypes/stabilizer_slow/statereps.py +0 -4
  61. pygsti/evotypes/statevec/effectreps.cp39-win_amd64.pyd +0 -0
  62. pygsti/evotypes/statevec/effectreps.pyx +1 -1
  63. pygsti/evotypes/statevec/opreps.cp39-win_amd64.pyd +0 -0
  64. pygsti/evotypes/statevec/opreps.pyx +2 -2
  65. pygsti/evotypes/statevec/statereps.cp39-win_amd64.pyd +0 -0
  66. pygsti/evotypes/statevec/statereps.pyx +1 -1
  67. pygsti/evotypes/statevec/termreps.cp39-win_amd64.pyd +0 -0
  68. pygsti/evotypes/statevec/termreps.pyx +0 -7
  69. pygsti/evotypes/statevec_slow/effectreps.py +0 -3
  70. pygsti/evotypes/statevec_slow/opreps.py +0 -5
  71. pygsti/extras/__init__.py +0 -1
  72. pygsti/extras/drift/signal.py +1 -1
  73. pygsti/extras/drift/stabilityanalyzer.py +3 -1
  74. pygsti/extras/interpygate/__init__.py +12 -0
  75. pygsti/extras/interpygate/core.py +0 -36
  76. pygsti/extras/interpygate/process_tomography.py +44 -10
  77. pygsti/extras/rpe/rpeconstruction.py +0 -2
  78. pygsti/forwardsims/__init__.py +1 -0
  79. pygsti/forwardsims/forwardsim.py +50 -93
  80. pygsti/forwardsims/mapforwardsim.py +78 -20
  81. pygsti/forwardsims/mapforwardsim_calc_densitymx.cp39-win_amd64.pyd +0 -0
  82. pygsti/forwardsims/mapforwardsim_calc_densitymx.pyx +65 -66
  83. pygsti/forwardsims/mapforwardsim_calc_generic.py +91 -13
  84. pygsti/forwardsims/matrixforwardsim.py +72 -17
  85. pygsti/forwardsims/termforwardsim.py +9 -111
  86. pygsti/forwardsims/termforwardsim_calc_stabilizer.cp39-win_amd64.pyd +0 -0
  87. pygsti/forwardsims/termforwardsim_calc_statevec.cp39-win_amd64.pyd +0 -0
  88. pygsti/forwardsims/termforwardsim_calc_statevec.pyx +0 -651
  89. pygsti/forwardsims/torchfwdsim.py +265 -0
  90. pygsti/forwardsims/weakforwardsim.py +2 -2
  91. pygsti/io/__init__.py +1 -2
  92. pygsti/io/mongodb.py +0 -2
  93. pygsti/io/stdinput.py +6 -22
  94. pygsti/layouts/copalayout.py +10 -12
  95. pygsti/layouts/distlayout.py +0 -40
  96. pygsti/layouts/maplayout.py +103 -25
  97. pygsti/layouts/matrixlayout.py +99 -60
  98. pygsti/layouts/prefixtable.py +1534 -52
  99. pygsti/layouts/termlayout.py +1 -1
  100. pygsti/modelmembers/instruments/instrument.py +3 -3
  101. pygsti/modelmembers/instruments/tpinstrument.py +2 -2
  102. pygsti/modelmembers/modelmember.py +0 -17
  103. pygsti/modelmembers/operations/__init__.py +3 -4
  104. pygsti/modelmembers/operations/affineshiftop.py +206 -0
  105. pygsti/modelmembers/operations/composederrorgen.py +1 -1
  106. pygsti/modelmembers/operations/composedop.py +1 -24
  107. pygsti/modelmembers/operations/denseop.py +5 -5
  108. pygsti/modelmembers/operations/eigpdenseop.py +2 -2
  109. pygsti/modelmembers/operations/embeddederrorgen.py +1 -1
  110. pygsti/modelmembers/operations/embeddedop.py +0 -1
  111. pygsti/modelmembers/operations/experrorgenop.py +5 -2
  112. pygsti/modelmembers/operations/fullarbitraryop.py +1 -0
  113. pygsti/modelmembers/operations/fullcptpop.py +2 -2
  114. pygsti/modelmembers/operations/fulltpop.py +28 -6
  115. pygsti/modelmembers/operations/fullunitaryop.py +5 -4
  116. pygsti/modelmembers/operations/lindbladcoefficients.py +93 -78
  117. pygsti/modelmembers/operations/lindbladerrorgen.py +268 -441
  118. pygsti/modelmembers/operations/linearop.py +7 -27
  119. pygsti/modelmembers/operations/opfactory.py +1 -1
  120. pygsti/modelmembers/operations/repeatedop.py +1 -24
  121. pygsti/modelmembers/operations/staticstdop.py +1 -1
  122. pygsti/modelmembers/povms/__init__.py +3 -3
  123. pygsti/modelmembers/povms/basepovm.py +7 -36
  124. pygsti/modelmembers/povms/complementeffect.py +4 -9
  125. pygsti/modelmembers/povms/composedeffect.py +0 -320
  126. pygsti/modelmembers/povms/computationaleffect.py +1 -1
  127. pygsti/modelmembers/povms/computationalpovm.py +3 -1
  128. pygsti/modelmembers/povms/effect.py +3 -5
  129. pygsti/modelmembers/povms/marginalizedpovm.py +3 -81
  130. pygsti/modelmembers/povms/tppovm.py +74 -2
  131. pygsti/modelmembers/states/__init__.py +2 -5
  132. pygsti/modelmembers/states/composedstate.py +0 -317
  133. pygsti/modelmembers/states/computationalstate.py +3 -3
  134. pygsti/modelmembers/states/cptpstate.py +4 -4
  135. pygsti/modelmembers/states/densestate.py +10 -8
  136. pygsti/modelmembers/states/fullpurestate.py +0 -24
  137. pygsti/modelmembers/states/purestate.py +1 -1
  138. pygsti/modelmembers/states/state.py +5 -6
  139. pygsti/modelmembers/states/tpstate.py +28 -10
  140. pygsti/modelmembers/term.py +3 -6
  141. pygsti/modelmembers/torchable.py +50 -0
  142. pygsti/modelpacks/_modelpack.py +1 -1
  143. pygsti/modelpacks/smq1Q_ZN.py +3 -1
  144. pygsti/modelpacks/smq2Q_XXYYII.py +2 -1
  145. pygsti/modelpacks/smq2Q_XY.py +3 -3
  146. pygsti/modelpacks/smq2Q_XYI.py +2 -2
  147. pygsti/modelpacks/smq2Q_XYICNOT.py +3 -3
  148. pygsti/modelpacks/smq2Q_XYICPHASE.py +3 -3
  149. pygsti/modelpacks/smq2Q_XYXX.py +1 -1
  150. pygsti/modelpacks/smq2Q_XYZICNOT.py +3 -3
  151. pygsti/modelpacks/smq2Q_XYZZ.py +1 -1
  152. pygsti/modelpacks/stdtarget.py +0 -121
  153. pygsti/models/cloudnoisemodel.py +1 -2
  154. pygsti/models/explicitcalc.py +3 -3
  155. pygsti/models/explicitmodel.py +3 -13
  156. pygsti/models/fogistore.py +5 -3
  157. pygsti/models/localnoisemodel.py +1 -2
  158. pygsti/models/memberdict.py +0 -12
  159. pygsti/models/model.py +801 -68
  160. pygsti/models/modelconstruction.py +4 -4
  161. pygsti/models/modelnoise.py +2 -2
  162. pygsti/models/modelparaminterposer.py +1 -1
  163. pygsti/models/oplessmodel.py +1 -1
  164. pygsti/models/qutrit.py +15 -14
  165. pygsti/objectivefns/objectivefns.py +75 -140
  166. pygsti/objectivefns/wildcardbudget.py +2 -7
  167. pygsti/optimize/__init__.py +1 -0
  168. pygsti/optimize/arraysinterface.py +28 -0
  169. pygsti/optimize/customcg.py +0 -12
  170. pygsti/optimize/customlm.py +129 -323
  171. pygsti/optimize/customsolve.py +2 -2
  172. pygsti/optimize/optimize.py +0 -84
  173. pygsti/optimize/simplerlm.py +841 -0
  174. pygsti/optimize/wildcardopt.py +19 -598
  175. pygsti/protocols/confidenceregionfactory.py +28 -14
  176. pygsti/protocols/estimate.py +31 -14
  177. pygsti/protocols/gst.py +238 -142
  178. pygsti/protocols/modeltest.py +19 -12
  179. pygsti/protocols/protocol.py +9 -37
  180. pygsti/protocols/rb.py +450 -79
  181. pygsti/protocols/treenode.py +8 -2
  182. pygsti/protocols/vb.py +108 -206
  183. pygsti/protocols/vbdataframe.py +1 -1
  184. pygsti/report/factory.py +0 -15
  185. pygsti/report/fogidiagram.py +1 -17
  186. pygsti/report/modelfunction.py +12 -3
  187. pygsti/report/mpl_colormaps.py +1 -1
  188. pygsti/report/plothelpers.py +11 -3
  189. pygsti/report/report.py +16 -0
  190. pygsti/report/reportables.py +41 -37
  191. pygsti/report/templates/offline/pygsti_dashboard.css +6 -0
  192. pygsti/report/templates/offline/pygsti_dashboard.js +12 -0
  193. pygsti/report/workspace.py +2 -14
  194. pygsti/report/workspaceplots.py +328 -505
  195. pygsti/tools/basistools.py +9 -36
  196. pygsti/tools/edesigntools.py +124 -96
  197. pygsti/tools/fastcalc.cp39-win_amd64.pyd +0 -0
  198. pygsti/tools/fastcalc.pyx +35 -81
  199. pygsti/tools/internalgates.py +151 -15
  200. pygsti/tools/jamiolkowski.py +5 -5
  201. pygsti/tools/lindbladtools.py +19 -11
  202. pygsti/tools/listtools.py +0 -114
  203. pygsti/tools/matrixmod2.py +1 -1
  204. pygsti/tools/matrixtools.py +173 -339
  205. pygsti/tools/nameddict.py +1 -1
  206. pygsti/tools/optools.py +154 -88
  207. pygsti/tools/pdftools.py +0 -25
  208. pygsti/tools/rbtheory.py +3 -320
  209. pygsti/tools/slicetools.py +64 -12
  210. pyGSTi-0.9.12.dist-info/METADATA +0 -157
  211. pygsti/algorithms/directx.py +0 -711
  212. pygsti/evotypes/qibo/__init__.py +0 -33
  213. pygsti/evotypes/qibo/effectreps.py +0 -78
  214. pygsti/evotypes/qibo/opreps.py +0 -376
  215. pygsti/evotypes/qibo/povmreps.py +0 -98
  216. pygsti/evotypes/qibo/statereps.py +0 -174
  217. pygsti/extras/rb/__init__.py +0 -13
  218. pygsti/extras/rb/benchmarker.py +0 -957
  219. pygsti/extras/rb/dataset.py +0 -378
  220. pygsti/extras/rb/io.py +0 -814
  221. pygsti/extras/rb/simulate.py +0 -1020
  222. pygsti/io/legacyio.py +0 -385
  223. pygsti/modelmembers/povms/denseeffect.py +0 -142
  224. {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/LICENSE +0 -0
  225. {pyGSTi-0.9.12.dist-info → pyGSTi-0.9.13.dist-info}/top_level.txt +0 -0
@@ -17,6 +17,25 @@ import numpy as _np
17
17
  from pygsti.objectivefns.wildcardbudget import update_circuit_probs as _update_circuit_probs
18
18
  from pygsti.optimize.optimize import minimize as _minimize
19
19
 
20
+ """Developer notes
21
+
22
+ Removed functions
23
+ -----------------
24
+
25
+ This file used to have three algorithms for optimizing wildcard budgets that relied on
26
+ CVXOPT's nonlinear optimization interface. In June 2024 we investigated whether these
27
+ algorithms could be re-implemented to rely only on CVXPY's modeling capabilities. We
28
+ came to the conclusion that while that may have been possible, it would have involved
29
+ an inordinate amount of work, and that for the sake of maintainability it was better to
30
+ remove these CVXOPT-based algorithms from pyGSTi altogether.
31
+
32
+ Here's a hash for one of the last commits on pyGSTi's develop branch that had these
33
+ algorithms: 723cd24aec3b90d28b0fcd9b31145b920c256acf.
34
+
35
+ See https://github.com/sandialabs/pyGSTi/pull/444 for more information.
36
+
37
+ """
38
+
20
39
 
21
40
  def optimize_wildcard_budget_neldermead(budget, L1weights, wildcard_objfn, two_dlogl_threshold,
22
41
  redbox_threshold, printer, smart_init=True, max_outer_iters=10,
@@ -48,19 +67,6 @@ def optimize_wildcard_budget_neldermead(budget, L1weights, wildcard_objfn, two_d
48
67
 
49
68
  return max(0, two_dlogl - two_dlogl_threshold) + percircuit_penalty
50
69
 
51
- ##For debugging wildcard (see below for suggested insertion point)
52
- #def _wildcard_fit_criteria_debug(wv):
53
- # dlogl_elements = logl_wildcard_fn.lsvec(wv)**2 # b/c WC fn only has sqrt of terms implemented now
54
- # for i in range(num_circuits):
55
- # dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
56
- # two_dlogl_percircuit = 2 * dlogl_percircuit
57
- # two_dlogl = sum(two_dlogl_percircuit)
58
- # print("Aggregate penalty = ", two_dlogl, "-", two_dlogl_threshold, "=", two_dlogl - two_dlogl_threshold)
59
- # print("Per-circuit (redbox) penalty = ", sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None)))
60
- # print(" per-circuit threshold = ", redbox_threshold, " highest violators = ")
61
- # sorted_percircuit = sorted(enumerate(two_dlogl_percircuit), key=lambda x: x[1], reverse=True)
62
- # print('\n'.join(["(%d) %s: %g" % (i, layout.circuits[i].str, val) for i, val in sorted_percircuit[0:10]]))
63
-
64
70
  num_iters = 0
65
71
  wvec_init = budget.to_vector()
66
72
 
@@ -220,44 +226,6 @@ def _get_critical_circuit_budgets(objfn, redbox_threshold):
220
226
  return global_critical_percircuit_budgets
221
227
 
222
228
 
223
- # Aggregate 2-delta-logl criteria (for cvxopt call below, as we want this function to be <= 0)
224
- # - for each circuit, we have the sum of -2Nf*logl(p) + const. terms
225
- # - the derivatives taken below are complicated because they're derivatives with respect to
226
- # the circuit's *wildcard budget*, which is effectively w.r.t `p` except all the p's must
227
- # sum to 1. We compute these derivatives as follows:
228
- #
229
- # - 1st deriv: the first derivative of each term is -Nf/p and N is common to all the terms of
230
- # a single circuit so this is dictated by chi = f/p >= 0. All these terms are positive (the
231
- # deriv is negative), and we want to move probability from the terms with smallest chi to
232
- # largest chi. Note here that positive `p` means *more* wildcard budget and so the largest-chi
233
- # terms have their p_i increase (dp_i = dp) whereas the smallest-chi terms have p_i decrease
234
- # (dp_i = -dp). When multiple terms have the same chi then we split the total dp
235
- # (delta-probability) according to 1 / 2nd-deriv = p**2/Nf. This is so that if
236
- # chi1 = f1/p1 = chi2 = f2/p2 and we want the chi's to remain equal after
237
- # p1 -> p1 + lambda1*dp, p2 -> p2 + lambda2*dp then we get:
238
- # (p1 + lambda1*dp) / f1 = 1/chi1 + lambda1/f1 * dp = 1/chi2 + lambda2/f2 * dp, so
239
- # lambda1/f1 = lambda2/f2 => lambda1/lambda2 = f1/f2. Since lambda1 + lambda2 = 1,
240
- # we get lambda1 (1 + f2/f1) = 1 => lambda1 = f1 / (f1 + f2)
241
- # In general, lambda_i = f_i / sum_fs_with_max_chi.
242
- # Note: f1/p1 = f2/p2 => f1/f2 = p1/p2 so lambda_i also could be = p_i / sum_ps_with_max_chi
243
- # We could also derive by wanting the derivs wrt chi be equal:
244
- # d(chi1)/dp = d(chi2)/dp => -f1/p1**2 * lambda_1 = -f2/p2**2 * lambda_2
245
- # => lambda1/lambda2 = p1/p2 as before (recall dp1 = lambda1 * dp)
246
- # Note that this also means the lambdas could be weighted by the full 2nd deriv: Nf/p**2
247
- # ** IN SUMMARY, the total derivative is:
248
- # -2N * (sum_max_chi(f_i/p_i * lambda_i) - sum_min_chi(f_i/p_i * lambda_i))
249
- # = -2N * (max_chi - min_chi)
250
- #
251
- # - 2nd deriv: same as above, but now different lambda_i matter:
252
- # = 2N * (sum_max_chi(f_i/p_i**2 * lambda_i**2) - sum_min_chi(f_i/p_i**2 * lambda_i**2))
253
- # (where we take the lambda_i as given by the frequencies, so they aren't diff'd)
254
- # If we took lambda_i = p_i / sum_of_ps then we'd get:
255
- # d/dp (f_i/p_i * lambda_i) = -f_i/p_i**2 * lambda_i**2 + f_i/p_i * dlambda_i/dp
256
- # = -f_i/p_i**2 * lambda_i**2 (see below)
257
- # Note dlambda_i/dp = lambda_i / sum_of_ps - p_i / (sum_ps)**2 * sum(lambda_i) = 0
258
- # So we get the same result.
259
-
260
-
261
229
  def _agg_dlogl(current_probs, objfn, two_dlogl_threshold):
262
230
  #Note: current_probs is a *local* quantity
263
231
  p, f, n, N = current_probs, objfn.freqs, objfn.counts, objfn.total_counts
@@ -369,60 +337,6 @@ def _agg_dlogl_hessian(current_probs, objfn, percircuit_budget_deriv, probs_deri
369
337
  return objfn.layout.allsum_local_quantity('c', local_H, use_shared_mem=False)
370
338
 
371
339
 
372
- def _proxy_agg_dlogl(x, tvds, fn0s, percircuit_budget_deriv, two_dlogl_threshold):
373
- # expects percircuit_budget_deriv to be for all (*global*) circuits
374
- percircuit_budgets = _np.dot(percircuit_budget_deriv, x)
375
- num_circuits = percircuit_budgets.shape[0]
376
- a = 4; b = 2 # fit params: must be same in all proxy fns
377
-
378
- f = 0
379
- for i in range(num_circuits):
380
- fn0 = fn0s[i]; tvd = tvds[i]; x = percircuit_budgets[i]
381
- f += (fn0 / _np.exp(a)) * _np.exp(a - b * (x / tvd)**2 - _np.sqrt(2 * b) * (x / tvd))
382
- return f - two_dlogl_threshold
383
-
384
-
385
- def _proxy_agg_dlogl_deriv(x, tvds, fn0s, percircuit_budget_deriv):
386
- # expects percircuit_budget_deriv to be for all (*global*) circuits
387
- percircuit_budgets = _np.dot(percircuit_budget_deriv, x)
388
- num_circuits = percircuit_budgets.shape[0]
389
- a = 4; b = 2 # fit params: must be same in all proxy fns
390
-
391
- agg_dlogl_deriv_wrt_percircuit_budgets = _np.zeros(num_circuits, 'd')
392
- for i in range(num_circuits):
393
- fn0 = fn0s[i]; tvd = tvds[i]; x = percircuit_budgets[i]
394
- agg_dlogl_deriv_wrt_percircuit_budgets[i] = \
395
- (fn0 / _np.exp(a)) * _np.exp(a - b * (x / tvd)**2
396
- - _np.sqrt(2 * b) * (x / tvd)) * (-2 * b * x / tvd**2
397
- - _np.sqrt(2 * b) / tvd)
398
- #This isn't always true in "proxy" case - maybe clip to 0?
399
- #assert(_np.all(agg_dlogl_deriv_wrt_percircuit_budgets <= 0)), \
400
- # "Derivative of aggregate LLR wrt any circuit budget should be negative"
401
- return _np.dot(agg_dlogl_deriv_wrt_percircuit_budgets, percircuit_budget_deriv)
402
-
403
-
404
- def _proxy_agg_dlogl_hessian(x, tvds, fn0s, percircuit_budget_deriv):
405
- # expects percircuit_budget_deriv to be for all (*global*) circuits
406
- percircuit_budgets = _np.dot(percircuit_budget_deriv, x)
407
- num_circuits = percircuit_budgets.shape[0]
408
- a = 4; b = 2 # fit params: must be same in all proxy fns
409
-
410
- agg_dlogl_hessian_wrt_percircuit_budgets = _np.zeros(num_circuits)
411
- for i in range(num_circuits):
412
- fn0 = fn0s[i]; tvd = tvds[i]; x = percircuit_budgets[i]
413
- agg_dlogl_hessian_wrt_percircuit_budgets[i] = \
414
- (fn0 / _np.exp(a)) * _np.exp(a - b * (x / tvd)**2 - _np.sqrt(2 * b) * (x / tvd)) * (
415
- (-2 * b * x / tvd**2 - _np.sqrt(2 * b) / tvd)**2 - 2 * b / tvd**2)
416
- assert(_np.all(agg_dlogl_hessian_wrt_percircuit_budgets >= -1e-8)), \
417
- "Hessian of aggregate LLR wrt any circuit budget should be positive"
418
- H = _np.dot(percircuit_budget_deriv.T,
419
- _np.dot(_np.diag(agg_dlogl_hessian_wrt_percircuit_budgets),
420
- percircuit_budget_deriv)) # (nW, nC)(nC)(nC, nW)
421
- #evals = _np.linalg.eigvals(H)
422
- #assert(_np.all(evals >= -1e-8))
423
- return H
424
-
425
-
426
340
  def _get_percircuit_budget_deriv(budget, layout):
427
341
  """ Returns local_percircuit_budget_deriv, global_percircuit_budget_deriv """
428
342
  percircuit_budget_deriv = budget.precompute_for_same_circuits(layout.circuits) # for *local* circuits
@@ -492,168 +406,6 @@ def optimize_wildcard_bisect_alpha(budget, objfn, two_dlogl_threshold, redbox_th
492
406
  return
493
407
 
494
408
 
495
- def optimize_wildcard_budget_cvxopt(budget, L1weights, objfn, two_dlogl_threshold, redbox_threshold,
496
- printer, abs_tol=1e-5, rel_tol=1e-5, max_iters=50):
497
- """Uses CVXOPT to optimize the wildcard budget. Includes both aggregate and per-circuit constraints."""
498
- #Use cvxopt
499
- import cvxopt as _cvxopt
500
- # Minimize f_0(wv) = |wv|_1 (perhaps weighted) subject to the constraints:
501
- # dot(percircuit_budget_deriv, wv) >= critical_percircuit_budgets
502
- # 2 * aggregate_dlogl <= two_dlogl_threshold => f_1(wv) = 2 * aggregate_dlogl(wv) - threshold <= 0
503
-
504
- layout = objfn.layout
505
- wv = budget.to_vector().copy()
506
- n = len(wv)
507
- x0 = wv.reshape((n, 1)) # TODO - better guess?
508
-
509
- initial_probs = objfn.probs.copy() # *local*
510
- current_probs = initial_probs.copy()
511
- percircuit_budget_deriv, global_percircuit_budget_deriv = _get_percircuit_budget_deriv(budget, layout)
512
-
513
- critical_percircuit_budgets = _get_critical_circuit_budgets(objfn, redbox_threshold) # for *global* circuits
514
- critical_percircuit_budgets.shape = (len(critical_percircuit_budgets), 1)
515
-
516
- _cvxopt.solvers.options['abstol'] = abs_tol
517
- _cvxopt.solvers.options['reltol'] = rel_tol
518
- _cvxopt.solvers.options['maxiters'] = max_iters
519
-
520
- def F(x=None, z=None, debug=True):
521
- if z is None and x is None:
522
- # (m, x0) where m is number of nonlinear constraints and x0 is in domain of f
523
- return (1, _cvxopt.matrix(x0))
524
-
525
- if min(x) < 0.0:
526
- return None # don't allow negative wildcard vector components
527
-
528
- budget.from_vector(_np.array(x))
529
- p_deriv = budget.update_probs(initial_probs, current_probs, objfn.freqs, layout, percircuit_budget_deriv,
530
- return_deriv=True)
531
-
532
- #Evaluate F(x) => return (f, Df)
533
- f = _cvxopt.matrix(_np.array([_agg_dlogl(current_probs, objfn,
534
- two_dlogl_threshold)]).reshape((1, 1))) # shape (m,1)
535
- Df = _cvxopt.matrix(_np.empty((1, n), 'd')) # shape (m, n)
536
- Df[0, :] = _agg_dlogl_deriv(current_probs, objfn, percircuit_budget_deriv, p_deriv)
537
-
538
- if z is None:
539
- return f, Df
540
-
541
- # additionally, compute H = z_0 * Hessian(f_0)(wv)
542
- H = _cvxopt.matrix(z[0] * _agg_dlogl_hessian(current_probs, objfn, percircuit_budget_deriv, p_deriv))
543
- evals = _np.linalg.eigvals(H)
544
- assert(_np.all(evals >= -1e-8)) # tests *global* H
545
- return f, Df, H
546
-
547
- #check_fd([0.0001] * n, True)
548
-
549
- #CVXOPT
550
- printer.log("Beginning cvxopt.cpl solve...")
551
- c = _cvxopt.matrix(L1weights.reshape((n, 1)))
552
- G = -_cvxopt.matrix(_np.concatenate((global_percircuit_budget_deriv, _np.identity(n, 'd')), axis=0))
553
- h = -_cvxopt.matrix(_np.concatenate((critical_percircuit_budgets, _np.zeros((n, 1), 'd')), axis=0))
554
- #result = _cvxopt.solvers.cpl(c, F) # kktsolver='ldl2'
555
- result = _cvxopt.solvers.cpl(c, F, G, h) # kktsolver='ldl2'
556
-
557
- #This didn't seem to help much:
558
- #print("Attempting restart...")
559
- #x0[:,0] = list(result['x'])
560
- #result = _cvxopt.solvers.cpl(c, F) # kktsolver='ldl2'
561
-
562
- printer.log("CVXOPT result = " + str(result))
563
- printer.log("x = " + str(list(result['x'])))
564
- printer.log("y = " + str(list(result['y'])))
565
- printer.log("znl = " + str(list(result['znl'])))
566
- printer.log("snl = " + str(list(result['snl'])))
567
- budget.from_vector(result['x'])
568
- return
569
-
570
-
571
- def optimize_wildcard_budget_cvxopt_zeroreg(budget, L1weights, objfn, two_dlogl_threshold, redbox_threshold,
572
- printer, abs_tol=1e-5, rel_tol=1e-5, max_iters=50, small=1e-6):
573
- """Adds regularization of the L1 term around zero values of the budget. This doesn't seem to help much."""
574
- #Use cvxopt
575
- import cvxopt as _cvxopt
576
- # Minimize f_0(wv) = |wv|_1 (perhaps weighted) subject to the constraints:
577
- # dot(percircuit_budget_deriv, wv) >= critical_percircuit_budgets
578
- # 2 * aggregate_dlogl <= two_dlogl_threshold => f_1(wv) = 2 * aggregate_dlogl(wv) - threshold <= 0
579
-
580
- layout = objfn.layout
581
- wv = budget.to_vector().copy()
582
- n = len(wv)
583
- x0 = wv.reshape((n, 1))
584
- c = L1weights.reshape((n, 1))
585
- SMALL2 = small**2
586
-
587
- initial_probs = objfn.probs.copy()
588
- current_probs = initial_probs.copy()
589
- percircuit_budget_deriv, global_percircuit_budget_deriv = _get_percircuit_budget_deriv(budget, layout)
590
-
591
- critical_percircuit_budgets = _get_critical_circuit_budgets(objfn, redbox_threshold)
592
- critical_percircuit_budgets.shape = (len(critical_percircuit_budgets), 1)
593
- assert(_np.all(critical_percircuit_budgets >= 0))
594
- assert(_np.all(percircuit_budget_deriv >= 0))
595
-
596
- _cvxopt.solvers.options['abstol'] = abs_tol
597
- _cvxopt.solvers.options['reltol'] = rel_tol
598
- _cvxopt.solvers.options['maxiters'] = max_iters
599
-
600
- def F(x=None, z=None):
601
- if z is None and x is None:
602
- # (m, x0) where m is number of nonlinear constraints and x0 is in domain of f
603
- return (1, _cvxopt.matrix(x0))
604
-
605
- if min(x) < 0.0:
606
- return None # don't allow negative wildcard vector components
607
-
608
- budget.from_vector(x)
609
- p_deriv = budget.update_probs(initial_probs, current_probs, objfn.freqs, layout, percircuit_budget_deriv,
610
- return_deriv=True)
611
-
612
- #Evaluate F(x) => return (f, Df)
613
- sqrtVec = _np.sqrt((c * x)**2 + SMALL2)
614
- f = _cvxopt.matrix(_np.array([float(_np.sum(sqrtVec)),
615
- _agg_dlogl(current_probs, objfn,
616
- two_dlogl_threshold)]).reshape((2, 1))) # shape (m+1,1)
617
-
618
- L1term_grad = c if SMALL2 == 0.0 else c**2 * x / sqrtVec
619
- Df = _cvxopt.matrix(_np.empty((2, n), 'd')) # shape (m+1, n)
620
- Df[0, :] = L1term_grad[:, 0]
621
- Df[1, :] = _agg_dlogl_deriv(current_probs, objfn, percircuit_budget_deriv, p_deriv)
622
- #print("rank Df=", _np.linalg.matrix_rank(Df))
623
- if z is None:
624
- return f, Df
625
-
626
- # additionally, compute H = z_0 * Hessian(f_0)(wv) + z_1 * Hessian(f_1)(wv)
627
- L1_term_hess = _np.zeros((n, n), 'd') if SMALL2 == 0.0 else \
628
- _np.diag(-1.0 / (sqrtVec**3) * (c**2 * x)**2 + c**2 / sqrtVec)
629
- Hf = _cvxopt.matrix(z[0] * L1_term_hess + z[1] * _agg_dlogl_hessian(current_probs, objfn,
630
- percircuit_budget_deriv, p_deriv))
631
- #print("rank Hf=", _np.linalg.matrix_rank(Hf), " z[1]=",z[1])
632
- return f, Df, Hf
633
-
634
- #CVXOPT
635
- printer.log("Beginning cvxopt.cp solve...")
636
- #print("Rank G = ",_np.linalg.matrix_rank(percircuit_budget_deriv))
637
- #result = _cvxopt.solvers.cp(F)
638
- # Condition is Gx <= h => -Gx >= -h
639
- G = -_cvxopt.matrix(_np.concatenate((global_percircuit_budget_deriv, _np.identity(n, 'd')), axis=0))
640
- h = -_cvxopt.matrix(_np.concatenate((critical_percircuit_budgets, _np.zeros((n, 1), 'd')), axis=0))
641
- result = _cvxopt.solvers.cp(F, G, h)
642
-
643
- #This didn't seem to help much:
644
- #print("Attempting restart...")
645
- #x0[:,0] = list(result['x'])
646
- #result = _cvxopt.solvers.cpl(c, F) # kktsolver='ldl2'
647
-
648
- printer.log("CVXOPT result = " + str(result))
649
- printer.log("x = " + str(list(result['x'])))
650
- printer.log("y = " + str(list(result['y'])))
651
- printer.log("znl = " + str(list(result['znl'])))
652
- printer.log("snl = " + str(list(result['snl'])))
653
- budget.from_vector(result['x'])
654
- return
655
-
656
-
657
409
  def optimize_wildcard_budget_barrier(budget, L1weights, objfn, two_dlogl_threshold,
658
410
  redbox_threshold, printer, tol=1e-7, max_iters=50, num_steps=3,
659
411
  save_debugplot_data=False):
@@ -776,13 +528,6 @@ def optimize_wildcard_budget_barrier(budget, L1weights, objfn, two_dlogl_thresho
776
528
  Hobj = t * _np.diag(-1.0 / (sqrtVec**3) * (c**2 * x)**2 + c**2 / sqrtVec) + Hbarrier
777
529
  return obj, Dobj, Hobj
778
530
 
779
- #import scipy.optimize
780
- #def barrier_obj(x):
781
- # x = _np.clip(x, 1e-10, None)
782
- # return t * _np.dot(c.T, x) - _np.log(-barrierF(x, False))
783
- #result = scipy.optimize.minimize(barrier_obj, x, method="CG")
784
- #x = _np.clip(result.x, 0, None)
785
-
786
531
  x, debug_x_list = NewtonSolve(x, NewtonObjective, NewtonObjective_derivs, tol, max_iters, printer - 1)
787
532
  #x, debug_x_list = NewtonSolve(x, NewtonObjective, None, tol, max_iters, printer - 1) # use finite-diff derivs
788
533
 
@@ -924,86 +669,6 @@ def NewtonSolve(initial_x, fn, fn_with_derivs=None, dx_tol=1e-6, max_iters=20, p
924
669
  return x, x_list
925
670
 
926
671
 
927
- def optimize_wildcard_budget_cvxopt_smoothed(budget, L1weights, objfn, two_dlogl_threshold, redbox_threshold,
928
- printer, abs_tol=1e-5, rel_tol=1e-5, max_iters=50):
929
- """
930
- Uses a smooted version of the objective function. Doesn't seem to help much.
931
-
932
- The thinking here was to eliminate the 2nd derivative discontinuities of the original problem.
933
- """
934
- import cvxopt as _cvxopt
935
-
936
- layout = objfn.layout
937
- wv = budget.to_vector().copy()
938
- n = len(wv)
939
- x0 = wv.reshape((n, 1)) # TODO - better guess?
940
-
941
- #initial_probs = objfn.probs.copy()
942
- #current_probs = initial_probs.copy()
943
- percircuit_budget_deriv, global_percircuit_budget_deriv = _get_percircuit_budget_deriv(budget, layout)
944
- critical_percircuit_budgets = _get_critical_circuit_budgets(objfn, redbox_threshold)
945
- critical_percircuit_budgets.shape = (len(critical_percircuit_budgets), 1)
946
- num_circuits = len(layout.circuits)
947
-
948
- _cvxopt.solvers.options['abstol'] = abs_tol
949
- _cvxopt.solvers.options['reltol'] = rel_tol
950
- _cvxopt.solvers.options['maxiters'] = max_iters
951
-
952
- #Prepare for proxy_barrierF evaluations
953
- local_tvds = _np.zeros(num_circuits, 'd')
954
- local_fn0s = _np.zeros(num_circuits, 'd')
955
- for i in range(num_circuits):
956
- p = objfn.probs[layout.indices_for_index(i)]
957
- f = objfn.freqs[layout.indices_for_index(i)]
958
- nn = objfn.counts[layout.indices_for_index(i)] # don't re-use 'n' variable!
959
- N = objfn.total_counts[layout.indices_for_index(i)]
960
- dlogl_elements = objfn.raw_objfn.terms(p, nn, N, f) # N * f * _np.log(f / p)
961
- local_fn0s[i] = 2 * _np.sum(dlogl_elements)
962
- local_tvds[i] = 0.5 * _np.sum(_np.abs(p - f))
963
- tvds = layout.allgather_local_array('c', local_tvds)
964
- fn0s = layout.allgather_local_array('c', local_fn0s)
965
-
966
- def F(x=None, z=None, debug=True):
967
- if z is None and x is None:
968
- # (m, x0) where m is number of nonlinear constraints and x0 is in domain of f
969
- return (1, _cvxopt.matrix(x0))
970
-
971
- if min(x) < 0.0:
972
- return None # don't allow negative wildcard vector components
973
-
974
- #budget.from_vector(_np.array(x))
975
- #budget.update_probs(initial_probs, current_probs, objfn.freqs, layout, percircuit_budget_deriv)
976
-
977
- #Evaluate F(x) => return (f, Df)
978
- f = _cvxopt.matrix(_np.array([_proxy_agg_dlogl(x, tvds, fn0s, global_percircuit_budget_deriv,
979
- two_dlogl_threshold)]).reshape((1, 1))) # shape (m,1)
980
- Df = _cvxopt.matrix(_np.empty((1, n), 'd')) # shape (m, n)
981
- Df[0, :] = _proxy_agg_dlogl_deriv(x, tvds, fn0s, global_percircuit_budget_deriv)
982
-
983
- if z is None:
984
- return f, Df
985
-
986
- # additionally, compute H = z_0 * Hessian(f_0)(wv)
987
- H = _cvxopt.matrix(z[0] * _proxy_agg_dlogl_hessian(x, tvds, fn0s, global_percircuit_budget_deriv))
988
- evals = _np.linalg.eigvals(H)
989
- assert(_np.all(evals >= -1e-8))
990
- return f, Df, H
991
-
992
- printer.log("Beginning cvxopt.cpl solve with smoothed (proxy) fn...")
993
- c = _cvxopt.matrix(L1weights.reshape((n, 1)))
994
- G = -_cvxopt.matrix(_np.concatenate((global_percircuit_budget_deriv, _np.identity(n, 'd')), axis=0))
995
- h = -_cvxopt.matrix(_np.concatenate((critical_percircuit_budgets, _np.zeros((n, 1), 'd')), axis=0))
996
- result = _cvxopt.solvers.cpl(c, F, G, h) # kktsolver='ldl2'
997
-
998
- printer.log("CVXOPT result = " + str(result))
999
- printer.log("x = " + str(list(result['x'])))
1000
- printer.log("y = " + str(list(result['y'])))
1001
- printer.log("znl = " + str(list(result['znl'])))
1002
- printer.log("snl = " + str(list(result['snl'])))
1003
- budget.from_vector(result['x'])
1004
- return
1005
-
1006
-
1007
672
  def _compute_fd(x, fn, compute_hessian=True, eps=1e-7):
1008
673
  x_len = len(x)
1009
674
  grad = _np.zeros(x_len, 'd')
@@ -1026,247 +691,3 @@ def _compute_fd(x, fn, compute_hessian=True, eps=1e-7):
1026
691
  f_eps_kl = fn(x_eps_kl)
1027
692
  hess[k, l] = (f_eps_kl - f_eps_k - f_eps_l + f0) / eps**2
1028
693
  return grad, hess
1029
-
1030
-
1031
- #DEBUG: check with finite diff derivatives:
1032
- #def _check_fd(wv_base, chk_hessian=False):
1033
- # wv_base = _np.array(wv_base, 'd') # [0.0001]*3
1034
- # wv_len = len(wv_base)
1035
- # grad = _np.zeros(wv_len, 'd')
1036
- # f0, grad_chk = F(wv_base, debug=False)
1037
- # eps = 1e-7
1038
- # for k in range(len(wv_base)):
1039
- # wv_eps = wv_base.copy(); wv_eps[k] += eps
1040
- # f_eps, _ = F(wv_eps, debug=False)
1041
- # grad[k] = (f_eps[0] - f0[0]) / eps
1042
- # rel_diff_norm = _np.linalg.norm(grad - grad_chk) / _np.linalg.norm(grad)
1043
- # #print("GRAD CHECK:")
1044
- # #print(grad)
1045
- # #print(grad_chk)
1046
- # #print(" diff = ",grad - grad_chk, " rel_diff_norm=", rel_diff_norm)
1047
- # print("GRAD CHK ", rel_diff_norm)
1048
- # assert(rel_diff_norm < 1e-3)
1049
- # if chk_hessian is False: return
1050
- #
1051
- # hess = _np.zeros((wv_len, wv_len), 'd')
1052
- # f0, _, H_chk = F(wv_base, [1.0], debug=False)
1053
- # eps = 1e-7
1054
- # for k in range(wv_len):
1055
- # wv_eps_k = wv_base.copy(); wv_eps_k[k] += eps
1056
- # f_eps_k, _ = F(wv_eps_k, debug=False)
1057
- # for l in range(wv_len):
1058
- # wv_eps_l = wv_base.copy(); wv_eps_l[l] += eps
1059
- # f_eps_l, _ = F(wv_eps_l, debug=False)
1060
- # wv_eps_kl = wv_eps_k.copy(); wv_eps_kl[l] += eps
1061
- # f_eps_kl, _ = F(wv_eps_kl, debug=False)
1062
- # hess[k, l] = (f_eps_kl[0] - f_eps_k[0] - f_eps_l[0] + f0[0]) / eps**2
1063
- # rel_diff_norm = _np.linalg.norm(hess - H_chk) / _np.linalg.norm(hess)
1064
- # #print("HESSIAN CHECK:")
1065
- # #print(hess)
1066
- # #print(H_chk)
1067
- # #print(" diff = ",hess - H_chk, " rel_diff_norm=", rel_diff_norm)
1068
- # print("HESS CHK ", rel_diff_norm)
1069
- # #assert(rel_diff_norm < 5e-2)
1070
-
1071
-
1072
- #UNUSED?
1073
- #def _wildcard_objective_firstterms(current_probs):
1074
- # dlogl_elements = objfn.raw_objfn.terms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
1075
- # for i in range(num_circuits):
1076
- # dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
1077
- #
1078
- # two_dlogl_percircuit = 2 * dlogl_percircuit
1079
- # two_dlogl = sum(two_dlogl_percircuit)
1080
- # return max(0, two_dlogl - two_dlogl_threshold) \
1081
- # + sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None))
1082
- #
1083
- #def _advance_probs(layout, current_probs, dlogl_percircuit, dlogl_delements, delta_percircuit_budgets):
1084
- # num_circuits = len(layout.circuits)
1085
- # delta_probs = _np.zeros(len(current_probs), 'd')
1086
- # for i in range(num_circuits):
1087
- # #if 2 * dlogl_percircuit[i] <= redbox_threshold and global_criteria_met: continue
1088
- #
1089
- # step = delta_percircuit_budgets[i]
1090
- # #p = current_probs[layout.indices_for_index(i)]
1091
- # chis = dlogl_delements[layout.indices_for_index(i)]
1092
- # maxes = _np.array(_np.abs(chis - _np.max(chis)) < 1.e-4, dtype=int)
1093
- # mins = _np.array(_np.abs(chis - _np.min(chis)) < 1.e-4, dtype=int)
1094
- # add_to = step * mins / sum(mins)
1095
- # take_from = step * maxes / sum(maxes)
1096
- # delta_probs[layout.indices_for_index(i)] = add_to - take_from
1097
- # return delta_probs
1098
- #
1099
- #
1100
- #def wildcard_probs_propagation(budget, initial_wv, final_wv, objfn, layout, num_steps=10):
1101
- # #Begin with a zero budget
1102
- # current_probs = objfn.probs.copy()
1103
- #
1104
- # percircuit_budget_deriv = budget.precompute_for_same_circuits(layout.circuits)
1105
- # dlogl_percircuit = objfn.percircuit()
1106
- #
1107
- # num_circuits = len(layout.circuits)
1108
- # assert(len(dlogl_percircuit) == num_circuits)
1109
- #
1110
- # delta_wv = (final_wv - initial_wv) / num_steps
1111
- # wv = initial_wv.copy()
1112
- # for i in range(nSteps):
1113
- # wv += delta_wv
1114
- # dlogl_elements = objfn.raw_objfn.terms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
1115
- # for i in range(num_circuits):
1116
- # dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
1117
- # dlogl_delements = objfn.raw_objfn.dterms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
1118
- #
1119
- # two_dlogl = sum(2 * dlogl_percircuit)
1120
- # perbox_residual = sum(_np.clip(2 * dlogl_percircuit - redbox_threshold, 0, None))
1121
- # print("Advance: global=", two_dlogl - two_dlogl_threshold, " percircuit=", perbox_residual)
1122
- # print(" wv=", wv)
1123
- #
1124
- # delta_percircuit_budgets = _np.dot(percircuit_budget_deriv, delta_wv)
1125
- # delta_probs = _advance_probs(layout, current_probs, dlogl_percircuit,
1126
- # dlogl_delements, delta_percircuit_budgets) # updates current_probs
1127
- # print("|delta probs| = ", _np.linalg.norm(delta_probs))
1128
- # current_probs += delta_probs
1129
- # return currrent_probs
1130
- #def wildcard_opt_by_propagation() #TODO
1131
- # # Time-evolution approach: Walk downhill in steps until constraints ("firstterms") are satisfied
1132
- # #wv = budget.to_vector().copy()
1133
- #
1134
- # def _criteria_deriv(current_probs, dlogl_percircuit, dlogl_delements, mode, global_criteria_met):
1135
- # # derivative of firstterms wrt per-circuit wilcard budgets - namely if that budget goes up how to most
1136
- # # efficiently reduce firstterms
1137
- # # in doing so, this computes how the per-circuit budget should be allocated to probabilities
1138
- # # (i.e. how probs should be updated) to achieve this decrease in firstterms
1139
- # ret = _np.zeros(num_circuits)
1140
- # max_delta = _np.zeros(num_circuits) # maximum amount of change in per-circuit budget before hitting a
1141
- # # discontinuity in 2nd deriv
1142
- # for i in range(num_circuits):
1143
- # if mode == "percircuit" and 2 * dlogl_percircuit[i] <= redbox_threshold:
1144
- # continue # don't include this circuit's contribution
1145
- # elif mode == "aggregate": # all circuits contribute
1146
- # prefactor = 1.0
1147
- # else: # mode == "both"
1148
- # prefactor = 2.0 # contributes twice: once for per-circuit and once for aggregate
1149
- # if 2 * dlogl_percircuit[i] <= redbox_threshold:
1150
- # if global_criteria_met: continue # no contribution at all_circuits_needing_data
1151
- # else: prefactor = 1.0
1152
- #
1153
- # chis = dlogl_delements[layout.indices_for_index(i)] # ~ f/p (deriv of f*log(p))
1154
- # highest_chi, lowest_chi = _np.max(chis), _np.min(chis)
1155
- # bmaxes = _np.array(_np.abs(chis - highest_chi) < 1.e-4, dtype=bool)
1156
- # bmins = _np.array(_np.abs(chis - lowest_chi) < 1.e-4, dtype=bool)
1157
- # maxes = _np.array(_np.abs(chis - _np.max(chis)) < 1.e-4, dtype=int)
1158
- # mins = _np.array(_np.abs(chis - _np.min(chis)) < 1.e-4, dtype=int)
1159
- #
1160
- # next_chis = chis.copy(); next_chis[bmaxes] = 1.0; next_chis[bmins] = 1.0
1161
- # #p = current_probs[layout.indices_for_index(i)]
1162
- # f = objfn.freqs[layout.indices_for_index(i)]
1163
- # next_highest_chi = _np.max(next_chis) # 2nd highest chi value (may be duplicated)
1164
- # next_lowest_chi = _np.min(next_chis) # 2nd lowest chi value (may be duplicated)
1165
- #
1166
- # # 1/chi = p/f, (1/chi'-1/chi) = dp/f => dp = f(chi - chi')/(chi chi')
1167
- # delta_p = _np.zeros(chis.shape, 'd')
1168
- # delta_p[bmaxes] = f[bmaxes] * (1. / chis[bmaxes] - 1 / next_highest_chi)
1169
- # delta_p[bmins] = f[bmins] * (1. / chis[bmins] - 1 / next_lowest_chi)
1170
- # max_delta[i] = _np.max(_np.abs(delta_p))
1171
- #
1172
- # ret[i] = prefactor * _np.sum(chis * (mins / sum(mins) - maxes / sum(maxes)))
1173
- # return ret, max_delta
1174
- #
1175
- #
1176
- # for mode in (): #("both",): #("percircuit", "aggregate"): # how many & which criteria to enforce on each pass.
1177
- # print("Stage w/mode = ",mode)
1178
- # step = 0.01
1179
- # itr = 0
1180
- # L1grad = L1weights
1181
- # imax = None
1182
- # last_objfn_value = None; last_probs = None # DEBUG
1183
- # last_dlogl_percircuit = last_dlogl_elements = None # DEBUG
1184
- # while True:
1185
- #
1186
- # #Compute current log-likelihood values and derivates wrt probabilities
1187
- # dlogl_elements = objfn.raw_objfn.terms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
1188
- # for i in range(num_circuits):
1189
- # dlogl_percircuit[i] = _np.sum(dlogl_elements[layout.indices_for_index(i)], axis=0)
1190
- # dlogl_delements = objfn.raw_objfn.dterms(current_probs, objfn.counts, objfn.total_counts, objfn.freqs)
1191
- # two_dlogl_percircuit = 2 * dlogl_percircuit
1192
- # two_dlogl = sum(two_dlogl_percircuit)
1193
- # global_criteria_met = two_dlogl < two_dlogl_threshold
1194
- #
1195
- # # check aggregate and per-circuit criteria - exit if met
1196
- # if mode == "aggregate":
1197
- # objfn_value = max(two_dlogl - two_dlogl_threshold, 0)
1198
- # elif mode == "percircuit":
1199
- # perbox_residual = sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None))
1200
- # objfn_value = perbox_residual
1201
- # elif mode == "both":
1202
- # objfn_value = max(two_dlogl - two_dlogl_threshold, 0) \
1203
- # + sum(_np.clip(two_dlogl_percircuit - redbox_threshold, 0, None))
1204
- #
1205
- # print("Iter ", itr, ": mode=", mode, " objfn=", objfn_value, " moved in", imax)
1206
- # print(" wv=", wv); itr += 1
1207
- # if objfn_value < 1e-10: # if global_criteria_met and perbox_residual < 1e-10:
1208
- # break # DONE!
1209
- # if last_objfn_value is not None and last_objfn_value < objfn_value:
1210
- # iproblem = _np.argmax(dlogl_percircuit - last_dlogl_percircuit)
1211
- # print("Circuit ",iproblem," dlogl=", last_dlogl_percircuit[iproblem], " => ",
1212
- # dlogl_percircuit[iproblem])
1213
- # print(" probs: ",last_probs[layout.indices_for_index(iproblem)], " => ",
1214
- # current_probs[layout.indices_for_index(iproblem)])
1215
- # print(" freqs: ",objfn.freqs[layout.indices_for_index(iproblem)])
1216
- # import bpdb; bpdb.set_trace()
1217
- # assert(False), "Objective function should be monotonic!!!"
1218
- # last_objfn_value = objfn_value
1219
- # last_probs = current_probs.copy()
1220
- # last_dlogl_percircuit = dlogl_percircuit.copy()
1221
- # last_dlogl_elements = dlogl_elements.copy()
1222
- #
1223
- # #import bpdb; bpdb.set_trace()
1224
- # criteria_deriv_wrt_percircuit_budgets, maximum_percircuit_budget_delta = \
1225
- # _criteria_deriv(current_probs, dlogl_percircuit, dlogl_delements, mode, global_criteria_met)
1226
- # wv_grad = _np.dot(criteria_deriv_wrt_percircuit_budgets, percircuit_budget_deriv) #+ L1grad
1227
- # grad_norm = _np.linalg.norm(wv_grad)
1228
- # assert(grad_norm > 1e-6), \
1229
- # "Gradient norm == 0! - cannot reduce constraint residuals with more wildcard!"
1230
- #
1231
- # imax = _np.argmax(_np.abs(wv_grad / L1grad)); sgn = _np.sign(wv_grad[imax])
1232
- # wv_grad[:] = 0; wv_grad[imax] = sgn
1233
- # downhill_direction = (-wv_grad / _np.linalg.norm(wv_grad))
1234
- #
1235
- # #Constant step:
1236
- # #step = 1e-5
1237
- # # Variable step: expected reduction = df/dw * dw, so set |dw| = 0.01 * current_f / |df/dw|
1238
- # #step = (0.01 * objfn_value / grad_norm)
1239
- #
1240
- # #Step based on next discontinuity ("breakpoint")
1241
- # # require _np.dot(percircuit_budget_deriv, step * downhill_direction) < maximum_percircuit_budget_delta
1242
- # step = _np.min(maximum_percircuit_budget_delta / _np.dot(percircuit_budget_deriv, downhill_direction))
1243
- # assert(step > 0)
1244
- # step = min(step, 1e-5) # don't allow too large of a step...
1245
- #
1246
- # delta_wv = downhill_direction * step
1247
- # wv += delta_wv
1248
- #
1249
- # delta_percircuit_budgets = _np.dot(percircuit_budget_deriv, delta_wv)
1250
- # #assert(_np.all(delta_percircuit_budgets >= 0))
1251
- # if not _np.all(delta_percircuit_budgets >= 0):
1252
- # import bpdb; bpdb.set_trace()
1253
- # pass
1254
- #
1255
- # delta_probs = _advance_probs(layout, current_probs, dlogl_percircuit, dlogl_delements,
1256
- # delta_percircuit_budgets) #, global_criteria_met) # updates current_probs
1257
- # print("|delta probs| = ", _np.linalg.norm(delta_probs))
1258
- # current_probs += delta_probs
1259
- #
1260
- # #assert(False), "STOP"
1261
- # wv_new = wv
1262
- # print("NEW TEST - final wildcard is ", wv_new)
1263
- #
1264
- #This didn't work well:
1265
- ##Experiment with "soft" min and max functions to see if that fixes cvxopt getting stuck
1266
- ## so far, this hasn't helped.
1267
- #
1268
- #def _softmax(ar):
1269
- # return _np.log(_np.sum([_np.exp(x) for x in ar]))
1270
- #
1271
- #def _softmin(ar):
1272
- # return -_np.log(_np.sum([_np.exp(-x) for x in ar]))