iqm-benchmarks 2.43__py3-none-any.whl → 2.45__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This version of iqm-benchmarks has been flagged as potentially problematic.

mGST/algorithm.py CHANGED
@@ -28,7 +28,7 @@ from mGST.optimization import (
 from mGST.reporting.figure_gen import plot_objf
 
 
-def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
+def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3, mle=False):
     """Riemannian saddle free Newton step on the POVM parametrization
 
     Parameters
@@ -73,7 +73,7 @@ def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
     Fyy = np.zeros((n_povm, r, n_povm, r)).astype(np.complex128)
 
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
-    dA_, dMdM, dMconjdM, dconjdA = ddA_derivs(X, A, B, J, y, r, pdim, n_povm)
+    dA_, dMdM, dMconjdM, dconjdA = ddA_derivs(X, A, B, J, y, r, pdim, n_povm, mle=mle)
 
     # Second derivatives
     for i in range(n_povm):
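A note on the recurring context line above: `np.einsum("ijkl,ijnm -> iknlm", K, K.conj())` turns the Kraus tensor into the superoperators X[i](rho) = sum_j K_ij rho K_ij^†, using the row-major vectorization identity vec(A rho A^†) = (A ⊗ A.conj()) vec(rho). A minimal sketch with toy dimensions (shapes inferred from the surrounding code, not part of the package):

```python
import numpy as np

d, rK, pdim = 2, 1, 2      # toy sizes; in this code, r = pdim**2
r = pdim**2
rng = np.random.default_rng(0)
K = rng.normal(size=(d, rK, pdim, pdim)) + 1j * rng.normal(size=(d, rK, pdim, pdim))

# Superoperator of each gate: X[i] = sum_j K[i, j] (x) K[i, j].conj()
X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))

# Cross-check against the direct Kraus action on a test matrix (row-major vec)
rho = rng.normal(size=(pdim, pdim)) + 1j * rng.normal(size=(pdim, pdim))
direct = sum(K[0, j] @ rho @ K[0, j].conj().T for j in range(rK))
assert np.allclose(X[0] @ rho.reshape(-1), direct.reshape(-1))
```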
@@ -133,12 +133,12 @@ def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
 
     Delta = tangent_proj(A, Delta_A, 1, n_povm)[0]
 
-    a = minimize(lineobjf_A_geodesic, 1e-9, args=(Delta, X, A, rho, J, y), method="COBYLA").x
+    a = minimize(lineobjf_A_geodesic, 1e-9, args=(Delta, X, A, rho, J, y, mle), method="COBYLA").x
     A_new = update_A_geodesic(A, Delta, a)
     return A_new
 
 
-def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
+def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3, mle=False):
     """Riemannian saddle free Newton step on the initial state parametrization
 
     Parameters
@@ -180,11 +180,9 @@ def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
     E = np.array([(A[i].T.conj() @ A[i]).reshape(-1) for i in range(n_povm)])
     H = np.zeros((2, nt, 2, nt)).astype(np.complex128)
     P_T = np.zeros((2, nt, 2, nt)).astype(np.complex128)
-    Fyconjy = np.zeros((r, r)).astype(np.complex128)
-    Fyy = np.zeros((r, r)).astype(np.complex128)
 
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
-    dB_, dMdM, dMconjdM, dconjdB = ddB_derivs(X, A, B, J, y, r, pdim)
+    dB_, dMdM, dMconjdM, dconjdB = ddB_derivs(X, A, B, J, y, r, pdim, mle=mle)
 
     # Second derivatives
     Fyconjy = dMconjdM + dconjdB
@@ -241,14 +239,15 @@ def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
     Delta = (H_abs_inv @ G)[:nt]
     # Projection onto tangent space
     Delta = Delta - Y * (Y.T.conj() @ Delta + Delta.T.conj() @ Y) / 2
-    res = minimize(lineobjf_B_geodesic, 1e-9, args=(Delta, X, E, B, J, y), method="COBYLA", options={"maxiter": 20})
+    res = minimize(
+        lineobjf_B_geodesic, 1e-9, args=(Delta, X, E, B, J, y, mle), method="COBYLA", options={"maxiter": 20}
+    )
     a = res.x
-
     B_new = update_B_geodesic(B, Delta, a)
     return B_new
 
 
-def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA"):
+def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA", mle=False):
     """Do Riemannian gradient descent optimization step on gates
 
     Parameters
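Note the pattern shared by every `minimize(...)` call site in this file: the search direction `Delta` is held fixed and COBYLA tunes only a scalar step size along the geodesic; the change in these hunks is that `mle` is forwarded so the line search minimizes the same objective as the outer iteration. A self-contained sketch of the pattern, with a toy quadratic standing in for the package's `lineobjf_*_geodesic` functions:

```python
import numpy as np
from scipy.optimize import minimize

def line_objective(a, direction, x0):
    # stand-in for lineobjf_B_geodesic: the full objective restricted to step size a
    x = x0 + np.atleast_1d(a)[0] * direction  # a real retraction would follow the geodesic
    return float(np.sum((x - 1.0) ** 2))

x0, direction = np.zeros(4), np.ones(4)
res = minimize(line_objective, 1e-9, args=(direction, x0), method="COBYLA", options={"maxiter": 20})
a = res.x  # scalar step size, exactly as `a = res.x` in the hunks above
```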
@@ -291,7 +290,7 @@ def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA"):
     Delta = np.zeros((d, n, pdim)).astype(np.complex128)
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
 
-    dK_ = dK(X, K, E, rho, J, y, d, r, rK)
+    dK_ = dK(X, K, E, rho, J, y, d, r, rK, mle=mle)
     for k in np.where(~fixed_gates)[0]:
         # derivative
         Fy = dK_[k].reshape(n, pdim)
@@ -299,14 +298,20 @@ def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA"):
         # Riem. gradient taken from conjugate derivative
         rGrad = 2 * (Fy.conj() - Y @ Fy.T @ Y)
         Delta[k] = rGrad
-    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y), method=ls, options={"maxiter": 200})
+
+    # Additional projection onto tangent space to avoid numerical instability
+    Delta = tangent_proj(K, Delta, d, rK)
+
+    res = minimize(
+        lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y, mle), method=ls, options={"maxiter": 200}
+    )
     a = res.x
     K_new = update_K_geodesic(K, Delta, a)
 
     return K_new
 
 
-def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=None):
+def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=None, mle=False):
     """Riemannian saddle free Newton step on each gate individually
 
     Parameters
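The new `tangent_proj` call in `gd` guards against floating-point drift: a tangent vector Delta at an isometry Y (with Y^† Y = 1) must satisfy Y^† Delta + Delta^† Y = 0, and the assembled gradient only obeys this up to rounding error. The package's `tangent_proj` body is not shown in this diff; the sketch below is the textbook Stiefel projection that it presumably generalizes to a stack of Kraus blocks:

```python
import numpy as np

def stiefel_tangent_proj(Y, Delta):
    # Project Delta onto the tangent space at Y: Delta - Y sym(Y^H Delta)
    sym = (Y.conj().T @ Delta + Delta.conj().T @ Y) / 2
    return Delta - Y @ sym

rng = np.random.default_rng(1)
Y, _ = np.linalg.qr(rng.normal(size=(6, 3)))   # a point with Y^H Y = I
T = stiefel_tangent_proj(Y, rng.normal(size=(6, 3)))
# The result satisfies the tangent-space condition to machine precision
assert np.allclose(Y.conj().T @ T + T.conj().T @ Y, 0, atol=1e-12)
```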
@@ -355,8 +360,8 @@ def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=
         fixed_gates = []
 
     # compute derivatives
-    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK)
-    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK)
+    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK, mle=mle)
+    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK, mle=mle)
 
     # Second derivatives
     Fyconjy = dM11.reshape(d, nt, d, nt) + np.einsum("ijklmnop->ikmojlnp", dconjd).reshape((d, nt, d, nt))
@@ -408,14 +413,16 @@ def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=
 
     Delta = tangent_proj(K, Delta_K, d, rK)
 
-    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y), method=ls, options={"maxiter": 200})
+    res = minimize(
+        lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y, mle), method=ls, options={"maxiter": 200}
+    )
     a = res.x
     K_new = update_K_geodesic(K, Delta, a)
 
     return K_new
 
 
-def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA"):
+def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", mle=False):
     """Riemannian saddle free Newton step on product manifold of all gates
 
     Parameters
@@ -459,8 +466,8 @@ def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA"):
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
 
     # compute derivatives
-    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK)
-    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK)
+    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK, mle=mle)
+    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK, mle=mle)
 
     # Second derivatives
     Fyconjy = dM11.reshape(d, nt, d, nt) + np.einsum("ijklmnop->ikmojlnp", dconjd).reshape((d, nt, d, nt))
@@ -522,13 +529,13 @@ def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA"):
 
     # Delta_K is already in tangent space but not to sufficient numerical accuracy
     Delta = tangent_proj(K, Delta_K, d, rK)
-    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y), method=ls, options={"maxiter": 20})
+    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y, mle), method=ls, options={"maxiter": 20})
     a = res.x
     K_new = update_K_geodesic(K, Delta, a)
     return K_new
 
 
-def optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements):
+def optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements, mle=False):
     """Full gate set optimization update alternating on E, K and rho
 
     Parameters
@@ -578,27 +585,27 @@ def optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements):
         A_new = A
         E_new = np.array([(A_new[i].T.conj() @ A_new[i]).reshape(-1) for i in range(n_povm)])
     else:
-        A_new = A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm)
+        A_new = A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, mle=mle)
         E_new = np.array([(A_new[i].T.conj() @ A_new[i]).reshape(-1) for i in range(n_povm)])
-
     if any(((f"G%i" % i in fixed_elements) for i in range(d))):
         fixed_gates = np.array([(f"G%i" % i in fixed_elements) for i in range(d)])
         if method == "SFN":
-            K_new = SFN_riem_Hess(K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=fixed_gates)
+            K_new = SFN_riem_Hess(
+                K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=fixed_gates, mle=mle
+            )
         else:
-            K_new = gd(K, E_new, rho, y, J, d, r, rK, ls="COBYLA", fixed_gates=fixed_gates)
+            K_new = gd(K, E_new, rho, y, J, d, r, rK, ls="COBYLA", fixed_gates=fixed_gates, mle=mle)
     else:
         if method == "SFN":
-            K_new = SFN_riem_Hess_full(K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA")
+            K_new = SFN_riem_Hess_full(K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", mle=mle)
         else:
             fixed_gates = np.array([(f"G%i" % i in fixed_elements) for i in range(d)])
-            K_new = gd(K, E_new, rho, y, J, d, r, rK, fixed_gates=fixed_gates, ls="COBYLA")
-
+            K_new = gd(K, E_new, rho, y, J, d, r, rK, fixed_gates=fixed_gates, ls="COBYLA", mle=mle)
     if "rho" in fixed_elements:
         rho_new = rho
         B_new = B
     else:
-        B_new = B_SFN_riem_Hess(K_new, A_new, B, y, J, d, r, n_povm, lam=1e-3)
+        B_new = B_SFN_riem_Hess(K_new, A_new, B, y, J, d, r, n_povm, lam=1e-3, mle=mle)
         rho_new = (B_new @ B_new.T.conj()).reshape(-1)
     X_new = np.einsum("ijkl,ijnm -> iknlm", K_new, K_new.conj()).reshape((d, r, r))
     return K_new, X_new, E_new, rho_new, A_new, B_new
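`optimize` remains one sweep of block-coordinate descent: update the POVM (A/E), then the gates (K), then the state (B/rho), each with the other blocks held fixed, now with a single `mle` flag selecting the objective for every block. A toy illustration of why alternating exact minimization over blocks drives a coupled objective down (nothing here is mGST-specific):

```python
# Minimize f(u, v) = (u - 1)^2 + (v - 2)^2 + 0.1 * (u - v)^2 by alternating
# closed-form updates, one block at a time (like the A/E, K, and B/rho steps).
def update_u(v):
    return (1 + 0.1 * v) / 1.1    # argmin over u with v fixed

def update_v(u):
    return (2 + 0.1 * u) / 1.1    # argmin over v with u fixed

u = v = 0.0
for _ in range(50):
    u = update_u(v)
    v = update_v(u)
print(u, v)  # converges to the joint minimizer of f
```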
@@ -609,13 +616,13 @@ def run_mGST(
     method="SFN",
     max_inits=10,
     max_iter=200,
-    final_iter=70,
-    target_rel_prec=1e-4,
-    threshold_multiplier=3,
+    final_iter=120,
+    target_rel_prec=1e-5,
+    threshold_multiplier=5,
     fixed_elements=None,
     init=None,
-    testing=False,
-):  # pylint: disable=too-many-branches
+    verbose_level=0,
+):  # pylint: disable=too-many-branches, too-many-statements
     """Main mGST routine
 
     Parameters
@@ -653,11 +660,11 @@ def run_mGST(
 
     Returns
     -------
-    K : numpy array
+    K: numpy array
         Updated estimates of Kraus operators
-    X : numpy array
+    X: numpy array
         Updated estimates of superoperators corresponding to K_new
-    E : numpy array
+    E: numpy array
         Updated POVM estimate
     rho : numpy array
         Updated initial state estimate
@@ -682,7 +689,8 @@ def run_mGST(
     )
 
     success = False
-    qcvv_logger.info(f"Starting mGST optimization...")
+    if verbose_level > 0:
+        qcvv_logger.info(f"Starting mGST optimization...")
 
     if init:
         K, E = (init[0], init[1])
@@ -698,8 +706,8 @@ def run_mGST(
         A = np.array([la.cholesky(E[k].reshape(pdim, pdim) + 1e-14 * np.eye(pdim)).T.conj() for k in range(n_povm)])
         B = la.cholesky(rho.reshape(pdim, pdim))
         res_list = [objf(X, E, rho, J, y)]
-        with logging_redirect_tqdm(loggers=[qcvv_logger]):
-            for _ in trange(max_iter):
+        with logging_redirect_tqdm(loggers=[qcvv_logger] if verbose_level > 0 else None):
+            for _ in trange(max_iter, disable=verbose_level == 0):
                 yb, Jb = batch(y, J, bsize)
                 K, X, E, rho, A, B = optimize(yb, Jb, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements)
                 res_list.append(objf(X, E, rho, J, y))
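The two `-`/`+` pairs above implement the standard tqdm verbosity gate: `disable=verbose_level == 0` removes the progress bar entirely, and `logging_redirect_tqdm` keeps log records from tearing the bar when it is shown. A minimal, self-contained version of the same gating (logger name and loop body are placeholders):

```python
import logging
from tqdm import trange
from tqdm.contrib.logging import logging_redirect_tqdm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("example")  # placeholder for qcvv_logger
verbose_level = 1

with logging_redirect_tqdm(loggers=[logger] if verbose_level > 0 else None):
    for step in trange(100, disable=verbose_level == 0):
        if verbose_level > 0 and step % 50 == 0:
            logger.info("checkpoint at step %d", step)
```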
@@ -707,29 +715,37 @@ def run_mGST(
                 qcvv_logger.info(f"Batch optimization successful, improving estimate over full data....")
                 success = True
                 break
-        if testing:
-            plot_objf(res_list, delta, f"Objective function for batch optimization")
+        if verbose_level == 2:
+            plot_objf(res_list, f"Objective function for batch optimization", delta=delta)
         if success:
             break
-        qcvv_logger.info(f"Run {i+1}/{max_inits} failed, trying new initialization...")
+        if verbose_level > 0:
+            qcvv_logger.info(f"Run {i+1}/{max_inits} failed, trying new initialization...")
 
-    if not success and max_inits > 0:
+    if not success and init is None and verbose_level > 0:
         qcvv_logger.info(f"Success threshold not reached, attempting optimization over full data set...")
-    with logging_redirect_tqdm(loggers=[qcvv_logger]):
-        for _ in trange(final_iter):
-            K, X, E, rho, A, B = optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements)
+    with logging_redirect_tqdm(loggers=[qcvv_logger] if verbose_level > 0 else None):
+        res_list_mle = []
+        for _ in trange(final_iter, disable=verbose_level == 0):
+            K, X, E, rho, A, B = optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements, mle=True)
             res_list.append(objf(X, E, rho, J, y))
-            if len(res_list) >= 2 and np.abs(res_list[-2] - res_list[-1]) < delta * target_rel_prec:
+            res_list_mle.append(objf(X, E, rho, J, y, mle=True))
+            if (
+                len(res_list_mle) >= 2
+                and np.abs(res_list_mle[-2] - res_list_mle[-1]) < res_list_mle[-1] * target_rel_prec
+            ):
                 break
-    if testing:
-        plot_objf(res_list, delta, f"Objective function over batches and full data")
-    if success or (res_list[-1] < delta):
-        qcvv_logger.info(f"Convergence criterion satisfied")
-    else:
+    if verbose_level == 2:
+        plot_objf(res_list, f"Least squares error over batches and full data", delta=delta)
+        plot_objf(res_list_mle, f"Negative log-likelihood over full data")
+    if verbose_level > 0:
+        if success or (res_list[-1] < delta):
+            qcvv_logger.info(f"Convergence criterion satisfied")
+        else:
+            qcvv_logger.warning(
+                f"Convergence criterion not satisfied. Potential causes include too low max_iterations, bad initialization or model mismatch."
+            )
         qcvv_logger.info(
-            f"Convergence criterion not satisfied, inspect results and consider increasing max_iter or using new initializations.",
+            f"Final objective {Decimal(res_list[-1]):.2e} in time {(time.time() - t0):.2f}s",
         )
-        qcvv_logger.info(
-            f"Final objective {Decimal(res_list[-1]):.2e} in time {(time.time() - t0):.2f}s",
-        )
     return K, X, E, rho, res_list
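Two behavioral changes in this final hunk are easy to miss. First, the full-data refinement now always optimizes the log-likelihood (`mle=True`), while `res_list` keeps tracking the least-squares error for the convergence report. Second, the stopping rule changed from an absolute test against `delta * target_rel_prec` to a relative one: stop when the negative log-likelihood improves by less than a `target_rel_prec` fraction of its current value. The rule in isolation, with a synthetic cost sequence:

```python
import numpy as np

target_rel_prec = 1e-5          # new default from the signature hunk above
costs = []                      # plays the role of res_list_mle

for step in range(120):         # final_iter's new default
    costs.append(1.0 + np.exp(-0.5 * step))  # synthetic, improving cost
    if len(costs) >= 2 and abs(costs[-2] - costs[-1]) < costs[-1] * target_rel_prec:
        break                   # relative improvement below threshold
```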
mGST/low_level_jit.py CHANGED
@@ -104,8 +104,8 @@ def contract(X, j_vec):
     return res
 
 
-@njit(cache=True, fastmath=True, parallel=True)
-def objf(X, E, rho, J, y):
+@njit(cache=True, fastmath=True)  # , parallel=True)
+def objf(X, E, rho, J, y, mle=False):
     """Calculate the objective function value for matrices, POVM elements, and target values.
     This function computes the objective function value based on input matrices X, POVM elements E,
 
@@ -123,6 +123,8 @@ def objf(X, E, rho, J, y):
         A 2D array representing the indices for which the objective function will be evaluated.
     y : numpy.ndarray
         A 2D array of shape (n_povm, len(J)) containing the target values.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -139,8 +141,11 @@ def objf(X, E, rho, J, y):
         for ind in j[::-1]:
             state = X[ind] @ state
         for o in range(n_povm):
-            objf_ += abs(E[o].conj() @ state - y[o, i]) ** 2
-    return objf_ / m / n_povm
+            if mle:
+                objf_ -= np.log(abs(E[o].conj() @ state)) * y[o, i]
+            else:
+                objf_ += abs(E[o].conj() @ state - y[o, i]) ** 2 / m / n_povm
+    return objf_
 
 
 @njit(cache=True)
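This rewritten `objf` is the core of the release: with `mle=False` it is the mean squared residual between the model probabilities p = E_o^† · state and the measured frequencies y, and with `mle=True` it is the negative log-likelihood −Σ y·log p. Note that the 1/(m·n_povm) normalization moved inside the least-squares branch so the MLE branch stays unnormalized. The two objectives in plain NumPy, over precomputed probabilities (no Numba, no circuit contraction; shapes assumed for illustration):

```python
import numpy as np

def objective(p, y, mle=False):
    """p: model probabilities, y: observed frequencies, both shaped (n_povm, m)."""
    n_povm, m = y.shape
    if mle:
        return -np.sum(y * np.log(np.abs(p)))       # negative log-likelihood
    return np.sum(np.abs(p - y) ** 2) / m / n_povm  # mean squared residual

rng = np.random.default_rng(2)
y = rng.dirichlet(np.ones(4), size=3).T  # (n_povm=4, m=3) outcome frequencies
p = y + 1e-3                             # slightly perturbed model probabilities
print(objective(p, y), objective(p, y, mle=True))
```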
@@ -242,8 +247,8 @@ def Mp_norm_lower(X_true, E_true, rho_true, X, E, rho, J, n_povm, p):
     return dist ** (1 / p) / m / n_povm, max_dist ** (1 / p)
 
 
-@njit(cache=True, parallel=True)
-def dK(X, K, E, rho, J, y, d, r, rK):
+@njit(cache=True)  # , parallel=True)
+def dK(X, K, E, rho, J, y, d, r, rK, mle=False):
     """Compute the derivative of the objective function with respect to the Kraus tensor K.
 
     This function calculates the derivative of the Kraus operator K, based on the
@@ -269,6 +274,8 @@ def dK(X, K, E, rho, J, y, d, r, rK):
         The rank of the problem.
     rK : int
         The number of rows in the reshaped Kraus operator K.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -296,13 +303,29 @@ def dK(X, K, E, rho, J, y, d, r, rK):
                 L = E[o].conj()
                 for ind in j[:i]:
                     L = L @ X[ind]
-                D_ind = L @ X[k] @ R - y[o, n]
-                dK_[k] += D_ind * K[k].conj() @ np.kron(L.reshape(pdim, pdim).T, R.reshape(pdim, pdim).T)
-    return dK_.reshape(d, rK, pdim, pdim) * 2 / m / n_povm
-
-
-@njit(cache=True, parallel=False)
-def dK_dMdM(X, K, E, rho, J, y, d, r, rK):
+                if mle:
+                    p_ind = L @ X[k] @ R
+                    dK_[k] -= (
+                        K[k].conj()
+                        @ np.kron(L.reshape(pdim, pdim).T, R.reshape(pdim, pdim).T)
+                        * y[o, n]
+                        / p_ind
+                    )
+                else:
+                    D_ind = L @ X[k] @ R - y[o, n]
+                    dK_[k] += (
+                        D_ind
+                        * K[k].conj()
+                        @ np.kron(L.reshape(pdim, pdim).T, R.reshape(pdim, pdim).T)
+                        * 2
+                        / m
+                        / n_povm
+                    )
+    return dK_.reshape(d, rK, pdim, pdim)
+
+
+@njit(cache=True)  # , parallel=False)
+def dK_dMdM(X, K, E, rho, J, y, d, r, rK, mle=False):
     """Compute the derivatives of the objective function with respect to K and the
     product of derivatives of the measurement map with respect to K.
 
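The `mle` branch of `dK` is the term-by-term derivative of the new objective: for one outcome, d(−y·log p)/dθ = −(y/p)·dp/dθ, so the least-squares residual factor `D_ind = p − y` (weighted by 2/m/n_povm) is replaced by `−y/p_ind` in front of the same dp/dK expression. A one-variable finite-difference check of that rule:

```python
import numpy as np

def p_of(theta):
    return 0.3 + 0.1 * theta**2        # stand-in probability model p(theta)

def dp_of(theta):
    return 0.2 * theta                 # its exact derivative

theta, y, eps = 0.7, 0.42, 1e-6
analytic = -(y / p_of(theta)) * dp_of(theta)   # the -y/p weighting used above
nll = lambda t: -y * np.log(p_of(t))
numeric = (nll(theta + eps) - nll(theta - eps)) / (2 * eps)
assert np.isclose(analytic, numeric, rtol=1e-5)
```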
@@ -329,6 +352,8 @@ def dK_dMdM(X, K, E, rho, J, y, d, r, rK):
         The number of rows for the matrix K.
     rK : int
         The number of columns for the matrix K.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -346,24 +371,40 @@ def dK_dMdM(X, K, E, rho, J, y, d, r, rK):
     for n in range(m):
         j = J[n][J[n] >= 0]
         dM = np.ascontiguousarray(np.zeros((n_povm, d, rK, r)).astype(np.complex128))
-        for i, _ in enumerate(j):
-            k = j[i]
-            C = contract(X, j[:i])
-            R = contract(X, j[i + 1 :]) @ rho
-            for o in range(n_povm):
-                L = E[o].conj() @ C
-                D_ind = L @ X[k] @ R - y[o, n]
+        p_ind_array = np.zeros(n_povm).astype(np.complex128)
+        for o in range(n_povm):
+            for i, k in enumerate(j):
+                R = rho.copy()
+                for ind in j[i + 1 :][::-1]:
+                    R = X[ind] @ R
+                L = E[o].conj().copy()
+                for ind in j[:i]:
+                    L = L @ X[ind]
                 dM_loc = K[k].conj() @ np.kron(L.reshape((pdim, pdim)).T, R.reshape((pdim, pdim)).T)
-                dM[o, k, :, :] += dM_loc
-                dK_[k] += D_ind * dM_loc
+                p_ind = L @ X[k] @ R
+                if mle:
+                    dM[o, k] += dM_loc
+                    dK_[k] -= dM_loc * y[o, n] / p_ind
+                else:
+                    dM[o, k] += dM_loc
+                    D_ind = p_ind - y[o, n]
+                    dK_[k] += D_ind * dM_loc * 2 / m / n_povm
+            if len(j) == 0:
+                p_ind_array[o] = E[o].conj() @ rho
+            else:
+                p_ind_array[o] = p_ind
         for o in range(n_povm):
-            dM11 += np.kron(dM[o].conj().reshape(-1), dM[o].reshape(-1))
-            dM10 += np.kron(dM[o].reshape(-1), dM[o].reshape(-1))
-    return (dK_.reshape((d, rK, pdim, pdim)) * 2 / m / n_povm, 2 * dM10 / m / n_povm, 2 * dM11 / m / n_povm)
+            if mle:
+                dM11 += np.kron(dM[o].conj().reshape(-1), dM[o].reshape(-1)) * y[o, n] / p_ind_array[o] ** 2
+                dM10 += np.kron(dM[o].reshape(-1), dM[o].reshape(-1)) * y[o, n] / p_ind_array[o] ** 2
+            else:
+                dM11 += np.kron(dM[o].conj().reshape(-1), dM[o].reshape(-1)) * 2 / m / n_povm
+                dM10 += np.kron(dM[o].reshape(-1), dM[o].reshape(-1)) * 2 / m / n_povm
+    return (dK_.reshape((d, rK, pdim, pdim)), dM10, dM11)
 
 
-@njit(cache=True, parallel=False)
-def ddM(X, K, E, rho, J, y, d, r, rK):
+@njit(cache=True)  # , parallel=False)
+def ddM(X, K, E, rho, J, y, d, r, rK, mle=False):
     """Compute the second derivative of the objective function with respect to the Kraus tensor K.
 
     This function calculates the second derivative of the objective function for a given
@@ -389,6 +430,8 @@ def ddM(X, K, E, rho, J, y, d, r, rK):
         Dimension of the local basis.
     rK : int
         Number of rows in the Kraus operator matrix.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -421,11 +464,12 @@ def ddM(X, K, E, rho, J, y, d, r, rK):
                 for o in range(n_povm):
                     L = E[o].conj() @ L0
                     if i1 == i2:
-                        D_ind = L @ X[k1] @ R - y[o, n]
+                        p_ind = L @ X[k1] @ R
                     elif i1 < i2:
-                        D_ind = L @ X[k1] @ C.reshape(r, r) @ X[k2] @ R - y[o, n]
+                        p_ind = L @ X[k1] @ C.reshape(r, r) @ X[k2] @ R
                     else:
-                        D_ind = L @ X[k2] @ C.reshape(r, r) @ X[k1] @ R - y[o, n]
+                        p_ind = L @ X[k2] @ C.reshape(r, r) @ X[k1] @ R
+                    D_ind = p_ind - y[o, n]
 
                     ddK_loc = np.zeros((rK**2, r, r)).astype(np.complex128)
                     dconjdK_loc = np.zeros((rK**2, r, r)).astype(np.complex128)
@@ -479,16 +523,19 @@ def ddM(X, K, E, rho, J, y, d, r, rK):
                         .reshape(pdim, pdim, pdim, pdim)
                         .transpose(2, 0, 1, 3)
                     ).reshape(r, r)
-
-                    ddK[k1 * d + k2] += D_ind * ddK_loc
-                    dconjdK[k1 * d + k2] += D_ind * dconjdK_loc
+                    if mle:
+                        ddK[k1 * d + k2] -= ddK_loc * y[o, n] / p_ind
+                        dconjdK[k1 * d + k2] -= dconjdK_loc * y[o, n] / p_ind
+                    else:
+                        ddK[k1 * d + k2] += D_ind * ddK_loc * 2 / m / n_povm
+                        dconjdK[k1 * d + k2] += D_ind * dconjdK_loc * 2 / m / n_povm
     return (
-        ddK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim) * 2 / m / n_povm,
-        dconjdK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim) * 2 / m / n_povm,
+        ddK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim),
+        dconjdK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim),
     )
 
 
-@njit(parallel=True, cache=True)
+@njit(cache=True)  # , parallel=True)
 def dA(X, A, B, J, y, r, pdim, n_povm):
     """Compute the derivative of the objective function with respect to the POVM tensor A
 
@@ -526,7 +573,8 @@ def dA(X, A, B, J, y, r, pdim, n_povm):
     rho = (B @ B.T.conj()).reshape(-1)
     dA_ = np.zeros((n_povm, pdim, pdim)).astype(np.complex128)
     m = len(J)
-    for n in prange(m):  # pylint: disable=not-an-iterable
+    # pylint: disable=not-an-iterable
+    for n in prange(m):
         j = J[n][J[n] >= 0]
         inner_deriv = contract(X, j) @ rho
         dA_step = np.zeros((n_povm, pdim, pdim)).astype(np.complex128)
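Throughout this file the diff drops `parallel=True` from the `@njit` decorators (leaving the old option behind as a trailing comment) while keeping the `prange` loops; without `parallel=True`, Numba compiles `prange` exactly like `range`, so the code stays correct and simply runs single-threaded. Pylint cannot tell that `prange` is iterable, hence the relocated `disable=not-an-iterable` comment. Both variants side by side:

```python
import numpy as np
from numba import njit, prange

@njit(cache=True)  # sequential: prange degrades gracefully to range
def row_sums_seq(a):
    out = np.zeros(a.shape[0])
    for i in prange(a.shape[0]):  # pylint: disable=not-an-iterable
        out[i] = a[i].sum()
    return out

@njit(cache=True, parallel=True)  # threaded: prange iterations run concurrently
def row_sums_par(a):
    out = np.zeros(a.shape[0])
    for i in prange(a.shape[0]):  # pylint: disable=not-an-iterable
        out[i] = a[i].sum()
    return out
```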
@@ -537,7 +585,7 @@ def dA(X, A, B, J, y, r, pdim, n_povm):
     return dA_ * 2 / m / n_povm
 
 
-@njit(parallel=True, cache=True)
+@njit(cache=True)  # , parallel=True)
 def dB(X, A, B, J, y, pdim):
     """Compute the derivative of the objective function with respect to the state tensor B.
 
@@ -577,8 +625,8 @@ def dB(X, A, B, J, y, pdim):
     return dB_
 
 
-@njit(parallel=True, cache=True)
-def ddA_derivs(X, A, B, J, y, r, pdim, n_povm):
+@njit(cache=True)  # , parallel=True)
+def ddA_derivs(X, A, B, J, y, r, pdim, n_povm, mle=False):
     """Calculate all nonzero terms of the second derivatives with respect to the POVM tensor A.
 
     Parameters
@@ -599,6 +647,8 @@ def ddA_derivs(X, A, B, J, y, r, pdim, n_povm):
         The dimension of the input matrices A and B.
     n_povm : int
         The number of POVM elements.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -629,21 +679,32 @@ def ddA_derivs(X, A, B, J, y, r, pdim, n_povm):
         dMconjdM_step = np.zeros((n_povm, r, r)).astype(np.complex128)
         dconjdA_step = np.zeros((n_povm, r, r)).astype(np.complex128)
         for o in range(n_povm):
-            D_ind = E[o].conj() @ R - y[o, n]
             dM = A[o].conj() @ R.reshape(pdim, pdim).T
-            dMdM_step[o] += np.outer(dM, dM)
-            dMconjdM_step[o] += np.outer(dM.conj(), dM)
-            dA_step[o] += D_ind * dM
-            dconjdA_step[o] += D_ind * np.kron(np.eye(pdim).astype(np.complex128), R.reshape(pdim, pdim).T)
+            if mle:
+                p_ind = E[o].conj() @ R
+                dMdM_step[o] += np.outer(dM, dM) * y[o, n] / p_ind**2
+                dMconjdM_step[o] += np.outer(dM.conj(), dM) * y[o, n] / p_ind**2
+                dA_step[o] -= dM * y[o, n] / p_ind
+                dconjdA_step[o] -= (
+                    np.kron(np.eye(pdim).astype(np.complex128), R.reshape(pdim, pdim).T) * y[o, n] / p_ind
+                )
+            else:
+                D_ind = E[o].conj() @ R - y[o, n]
+                dMdM_step[o] += np.outer(dM, dM) * 2 / m / n_povm
+                dMconjdM_step[o] += np.outer(dM.conj(), dM) * 2 / m / n_povm
+                dA_step[o] += D_ind * dM * 2 / m / n_povm
+                dconjdA_step[o] += (
+                    D_ind * np.kron(np.eye(pdim).astype(np.complex128), R.reshape(pdim, pdim).T) * 2 / m / n_povm
+                )
         dA_ += dA_step
         dMdM += dMdM_step
         dMconjdM += dMconjdM_step
         dconjdA += dconjdA_step
-    return dA_ * 2 / m / n_povm, dMdM * 2 / m / n_povm, dMconjdM * 2 / m / n_povm, dconjdA * 2 / m / n_povm
+    return dA_, dMdM, dMconjdM, dconjdA
 
 
-@njit(parallel=True, cache=True)
-def ddB_derivs(X, A, B, J, y, r, pdim):
+@njit(cache=True)  # , parallel=True)
+def ddB_derivs(X, A, B, J, y, r, pdim, mle=False):
     """Calculate all nonzero terms of the second derivative with respect to the state tensor B.
 
     Parameters
@@ -690,12 +751,17 @@ def ddB_derivs(X, A, B, J, y, r, pdim):
         C = contract(X, j)
         for o in range(n_povm):
             L = E[o].conj() @ C
-            D_ind = L @ rho - y[o, n]
-
             dM = L.reshape(pdim, pdim) @ B.conj()
-            dMdM += np.outer(dM, dM)
-            dMconjdM += np.outer(dM.conj(), dM)
-
-            dB_ += D_ind * dM
-            dconjdB += D_ind * np.kron(L.reshape(pdim, pdim), np.eye(pdim).astype(np.complex128))
-    return dB_ * 2 / m / n_povm, dMdM * 2 / m / n_povm, dMconjdM * 2 / m / n_povm, dconjdB.T * 2 / m / n_povm
+            if mle:
+                p_ind = L @ rho
+                dMdM += np.outer(dM, dM) * y[o, n] / p_ind**2
+                dMconjdM += np.outer(dM.conj(), dM) * y[o, n] / p_ind**2
+                dB_ -= dM * y[o, n] / p_ind
+                dconjdB -= np.kron(L.reshape(pdim, pdim), np.eye(pdim).astype(np.complex128)) * y[o, n] / p_ind
+            else:
+                D_ind = L @ rho - y[o, n]
+                dMdM += np.outer(dM, dM) * 2 / m / n_povm
+                dMconjdM += np.outer(dM.conj(), dM) * 2 / m / n_povm
+                dB_ += D_ind * dM * 2 / m / n_povm
+                dconjdB += D_ind * np.kron(L.reshape(pdim, pdim), np.eye(pdim).astype(np.complex128)) * 2 / m / n_povm
+    return dB_, dMdM, dMconjdM, dconjdB.T
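A closing observation that applies to every derivative routine above (`dK`, `dK_dMdM`, `ddM`, `ddA_derivs`, `ddB_derivs`): the global 2 / m / n_povm least-squares weight has moved from the return statements into the `else` branches, freeing the `mle` branches to apply their own per-outcome weights, −y/p on derivative terms and y/p² on products of first derivatives, which is exactly what differentiating −Σ y·log p twice produces.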