iqm-benchmarks 2.43__py3-none-any.whl → 2.45__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This version of iqm-benchmarks has been flagged as potentially problematic.

mGST/algorithm.py CHANGED
@@ -28,7 +28,7 @@ from mGST.optimization import (
 from mGST.reporting.figure_gen import plot_objf
 
 
-def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
+def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3, mle=False):
     """Riemannian saddle free Newton step on the POVM parametrization
 
     Parameters
@@ -73,7 +73,7 @@ def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
     Fyy = np.zeros((n_povm, r, n_povm, r)).astype(np.complex128)
 
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
-    dA_, dMdM, dMconjdM, dconjdA = ddA_derivs(X, A, B, J, y, r, pdim, n_povm)
+    dA_, dMdM, dMconjdM, dconjdA = ddA_derivs(X, A, B, J, y, r, pdim, n_povm, mle=mle)
 
     # Second derivatives
     for i in range(n_povm):
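A note on the recurring context line above: `np.einsum("ijkl,ijnm -> iknlm", K, K.conj())` turns the Kraus tensor into the superoperators X[i](rho) = sum_j K_ij rho K_ij^†, using the row-major vectorization identity vec(A rho A^†) = (A ⊗ A.conj()) vec(rho). A minimal sketch with toy dimensions (shapes inferred from the surrounding code, not part of the package):

```python
import numpy as np

d, rK, pdim = 2, 1, 2      # toy sizes; in this code, r = pdim**2
r = pdim**2
rng = np.random.default_rng(0)
K = rng.normal(size=(d, rK, pdim, pdim)) + 1j * rng.normal(size=(d, rK, pdim, pdim))

# Superoperator of each gate: X[i] = sum_j K[i, j] (x) K[i, j].conj()
X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))

# Cross-check against the direct Kraus action on a test matrix (row-major vec)
rho = rng.normal(size=(pdim, pdim)) + 1j * rng.normal(size=(pdim, pdim))
direct = sum(K[0, j] @ rho @ K[0, j].conj().T for j in range(rK))
assert np.allclose(X[0] @ rho.reshape(-1), direct.reshape(-1))
```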
@@ -133,12 +133,12 @@ def A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
 
     Delta = tangent_proj(A, Delta_A, 1, n_povm)[0]
 
-    a = minimize(lineobjf_A_geodesic, 1e-9, args=(Delta, X, A, rho, J, y), method="COBYLA").x
+    a = minimize(lineobjf_A_geodesic, 1e-9, args=(Delta, X, A, rho, J, y, mle), method="COBYLA").x
     A_new = update_A_geodesic(A, Delta, a)
     return A_new
 
 
-def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
+def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3, mle=False):
     """Riemannian saddle free Newton step on the initial state parametrization
 
     Parameters
@@ -180,11 +180,9 @@ def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
     E = np.array([(A[i].T.conj() @ A[i]).reshape(-1) for i in range(n_povm)])
     H = np.zeros((2, nt, 2, nt)).astype(np.complex128)
     P_T = np.zeros((2, nt, 2, nt)).astype(np.complex128)
-    Fyconjy = np.zeros((r, r)).astype(np.complex128)
-    Fyy = np.zeros((r, r)).astype(np.complex128)
 
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
-    dB_, dMdM, dMconjdM, dconjdB = ddB_derivs(X, A, B, J, y, r, pdim)
+    dB_, dMdM, dMconjdM, dconjdB = ddB_derivs(X, A, B, J, y, r, pdim, mle=mle)
 
     # Second derivatives
     Fyconjy = dMconjdM + dconjdB
@@ -241,14 +239,15 @@ def B_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, lam=1e-3):
     Delta = (H_abs_inv @ G)[:nt]
     # Projection onto tangent space
     Delta = Delta - Y * (Y.T.conj() @ Delta + Delta.T.conj() @ Y) / 2
-    res = minimize(lineobjf_B_geodesic, 1e-9, args=(Delta, X, E, B, J, y), method="COBYLA", options={"maxiter": 20})
+    res = minimize(
+        lineobjf_B_geodesic, 1e-9, args=(Delta, X, E, B, J, y, mle), method="COBYLA", options={"maxiter": 20}
+    )
     a = res.x
-
     B_new = update_B_geodesic(B, Delta, a)
     return B_new
 
 
-def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA"):
+def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA", mle=False):
     """Do Riemannian gradient descent optimization step on gates
 
     Parameters
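Note the pattern shared by every `minimize(...)` call site in this file: the search direction `Delta` is held fixed and COBYLA tunes only a scalar step size along the geodesic; the change in these hunks is that `mle` is forwarded so the line search minimizes the same objective as the outer iteration. A self-contained sketch of the pattern, with a toy quadratic standing in for the package's `lineobjf_*_geodesic` functions:

```python
import numpy as np
from scipy.optimize import minimize

def line_objective(a, direction, x0):
    # stand-in for lineobjf_B_geodesic: the full objective restricted to step size a
    x = x0 + np.atleast_1d(a)[0] * direction  # a real retraction would follow the geodesic
    return float(np.sum((x - 1.0) ** 2))

x0, direction = np.zeros(4), np.ones(4)
res = minimize(line_objective, 1e-9, args=(direction, x0), method="COBYLA", options={"maxiter": 20})
a = res.x  # scalar step size, exactly as `a = res.x` in the hunks above
```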
@@ -291,7 +290,7 @@ def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA"):
     Delta = np.zeros((d, n, pdim)).astype(np.complex128)
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
 
-    dK_ = dK(X, K, E, rho, J, y, d, r, rK)
+    dK_ = dK(X, K, E, rho, J, y, d, r, rK, mle=mle)
     for k in np.where(~fixed_gates)[0]:
         # derivative
         Fy = dK_[k].reshape(n, pdim)
@@ -299,14 +298,20 @@ def gd(K, E, rho, y, J, d, r, rK, fixed_gates, ls="COBYLA"):
         # Riem. gradient taken from conjugate derivative
         rGrad = 2 * (Fy.conj() - Y @ Fy.T @ Y)
         Delta[k] = rGrad
-    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y), method=ls, options={"maxiter": 200})
+
+    # Additional projection onto tangent space to avoid numerical instability
+    Delta = tangent_proj(K, Delta, d, rK)
+
+    res = minimize(
+        lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y, mle), method=ls, options={"maxiter": 200}
+    )
     a = res.x
     K_new = update_K_geodesic(K, Delta, a)
 
     return K_new
 
 
-def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=None):
+def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=None, mle=False):
     """Riemannian saddle free Newton step on each gate individually
 
     Parameters
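The new `tangent_proj` call in `gd` guards against floating-point drift: a tangent vector Delta at an isometry Y (with Y^† Y = 1) must satisfy Y^† Delta + Delta^† Y = 0, and the assembled gradient only obeys this up to rounding error. The package's `tangent_proj` body is not shown in this diff; the sketch below is the textbook Stiefel projection that it presumably generalizes to a stack of Kraus blocks:

```python
import numpy as np

def stiefel_tangent_proj(Y, Delta):
    # Project Delta onto the tangent space at Y: Delta - Y sym(Y^H Delta)
    sym = (Y.conj().T @ Delta + Delta.conj().T @ Y) / 2
    return Delta - Y @ sym

rng = np.random.default_rng(1)
Y, _ = np.linalg.qr(rng.normal(size=(6, 3)))   # a point with Y^H Y = I
T = stiefel_tangent_proj(Y, rng.normal(size=(6, 3)))
# The result satisfies the tangent-space condition to machine precision
assert np.allclose(Y.conj().T @ T + T.conj().T @ Y, 0, atol=1e-12)
```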
@@ -355,8 +360,8 @@ def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=
         fixed_gates = []
 
     # compute derivatives
-    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK)
-    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK)
+    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK, mle=mle)
+    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK, mle=mle)
 
     # Second derivatives
     Fyconjy = dM11.reshape(d, nt, d, nt) + np.einsum("ijklmnop->ikmojlnp", dconjd).reshape((d, nt, d, nt))
@@ -408,14 +413,16 @@ def SFN_riem_Hess(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=
 
     Delta = tangent_proj(K, Delta_K, d, rK)
 
-    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y), method=ls, options={"maxiter": 200})
+    res = minimize(
+        lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y, mle), method=ls, options={"maxiter": 200}
+    )
     a = res.x
     K_new = update_K_geodesic(K, Delta, a)
 
     return K_new
 
 
-def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA"):
+def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", mle=False):
     """Riemannian saddle free Newton step on product manifold of all gates
 
     Parameters
@@ -459,8 +466,8 @@ def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA"):
     X = np.einsum("ijkl,ijnm -> iknlm", K, K.conj()).reshape((d, r, r))
 
     # compute derivatives
-    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK)
-    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK)
+    dK_, dM10, dM11 = dK_dMdM(X, K, E, rho, J, y, d, r, rK, mle=mle)
+    dd, dconjd = ddM(X, K, E, rho, J, y, d, r, rK, mle=mle)
 
     # Second derivatives
     Fyconjy = dM11.reshape(d, nt, d, nt) + np.einsum("ijklmnop->ikmojlnp", dconjd).reshape((d, nt, d, nt))
@@ -522,13 +529,13 @@ def SFN_riem_Hess_full(K, E, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA"):
 
     # Delta_K is already in tangent space but not to sufficient numerical accuracy
     Delta = tangent_proj(K, Delta_K, d, rK)
-    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y), method=ls, options={"maxiter": 20})
+    res = minimize(lineobjf_isom_geodesic, 1e-8, args=(Delta, K, E, rho, J, y, mle), method=ls, options={"maxiter": 20})
     a = res.x
     K_new = update_K_geodesic(K, Delta, a)
     return K_new
 
 
-def optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements):
+def optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements, mle=False):
     """Full gate set optimization update alternating on E, K and rho
 
     Parameters
@@ -578,27 +585,27 @@ def optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements):
         A_new = A
         E_new = np.array([(A_new[i].T.conj() @ A_new[i]).reshape(-1) for i in range(n_povm)])
     else:
-        A_new = A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm)
+        A_new = A_SFN_riem_Hess(K, A, B, y, J, d, r, n_povm, mle=mle)
         E_new = np.array([(A_new[i].T.conj() @ A_new[i]).reshape(-1) for i in range(n_povm)])
-
     if any(((f"G%i" % i in fixed_elements) for i in range(d))):
         fixed_gates = np.array([(f"G%i" % i in fixed_elements) for i in range(d)])
         if method == "SFN":
-            K_new = SFN_riem_Hess(K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=fixed_gates)
+            K_new = SFN_riem_Hess(
+                K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", fixed_gates=fixed_gates, mle=mle
+            )
         else:
-            K_new = gd(K, E_new, rho, y, J, d, r, rK, ls="COBYLA", fixed_gates=fixed_gates)
+            K_new = gd(K, E_new, rho, y, J, d, r, rK, ls="COBYLA", fixed_gates=fixed_gates, mle=mle)
     else:
         if method == "SFN":
-            K_new = SFN_riem_Hess_full(K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA")
+            K_new = SFN_riem_Hess_full(K, E_new, rho, y, J, d, r, rK, lam=1e-3, ls="COBYLA", mle=mle)
         else:
             fixed_gates = np.array([(f"G%i" % i in fixed_elements) for i in range(d)])
-            K_new = gd(K, E_new, rho, y, J, d, r, rK, fixed_gates=fixed_gates, ls="COBYLA")
-
+            K_new = gd(K, E_new, rho, y, J, d, r, rK, fixed_gates=fixed_gates, ls="COBYLA", mle=mle)
     if "rho" in fixed_elements:
         rho_new = rho
         B_new = B
     else:
-        B_new = B_SFN_riem_Hess(K_new, A_new, B, y, J, d, r, n_povm, lam=1e-3)
+        B_new = B_SFN_riem_Hess(K_new, A_new, B, y, J, d, r, n_povm, lam=1e-3, mle=mle)
         rho_new = (B_new @ B_new.T.conj()).reshape(-1)
     X_new = np.einsum("ijkl,ijnm -> iknlm", K_new, K_new.conj()).reshape((d, r, r))
     return K_new, X_new, E_new, rho_new, A_new, B_new
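`optimize` remains one sweep of block-coordinate descent: update the POVM (A/E), then the gates (K), then the state (B/rho), each with the other blocks held fixed, now with a single `mle` flag selecting the objective for every block. A toy illustration of why alternating exact minimization over blocks drives a coupled objective down (nothing here is mGST-specific):

```python
# Minimize f(u, v) = (u - 1)^2 + (v - 2)^2 + 0.1 * (u - v)^2 by alternating
# closed-form updates, one block at a time (like the A/E, K, and B/rho steps).
def update_u(v):
    return (1 + 0.1 * v) / 1.1    # argmin over u with v fixed

def update_v(u):
    return (2 + 0.1 * u) / 1.1    # argmin over v with u fixed

u = v = 0.0
for _ in range(50):
    u = update_u(v)
    v = update_v(u)
print(u, v)  # converges to the joint minimizer of f
```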
@@ -609,13 +616,13 @@ def run_mGST(
     method="SFN",
     max_inits=10,
     max_iter=200,
-    final_iter=70,
-    target_rel_prec=1e-4,
-    threshold_multiplier=3,
+    final_iter=120,
+    target_rel_prec=1e-5,
+    threshold_multiplier=5,
     fixed_elements=None,
     init=None,
-    testing=False,
-):  # pylint: disable=too-many-branches
+    verbose_level=0,
+):  # pylint: disable=too-many-branches, too-many-statements
     """Main mGST routine
 
     Parameters
@@ -653,11 +660,11 @@ def run_mGST(
 
     Returns
     -------
-    K : numpy array
+    K: numpy array
         Updated estimates of Kraus operators
-    X : numpy array
+    X: numpy array
         Updated estimates of superoperators corresponding to K_new
-    E : numpy array
+    E: numpy array
         Updated POVM estimate
     rho : numpy array
         Updated initial state estimate
@@ -682,7 +689,8 @@ def run_mGST(
     )
 
     success = False
-    qcvv_logger.info(f"Starting mGST optimization...")
+    if verbose_level > 0:
+        qcvv_logger.info(f"Starting mGST optimization...")
 
     if init:
         K, E = (init[0], init[1])
@@ -698,8 +706,8 @@ def run_mGST(
         A = np.array([la.cholesky(E[k].reshape(pdim, pdim) + 1e-14 * np.eye(pdim)).T.conj() for k in range(n_povm)])
         B = la.cholesky(rho.reshape(pdim, pdim))
         res_list = [objf(X, E, rho, J, y)]
-        with logging_redirect_tqdm(loggers=[qcvv_logger]):
-            for _ in trange(max_iter):
+        with logging_redirect_tqdm(loggers=[qcvv_logger] if verbose_level > 0 else None):
+            for _ in trange(max_iter, disable=verbose_level == 0):
                 yb, Jb = batch(y, J, bsize)
                 K, X, E, rho, A, B = optimize(yb, Jb, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements)
                 res_list.append(objf(X, E, rho, J, y))
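The two `-`/`+` pairs above implement the standard tqdm verbosity gate: `disable=verbose_level == 0` removes the progress bar entirely, and `logging_redirect_tqdm` keeps log records from tearing the bar when it is shown. A minimal, self-contained version of the same gating (logger name and loop body are placeholders):

```python
import logging
from tqdm import trange
from tqdm.contrib.logging import logging_redirect_tqdm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("example")  # placeholder for qcvv_logger
verbose_level = 1

with logging_redirect_tqdm(loggers=[logger] if verbose_level > 0 else None):
    for step in trange(100, disable=verbose_level == 0):
        if verbose_level > 0 and step % 50 == 0:
            logger.info("checkpoint at step %d", step)
```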
@@ -707,29 +715,37 @@ def run_mGST(
                 qcvv_logger.info(f"Batch optimization successful, improving estimate over full data....")
                 success = True
                 break
-        if testing:
-            plot_objf(res_list, delta, f"Objective function for batch optimization")
+        if verbose_level == 2:
+            plot_objf(res_list, f"Objective function for batch optimization", delta=delta)
         if success:
             break
-        qcvv_logger.info(f"Run {i+1}/{max_inits} failed, trying new initialization...")
+        if verbose_level > 0:
+            qcvv_logger.info(f"Run {i+1}/{max_inits} failed, trying new initialization...")
 
-    if not success and max_inits > 0:
+    if not success and init is None and verbose_level > 0:
         qcvv_logger.info(f"Success threshold not reached, attempting optimization over full data set...")
-    with logging_redirect_tqdm(loggers=[qcvv_logger]):
-        for _ in trange(final_iter):
-            K, X, E, rho, A, B = optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements)
+    with logging_redirect_tqdm(loggers=[qcvv_logger] if verbose_level > 0 else None):
+        res_list_mle = []
+        for _ in trange(final_iter, disable=verbose_level == 0):
+            K, X, E, rho, A, B = optimize(y, J, d, r, rK, n_povm, method, K, rho, A, B, fixed_elements, mle=True)
             res_list.append(objf(X, E, rho, J, y))
-            if len(res_list) >= 2 and np.abs(res_list[-2] - res_list[-1]) < delta * target_rel_prec:
+            res_list_mle.append(objf(X, E, rho, J, y, mle=True))
+            if (
+                len(res_list_mle) >= 2
+                and np.abs(res_list_mle[-2] - res_list_mle[-1]) < res_list_mle[-1] * target_rel_prec
+            ):
                 break
-    if testing:
-        plot_objf(res_list, delta, f"Objective function over batches and full data")
-    if success or (res_list[-1] < delta):
-        qcvv_logger.info(f"Convergence criterion satisfied")
-    else:
+    if verbose_level == 2:
+        plot_objf(res_list, f"Least squares error over batches and full data", delta=delta)
+        plot_objf(res_list_mle, f"Negative log-likelihood over full data")
+    if verbose_level > 0:
+        if success or (res_list[-1] < delta):
+            qcvv_logger.info(f"Convergence criterion satisfied")
+        else:
+            qcvv_logger.warning(
+                f"Convergence criterion not satisfied. Potential causes include too low max_iterations, bad initialization or model mismatch."
+            )
         qcvv_logger.info(
-            f"Convergence criterion not satisfied, inspect results and consider increasing max_iter or using new initializations.",
+            f"Final objective {Decimal(res_list[-1]):.2e} in time {(time.time() - t0):.2f}s",
         )
-        qcvv_logger.info(
-            f"Final objective {Decimal(res_list[-1]):.2e} in time {(time.time() - t0):.2f}s",
-        )
     return K, X, E, rho, res_list
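Two behavioral changes in this final hunk are easy to miss. First, the full-data refinement now always optimizes the log-likelihood (`mle=True`), while `res_list` keeps tracking the least-squares error for the convergence report. Second, the stopping rule changed from an absolute test against `delta * target_rel_prec` to a relative one: stop when the negative log-likelihood improves by less than a `target_rel_prec` fraction of its current value. The rule in isolation, with a synthetic cost sequence:

```python
import numpy as np

target_rel_prec = 1e-5          # new default from the signature hunk above
costs = []                      # plays the role of res_list_mle

for step in range(120):         # final_iter's new default
    costs.append(1.0 + np.exp(-0.5 * step))  # synthetic, improving cost
    if len(costs) >= 2 and abs(costs[-2] - costs[-1]) < costs[-1] * target_rel_prec:
        break                   # relative improvement below threshold
```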
mGST/low_level_jit.py CHANGED
@@ -104,8 +104,8 @@ def contract(X, j_vec):
     return res
 
 
-@njit(cache=True, fastmath=True, parallel=True)
-def objf(X, E, rho, J, y):
+@njit(cache=True, fastmath=True)  # , parallel=True)
+def objf(X, E, rho, J, y, mle=False):
     """Calculate the objective function value for matrices, POVM elements, and target values.
     This function computes the objective function value based on input matrices X, POVM elements E,
 
@@ -123,6 +123,8 @@ def objf(X, E, rho, J, y):
         A 2D array representing the indices for which the objective function will be evaluated.
     y : numpy.ndarray
         A 2D array of shape (n_povm, len(J)) containing the target values.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -139,8 +141,11 @@ def objf(X, E, rho, J, y):
         for ind in j[::-1]:
             state = X[ind] @ state
         for o in range(n_povm):
-            objf_ += abs(E[o].conj() @ state - y[o, i]) ** 2
-    return objf_ / m / n_povm
+            if mle:
+                objf_ -= np.log(abs(E[o].conj() @ state)) * y[o, i]
+            else:
+                objf_ += abs(E[o].conj() @ state - y[o, i]) ** 2 / m / n_povm
+    return objf_
 
 
 @njit(cache=True)
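This rewritten `objf` is the core of the release: with `mle=False` it is the mean squared residual between the model probabilities p = E_o^† · state and the measured frequencies y, and with `mle=True` it is the negative log-likelihood −Σ y·log p. Note that the 1/(m·n_povm) normalization moved inside the least-squares branch so the MLE branch stays unnormalized. The two objectives in plain NumPy, over precomputed probabilities (no Numba, no circuit contraction; shapes assumed for illustration):

```python
import numpy as np

def objective(p, y, mle=False):
    """p: model probabilities, y: observed frequencies, both shaped (n_povm, m)."""
    n_povm, m = y.shape
    if mle:
        return -np.sum(y * np.log(np.abs(p)))       # negative log-likelihood
    return np.sum(np.abs(p - y) ** 2) / m / n_povm  # mean squared residual

rng = np.random.default_rng(2)
y = rng.dirichlet(np.ones(4), size=3).T  # (n_povm=4, m=3) outcome frequencies
p = y + 1e-3                             # slightly perturbed model probabilities
print(objective(p, y), objective(p, y, mle=True))
```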
@@ -242,8 +247,8 @@ def Mp_norm_lower(X_true, E_true, rho_true, X, E, rho, J, n_povm, p):
     return dist ** (1 / p) / m / n_povm, max_dist ** (1 / p)
 
 
-@njit(cache=True, parallel=True)
-def dK(X, K, E, rho, J, y, d, r, rK):
+@njit(cache=True)  # , parallel=True)
+def dK(X, K, E, rho, J, y, d, r, rK, mle=False):
     """Compute the derivative of the objective function with respect to the Kraus tensor K.
 
     This function calculates the derivative of the Kraus operator K, based on the
@@ -269,6 +274,8 @@ def dK(X, K, E, rho, J, y, d, r, rK):
         The rank of the problem.
     rK : int
         The number of rows in the reshaped Kraus operator K.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -296,13 +303,29 @@ def dK(X, K, E, rho, J, y, d, r, rK):
                 L = E[o].conj()
                 for ind in j[:i]:
                     L = L @ X[ind]
-                D_ind = L @ X[k] @ R - y[o, n]
-                dK_[k] += D_ind * K[k].conj() @ np.kron(L.reshape(pdim, pdim).T, R.reshape(pdim, pdim).T)
-    return dK_.reshape(d, rK, pdim, pdim) * 2 / m / n_povm
-
-
-@njit(cache=True, parallel=False)
-def dK_dMdM(X, K, E, rho, J, y, d, r, rK):
+                if mle:
+                    p_ind = L @ X[k] @ R
+                    dK_[k] -= (
+                        K[k].conj()
+                        @ np.kron(L.reshape(pdim, pdim).T, R.reshape(pdim, pdim).T)
+                        * y[o, n]
+                        / p_ind
+                    )
+                else:
+                    D_ind = L @ X[k] @ R - y[o, n]
+                    dK_[k] += (
+                        D_ind
+                        * K[k].conj()
+                        @ np.kron(L.reshape(pdim, pdim).T, R.reshape(pdim, pdim).T)
+                        * 2
+                        / m
+                        / n_povm
+                    )
+    return dK_.reshape(d, rK, pdim, pdim)
+
+
+@njit(cache=True)  # , parallel=False)
+def dK_dMdM(X, K, E, rho, J, y, d, r, rK, mle=False):
     """Compute the derivatives of the objective function with respect to K and the
     product of derivatives of the measurement map with respect to K.
 
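The `mle` branch of `dK` is the term-by-term derivative of the new objective: for one outcome, d(−y·log p)/dθ = −(y/p)·dp/dθ, so the least-squares residual factor `D_ind = p − y` (weighted by 2/m/n_povm) is replaced by `−y/p_ind` in front of the same dp/dK expression. A one-variable finite-difference check of that rule:

```python
import numpy as np

def p_of(theta):
    return 0.3 + 0.1 * theta**2        # stand-in probability model p(theta)

def dp_of(theta):
    return 0.2 * theta                 # its exact derivative

theta, y, eps = 0.7, 0.42, 1e-6
analytic = -(y / p_of(theta)) * dp_of(theta)   # the -y/p weighting used above
nll = lambda t: -y * np.log(p_of(t))
numeric = (nll(theta + eps) - nll(theta - eps)) / (2 * eps)
assert np.isclose(analytic, numeric, rtol=1e-5)
```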
@@ -329,6 +352,8 @@ def dK_dMdM(X, K, E, rho, J, y, d, r, rK):
         The number of rows for the matrix K.
     rK : int
         The number of columns for the matrix K.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -346,24 +371,40 @@ def dK_dMdM(X, K, E, rho, J, y, d, r, rK):
     for n in range(m):
         j = J[n][J[n] >= 0]
         dM = np.ascontiguousarray(np.zeros((n_povm, d, rK, r)).astype(np.complex128))
-        for i, _ in enumerate(j):
-            k = j[i]
-            C = contract(X, j[:i])
-            R = contract(X, j[i + 1 :]) @ rho
-            for o in range(n_povm):
-                L = E[o].conj() @ C
-                D_ind = L @ X[k] @ R - y[o, n]
+        p_ind_array = np.zeros(n_povm).astype(np.complex128)
+        for o in range(n_povm):
+            for i, k in enumerate(j):
+                R = rho.copy()
+                for ind in j[i + 1 :][::-1]:
+                    R = X[ind] @ R
+                L = E[o].conj().copy()
+                for ind in j[:i]:
+                    L = L @ X[ind]
                 dM_loc = K[k].conj() @ np.kron(L.reshape((pdim, pdim)).T, R.reshape((pdim, pdim)).T)
-                dM[o, k, :, :] += dM_loc
-                dK_[k] += D_ind * dM_loc
+                p_ind = L @ X[k] @ R
+                if mle:
+                    dM[o, k] += dM_loc
+                    dK_[k] -= dM_loc * y[o, n] / p_ind
+                else:
+                    dM[o, k] += dM_loc
+                    D_ind = p_ind - y[o, n]
+                    dK_[k] += D_ind * dM_loc * 2 / m / n_povm
+            if len(j) == 0:
+                p_ind_array[o] = E[o].conj() @ rho
+            else:
+                p_ind_array[o] = p_ind
         for o in range(n_povm):
-            dM11 += np.kron(dM[o].conj().reshape(-1), dM[o].reshape(-1))
-            dM10 += np.kron(dM[o].reshape(-1), dM[o].reshape(-1))
-    return (dK_.reshape((d, rK, pdim, pdim)) * 2 / m / n_povm, 2 * dM10 / m / n_povm, 2 * dM11 / m / n_povm)
+            if mle:
+                dM11 += np.kron(dM[o].conj().reshape(-1), dM[o].reshape(-1)) * y[o, n] / p_ind_array[o] ** 2
+                dM10 += np.kron(dM[o].reshape(-1), dM[o].reshape(-1)) * y[o, n] / p_ind_array[o] ** 2
+            else:
+                dM11 += np.kron(dM[o].conj().reshape(-1), dM[o].reshape(-1)) * 2 / m / n_povm
+                dM10 += np.kron(dM[o].reshape(-1), dM[o].reshape(-1)) * 2 / m / n_povm
+    return (dK_.reshape((d, rK, pdim, pdim)), dM10, dM11)
 
 
-@njit(cache=True, parallel=False)
-def ddM(X, K, E, rho, J, y, d, r, rK):
+@njit(cache=True)  # , parallel=False)
+def ddM(X, K, E, rho, J, y, d, r, rK, mle=False):
     """Compute the second derivative of the objective function with respect to the Kraus tensor K.
 
     This function calculates the second derivative of the objective function for a given
@@ -389,6 +430,8 @@ def ddM(X, K, E, rho, J, y, d, r, rK):
         Dimension of the local basis.
     rK : int
         Number of rows in the Kraus operator matrix.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -421,11 +464,12 @@ def ddM(X, K, E, rho, J, y, d, r, rK):
                 for o in range(n_povm):
                     L = E[o].conj() @ L0
                     if i1 == i2:
-                        D_ind = L @ X[k1] @ R - y[o, n]
+                        p_ind = L @ X[k1] @ R
                     elif i1 < i2:
-                        D_ind = L @ X[k1] @ C.reshape(r, r) @ X[k2] @ R - y[o, n]
+                        p_ind = L @ X[k1] @ C.reshape(r, r) @ X[k2] @ R
                     else:
-                        D_ind = L @ X[k2] @ C.reshape(r, r) @ X[k1] @ R - y[o, n]
+                        p_ind = L @ X[k2] @ C.reshape(r, r) @ X[k1] @ R
+                    D_ind = p_ind - y[o, n]
 
                     ddK_loc = np.zeros((rK**2, r, r)).astype(np.complex128)
                     dconjdK_loc = np.zeros((rK**2, r, r)).astype(np.complex128)
@@ -479,16 +523,19 @@ def ddM(X, K, E, rho, J, y, d, r, rK):
                         .reshape(pdim, pdim, pdim, pdim)
                         .transpose(2, 0, 1, 3)
                     ).reshape(r, r)
-
-                    ddK[k1 * d + k2] += D_ind * ddK_loc
-                    dconjdK[k1 * d + k2] += D_ind * dconjdK_loc
+                    if mle:
+                        ddK[k1 * d + k2] -= ddK_loc * y[o, n] / p_ind
+                        dconjdK[k1 * d + k2] -= dconjdK_loc * y[o, n] / p_ind
+                    else:
+                        ddK[k1 * d + k2] += D_ind * ddK_loc * 2 / m / n_povm
+                        dconjdK[k1 * d + k2] += D_ind * dconjdK_loc * 2 / m / n_povm
     return (
-        ddK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim) * 2 / m / n_povm,
-        dconjdK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim) * 2 / m / n_povm,
+        ddK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim),
+        dconjdK.reshape(d, d, rK, rK, pdim, pdim, pdim, pdim),
     )
 
 
-@njit(parallel=True, cache=True)
+@njit(cache=True)  # , parallel=True)
 def dA(X, A, B, J, y, r, pdim, n_povm):
     """Compute the derivative of the objective function with respect to the POVM tensor A
 
@@ -526,7 +573,8 @@ def dA(X, A, B, J, y, r, pdim, n_povm):
     rho = (B @ B.T.conj()).reshape(-1)
     dA_ = np.zeros((n_povm, pdim, pdim)).astype(np.complex128)
     m = len(J)
-    for n in prange(m):  # pylint: disable=not-an-iterable
+    # pylint: disable=not-an-iterable
+    for n in prange(m):
         j = J[n][J[n] >= 0]
         inner_deriv = contract(X, j) @ rho
         dA_step = np.zeros((n_povm, pdim, pdim)).astype(np.complex128)
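Throughout this file the diff drops `parallel=True` from the `@njit` decorators (leaving the old option behind as a trailing comment) while keeping the `prange` loops; without `parallel=True`, Numba compiles `prange` exactly like `range`, so the code stays correct and simply runs single-threaded. Pylint cannot tell that `prange` is iterable, hence the relocated `disable=not-an-iterable` comment. Both variants side by side:

```python
import numpy as np
from numba import njit, prange

@njit(cache=True)  # sequential: prange degrades gracefully to range
def row_sums_seq(a):
    out = np.zeros(a.shape[0])
    for i in prange(a.shape[0]):  # pylint: disable=not-an-iterable
        out[i] = a[i].sum()
    return out

@njit(cache=True, parallel=True)  # threaded: prange iterations run concurrently
def row_sums_par(a):
    out = np.zeros(a.shape[0])
    for i in prange(a.shape[0]):  # pylint: disable=not-an-iterable
        out[i] = a[i].sum()
    return out
```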
@@ -537,7 +585,7 @@ def dA(X, A, B, J, y, r, pdim, n_povm):
     return dA_ * 2 / m / n_povm
 
 
-@njit(parallel=True, cache=True)
+@njit(cache=True)  # , parallel=True)
 def dB(X, A, B, J, y, pdim):
     """Compute the derivative of the objective function with respect to the state tensor B.
 
@@ -577,8 +625,8 @@ def dB(X, A, B, J, y, pdim):
     return dB_
 
 
-@njit(parallel=True, cache=True)
-def ddA_derivs(X, A, B, J, y, r, pdim, n_povm):
+@njit(cache=True)  # , parallel=True)
+def ddA_derivs(X, A, B, J, y, r, pdim, n_povm, mle=False):
     """Calculate all nonzero terms of the second derivatives with respect to the POVM tensor A.
 
     Parameters
@@ -599,6 +647,8 @@ def ddA_derivs(X, A, B, J, y, r, pdim, n_povm):
         The dimension of the input matrices A and B.
     n_povm : int
         The number of POVM elements.
+    mle : bool
+        If True, the log-likelihood objective function is used, otherwise the least squares objective function is used
 
     Returns
     -------
@@ -629,21 +679,32 @@ def ddA_derivs(X, A, B, J, y, r, pdim, n_povm):
         dMconjdM_step = np.zeros((n_povm, r, r)).astype(np.complex128)
         dconjdA_step = np.zeros((n_povm, r, r)).astype(np.complex128)
         for o in range(n_povm):
-            D_ind = E[o].conj() @ R - y[o, n]
             dM = A[o].conj() @ R.reshape(pdim, pdim).T
-            dMdM_step[o] += np.outer(dM, dM)
-            dMconjdM_step[o] += np.outer(dM.conj(), dM)
-            dA_step[o] += D_ind * dM
-            dconjdA_step[o] += D_ind * np.kron(np.eye(pdim).astype(np.complex128), R.reshape(pdim, pdim).T)
+            if mle:
+                p_ind = E[o].conj() @ R
+                dMdM_step[o] += np.outer(dM, dM) * y[o, n] / p_ind**2
+                dMconjdM_step[o] += np.outer(dM.conj(), dM) * y[o, n] / p_ind**2
+                dA_step[o] -= dM * y[o, n] / p_ind
+                dconjdA_step[o] -= (
+                    np.kron(np.eye(pdim).astype(np.complex128), R.reshape(pdim, pdim).T) * y[o, n] / p_ind
+                )
+            else:
+                D_ind = E[o].conj() @ R - y[o, n]
+                dMdM_step[o] += np.outer(dM, dM) * 2 / m / n_povm
+                dMconjdM_step[o] += np.outer(dM.conj(), dM) * 2 / m / n_povm
+                dA_step[o] += D_ind * dM * 2 / m / n_povm
+                dconjdA_step[o] += (
+                    D_ind * np.kron(np.eye(pdim).astype(np.complex128), R.reshape(pdim, pdim).T) * 2 / m / n_povm
+                )
         dA_ += dA_step
         dMdM += dMdM_step
         dMconjdM += dMconjdM_step
         dconjdA += dconjdA_step
-    return dA_ * 2 / m / n_povm, dMdM * 2 / m / n_povm, dMconjdM * 2 / m / n_povm, dconjdA * 2 / m / n_povm
+    return dA_, dMdM, dMconjdM, dconjdA
 
 
-@njit(parallel=True, cache=True)
-def ddB_derivs(X, A, B, J, y, r, pdim):
+@njit(cache=True)  # , parallel=True)
+def ddB_derivs(X, A, B, J, y, r, pdim, mle=False):
     """Calculate all nonzero terms of the second derivative with respect to the state tensor B.
 
     Parameters
@@ -690,12 +751,17 @@ def ddB_derivs(X, A, B, J, y, r, pdim):
         C = contract(X, j)
         for o in range(n_povm):
             L = E[o].conj() @ C
-            D_ind = L @ rho - y[o, n]
-
             dM = L.reshape(pdim, pdim) @ B.conj()
-            dMdM += np.outer(dM, dM)
-            dMconjdM += np.outer(dM.conj(), dM)
-
-            dB_ += D_ind * dM
-            dconjdB += D_ind * np.kron(L.reshape(pdim, pdim), np.eye(pdim).astype(np.complex128))
-    return dB_ * 2 / m / n_povm, dMdM * 2 / m / n_povm, dMconjdM * 2 / m / n_povm, dconjdB.T * 2 / m / n_povm
+            if mle:
+                p_ind = L @ rho
+                dMdM += np.outer(dM, dM) * y[o, n] / p_ind**2
+                dMconjdM += np.outer(dM.conj(), dM) * y[o, n] / p_ind**2
+                dB_ -= dM * y[o, n] / p_ind
+                dconjdB -= np.kron(L.reshape(pdim, pdim), np.eye(pdim).astype(np.complex128)) * y[o, n] / p_ind
+            else:
+                D_ind = L @ rho - y[o, n]
+                dMdM += np.outer(dM, dM) * 2 / m / n_povm
+                dMconjdM += np.outer(dM.conj(), dM) * 2 / m / n_povm
+                dB_ += D_ind * dM * 2 / m / n_povm
+                dconjdB += D_ind * np.kron(L.reshape(pdim, pdim), np.eye(pdim).astype(np.complex128)) * 2 / m / n_povm
+    return dB_, dMdM, dMconjdM, dconjdB.T
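A closing observation that applies to every derivative routine above (`dK`, `dK_dMdM`, `ddM`, `ddA_derivs`, `ddB_derivs`): the global 2 / m / n_povm least-squares weight has moved from the return statements into the `else` branches, freeing the `mle` branches to apply their own per-outcome weights, −y/p on derivative terms and y/p² on products of first derivatives, which is exactly what differentiating −Σ y·log p twice produces.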