PyPI - lifejacket - Versions diffs - 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

lifejacket 1.0.0py3-none-any.whl → 1.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

lifejacket/calculate_derivatives.py +0 -2
lifejacket/constants.py +4 -16
lifejacket/deployment_conditioning_monitor.py +19 -12
lifejacket/form_adjusted_meat_adjustments_directly.py +25 -27
lifejacket/get_datum_for_blowup_supervised_learning.py +71 -77
lifejacket/helper_functions.py +15 -148
lifejacket/input_checks.py +49 -50
lifejacket/{after_study_analysis.py → post_deployment_analysis.py} +377 -144
lifejacket/small_sample_corrections.py +11 -13
{lifejacket-1.0.0.dist-info → lifejacket-1.1.0.dist-info}/METADATA +1 -1
lifejacket-1.1.0.dist-info/RECORD +17 -0
{lifejacket-1.0.0.dist-info → lifejacket-1.1.0.dist-info}/WHEEL +1 -1
lifejacket-1.1.0.dist-info/entry_points.txt +2 -0
lifejacket-1.0.0.dist-info/RECORD +0 -17
lifejacket-1.0.0.dist-info/entry_points.txt +0 -2
{lifejacket-1.0.0.dist-info → lifejacket-1.1.0.dist-info}/top_level.txt +0 -0

lifejacket/{after_study_analysis.py → post_deployment_analysis.py} RENAMED Viewed

@@ -53,6 +53,8 @@ logging.basicConfig(
     level=logging.INFO,
 )
+jax.config.update("jax_enable_x64", True)
 @click.group()
 def cli():
@@ -217,9 +219,9 @@ def cli():
     type=click.Choice(
         [
             SmallSampleCorrections.NONE,
-            SmallSampleCorrections.HC1theta,
-            SmallSampleCorrections.HC2theta,
-            SmallSampleCorrections.HC3theta,
+            SmallSampleCorrections.Z1theta,
+            SmallSampleCorrections.Z2theta,
+            SmallSampleCorrections.Z3theta,
         ]
     ),
     default=SmallSampleCorrections.NONE,
@@ -235,13 +237,13 @@ def cli():
     "--form_adjusted_meat_adjustments_explicitly",
     type=bool,
     default=False,
-    help="If True, explicitly forms the per-subject meat adjustments that differentiate the adaptive sandwich from the classical sandwich. This is for diagnostic purposes, as the adaptive sandwich is formed without doing this.",
+    help="If True, explicitly forms the per-subject meat adjustments that differentiate the adjusted sandwich from the classical sandwich. This is for diagnostic purposes, as the adjusted sandwich is formed without doing this.",
 )
 @click.option(
-    "--stabilize_joint_adjusted_bread_inverse",
+    "--stabilize_joint_bread",
     type=bool,
     default=True,
-    help="If True, stabilizes the joint adaptive bread inverse matrix if it does not meet conditioning thresholds.",
+    help="If True, stabilizes the joint bread matrix if it does not meet conditioning thresholds.",
 )
 def analyze_dataset_wrapper(**kwargs):
     """
@@ -324,15 +326,15 @@ def analyze_dataset(
     small_sample_correction: str,
     collect_data_for_blowup_supervised_learning: bool,
     form_adjusted_meat_adjustments_explicitly: bool,
-    stabilize_joint_adjusted_bread_inverse: bool,
+    stabilize_joint_bread: bool,
 ) -> None:
     """
-    Analyzes a dataset to provide a parameter estimate and an estimate of its variance using adaptive and classical sandwich estimators.
+    Analyzes a dataset to provide a parameter estimate and an estimate of its variance using adjusted and classical sandwich estimators.
     There are two modes of use for this function.
     First, it may be called indirectly from the command line by passing through
-    analyze_dataset.
+    analyze_dataset_wrapper.
     Second, it may be called directly from Python code with in-memory objects.
@@ -388,17 +390,17 @@ def analyze_dataset(
     small_sample_correction (str):
         Type of small sample correction to apply.
     collect_data_for_blowup_supervised_learning (bool):
-        Whether to collect data for doing supervised learning about adaptive sandwich blowup.
+        Whether to collect data for doing supervised learning about adjusted sandwich blowup.
     form_adjusted_meat_adjustments_explicitly (bool):
-        If True, explicitly forms the per-subject meat adjustments that differentiate the adaptive
+        If True, explicitly forms the per-subject meat adjustments that differentiate the adjusted
         sandwich from the classical sandwich. This is for diagnostic purposes, as the
-        adaptive sandwich is formed without doing this.
-    stabilize_joint_adjusted_bread_inverse (bool):
-        If True, stabilizes the joint adaptive bread inverse matrix if it does not meet conditioning
+        adjusted sandwich is formed without doing this.
+    stabilize_joint_bread (bool):
+        If True, stabilizes the joint bread matrix if it does not meet conditioning
         thresholds.
     Returns:
-    dict: A dictionary containing the theta estimate, adaptive sandwich variance estimate, and
+    dict: A dictionary containing the theta estimate, adjusted sandwich variance estimate, and
     classical sandwich variance estimate.
     """
@@ -438,7 +440,6 @@ def analyze_dataset(
         )
     ### Begin collecting data structures that will be used to compute the joint bread matrix.
     beta_index_by_policy_num, initial_policy_num = (
         construct_beta_index_by_policy_num_map(
             analysis_df, policy_num_col_name, active_col_name
@@ -475,20 +476,20 @@ def analyze_dataset(
         active_col_name,
     )
-    # Use a per-subject weighted estimating function stacking functino to derive classical and joint
-    # adaptive meat and inverse bread matrices.  This is facilitated because the *value* of the
+    # Use a per-subject weighted estimating function stacking function to derive classical and joint
+    # meat and bread matrices.  This is facilitated because the *value* of the
     # weighted and unweighted stacks are the same, as the weights evaluate to 1 pre-differentiation.
     logger.info(
-        "Constructing joint adaptive bread inverse matrix, joint adaptive meat matrix, the classical analogs, and the avg estimating function stack across subjects."
+        "Constructing joint bread matrix, joint meat matrix, the classical analogs, and the avg estimating function stack across subjects."
     )
     subject_ids = jnp.array(analysis_df[subject_id_col_name].unique())
     (
-        stabilized_joint_adjusted_bread_inverse_matrix,
-        raw_joint_adjusted_bread_inverse_matrix,
+        stabilized_joint_bread_matrix,
+        raw_joint_bread_matrix,
         joint_adjusted_meat_matrix,
-        joint_adjusted_sandwich_matrix,
-        classical_bread_inverse_matrix,
+        joint_sandwich_matrix,
+        classical_bread_matrix,
         classical_meat_matrix,
         classical_sandwich_var_estimate,
         avg_estimating_function_stack,
@@ -524,7 +525,7 @@ def analyze_dataset(
         suppress_interactive_data_checks,
         small_sample_correction,
         form_adjusted_meat_adjustments_explicitly,
-        stabilize_joint_adjusted_bread_inverse,
+        stabilize_joint_bread,
         analysis_df,
         active_col_name,
         action_col_name,
@@ -545,22 +546,19 @@ def analyze_dataset(
     # This bottom right corner of the joint (betas and theta) variance matrix is the portion
     # corresponding to just theta.
-    adjusted_sandwich_var_estimate = joint_adjusted_sandwich_matrix[
-        -theta_dim:, -theta_dim:
-    ]
+    adjusted_sandwich_var_estimate = joint_sandwich_matrix[-theta_dim:, -theta_dim:]
     # Check for negative diagonal elements and set them to zero if found
-    adaptive_diagonal = np.diag(adjusted_sandwich_var_estimate)
-    if np.any(adaptive_diagonal < 0):
+    adjusted_diagonal = np.diag(adjusted_sandwich_var_estimate)
+    if np.any(adjusted_diagonal < 0):
         logger.warning(
-            "Found negative diagonal elements in adaptive sandwich variance estimate. Setting them to zero."
+            "Found negative diagonal elements in adjusted sandwich variance estimate. Setting them to zero."
         )
         np.fill_diagonal(
-            adjusted_sandwich_var_estimate, np.maximum(adaptive_diagonal, 0)
+            adjusted_sandwich_var_estimate, np.maximum(adjusted_diagonal, 0)
         )
     logger.info("Writing results to file...")
-    # Write analysis results to same directory that input files are in
     output_folder_abs_path = pathlib.Path(output_dir).resolve()
     analysis_dict = {
@@ -574,25 +572,229 @@ def analyze_dataset(
             f,
         )
-    joint_adjusted_bread_inverse_cond = jnp.linalg.cond(
-        raw_joint_adjusted_bread_inverse_matrix
+    joint_bread_cond = jnp.linalg.cond(raw_joint_bread_matrix)
+    logger.info(
+        "Joint bread condition number: %f",
+        joint_bread_cond,
+    )
+    # calculate the max eigenvalue of the theta-only adjusted sandwich
+    max_eigenvalue_theta_only_adjusted_sandwich = scipy.linalg.eigvalsh(
+        adjusted_sandwich_var_estimate
+    ).max()
+    logger.info(
+        "Max eigenvalue of theta-only adjusted sandwich matrix: %f",
+        max_eigenvalue_theta_only_adjusted_sandwich,
+    )
+    # Compute ratios: max eigenvalue / median eigenvalue among those >= 1e-8 * max.
+    eigvals_joint_sandwich = scipy.linalg.eigvalsh(joint_sandwich_matrix)
+    max_eig_joint = float(eigvals_joint_sandwich.max())
+    logger.info(
+        "Max eigenvalue of joint adjusted sandwich matrix: %f",
+        max_eig_joint,
+    )
+    joint_keep = eigvals_joint_sandwich >= (1e-8 * max_eig_joint)
+    joint_median_kept = (
+        float(np.median(eigvals_joint_sandwich[joint_keep]))
+        if np.any(joint_keep)
+        else math.nan
+    )
+    max_to_median_ratio_joint_sandwich = (
+        (max_eig_joint / joint_median_kept)
+        if (not math.isnan(joint_median_kept) and joint_median_kept > 0)
+        else (
+            math.inf
+            if (not math.isnan(joint_median_kept) and joint_median_kept == 0)
+            else math.nan
+        )
     )
     logger.info(
-        "Joint adjusted bread inverse condition number: %f",
-        joint_adjusted_bread_inverse_cond,
+        "Max/median eigenvalue ratio (joint sandwich; median over eigvals >= 1e-8*max): %f",
+        max_to_median_ratio_joint_sandwich,
+    )
+    eigvals_theta_only_adjusted_sandwich = scipy.linalg.eigvalsh(
+        adjusted_sandwich_var_estimate
     )
+    max_eig_theta = float(eigvals_theta_only_adjusted_sandwich.max())
+    theta_keep = eigvals_theta_only_adjusted_sandwich >= (1e-8 * max_eig_theta)
+    theta_median_kept = (
+        float(np.median(eigvals_theta_only_adjusted_sandwich[theta_keep]))
+        if np.any(theta_keep)
+        else math.nan
+    )
+    max_to_median_ratio_theta_only_adjusted_sandwich = (
+        (max_eig_theta / theta_median_kept)
+        if (not math.isnan(theta_median_kept) and theta_median_kept > 0)
+        else (
+            math.inf
+            if (not math.isnan(theta_median_kept) and theta_median_kept == 0)
+            else math.nan
+        )
+    )
+    logger.info(
+        "Max/median eigenvalue ratio (theta-only adjusted sandwich; median over eigvals >= 1e-8*max): %f",
+        max_to_median_ratio_theta_only_adjusted_sandwich,
+    )
+    # --- Local linearization validity diagnostic (single-run) ---
+    # We compare the nonlinear Taylor remainder of the joint estimating-function map to the
+    # retained linear term, at perturbations on the O(1/sqrt(n)) scale.
+    #
+    # Define r(delta) = || g(eta+delta) - g(eta) - B delta ||_2 / || B delta ||_2,
+    # where g(eta) is the avg per-subject weighted estimating-function stack and B is the
+    # stabilized joint bread (Jacobian of g w.r.t. flattened betas+theta).
+    #
+    # This ratio is dimensionless and can be used as a necessary/sanity diagnostic that the
+    # first-order linearization is locally accurate at the estimation scale.
+    def _compute_local_linearization_error_ratio() -> tuple[float, float]:
+        # Ensure float64 for diagnostics even if upstream ran in float32.
+        joint_bread_float64 = jnp.asarray(
+            stabilized_joint_bread_matrix, dtype=jnp.float64
+        )
+        g_hat = jnp.asarray(avg_estimating_function_stack, dtype=jnp.float64)
+        stacks_float64 = jnp.asarray(
+            per_subject_estimating_function_stacks, dtype=jnp.float64
+        )
+        num_subjects = stacks_float64.shape[0]
+        def _eval_avg_stack_jit(flattened_betas_and_theta: jnp.ndarray) -> jnp.ndarray:
+            return jnp.asarray(
+                get_avg_weighted_estimating_function_stacks_and_aux_values(
+                    flattened_betas_and_theta,
+                    beta_dim,
+                    theta_dim,
+                    subject_ids,
+                    action_prob_func,
+                    action_prob_func_args_beta_index,
+                    alg_update_func,
+                    alg_update_func_type,
+                    alg_update_func_args_beta_index,
+                    alg_update_func_args_action_prob_index,
+                    alg_update_func_args_action_prob_times_index,
+                    alg_update_func_args_previous_betas_index,
+                    inference_func,
+                    inference_func_type,
+                    inference_func_args_theta_index,
+                    inference_func_args_action_prob_index,
+                    action_prob_func_args,
+                    policy_num_by_decision_time_by_subject_id,
+                    initial_policy_num,
+                    beta_index_by_policy_num,
+                    inference_func_args_by_subject_id,
+                    inference_action_prob_decision_times_by_subject_id,
+                    alg_update_func_args,
+                    action_by_decision_time_by_subject_id,
+                    True,  # suppress_all_data_checks
+                    True,  # suppress_interactive_data_checks
+                    False,  # include_auxiliary_outputs
+                ),
+                dtype=jnp.float64,
+            )
+        # Evaluate at the final estimate.
+        eta_hat = jnp.asarray(
+            flatten_params(all_post_update_betas, theta_est), dtype=jnp.float64
+        )
+        # Draw perturbations delta_j on the O(1/sqrt(n)) scale, aligned with the empirical
+        # joint estimating function stack covariance, without forming a d_joint x d_joint matrix
+        # square-root. If G is the (n x d) matrix of per-subject stacks, then (1/n) G^T G is the
+        # empirical covariance in joint estimating function stack space. Sampling u = (G^T w)/sqrt(n) with w~N(0, I_n) gives
+        # u ~ N(0, empirical joint estimating function stack covariance G^T G/n ) in joint estimating function stack space.
+        key = jax.random.PRNGKey(0)
+        # The number of perturbations we will probe
+        J = 15
+        # Each requires num_subjects standard normal draws, which we will then transform
+        # into joint estimating function space perturbations in U
+        W = jax.random.normal(key, shape=(J, num_subjects), dtype=jnp.float64)
+        # Joint estimating function space perturbations: u_j in R^{d_joint}
+        # U = (1/sqrt(n)) * W G, where rows of G are g_i^T
+        U = (W @ stacks_float64) / jnp.sqrt(num_subjects)
+        # Parameter perturbations: delta = (c/sqrt(n)) * B^{-1} u
+        # Use solve rather than explicit inverse.
+        c = 1.0
+        delta = (c / jnp.sqrt(num_subjects)) * jnp.linalg.solve(
+            joint_bread_float64, U.T
+        ).T
+        # Compute ratios r_j.
+        # NOTE: We use the Euclidean norm in score space; this is dimensionless and avoids
+        # forming/pseudoinverting a potentially rank-deficient matrix.
+        B_delta = (joint_bread_float64 @ delta.T).T
+        g_plus = jax.vmap(lambda d: _eval_avg_stack_jit(eta_hat + d))(delta)
+        remainder = g_plus - g_hat - B_delta
+        denom = jnp.linalg.norm(B_delta, axis=1)
+        numer = jnp.linalg.norm(remainder, axis=1)
+        # Avoid division by zero (should not happen unless delta collapses numerically).
+        ratios = jnp.where(denom > 0, numer / denom, jnp.inf)
+        local_error_ratio_median = float(jnp.median(ratios))
+        local_error_ratio_p90 = float(jnp.quantile(ratios, 0.9))
+        local_error_ratio_max = float(jnp.max(ratios))
+        logger.info(
+            "Local linearization error ratio (median over %d draws): %.6f",
+            J,
+            local_error_ratio_median,
+        )
+        logger.info(
+            "Local linearization error ratio (90th pct over %d draws): %.6f",
+            J,
+            local_error_ratio_p90,
+        )
+        logger.info(
+            "Local linearization error ratio (max over %d draws): %.6f",
+            J,
+            local_error_ratio_max,
+        )
+        return local_error_ratio_median, local_error_ratio_p90, local_error_ratio_max
+    try:
+        local_error_ratio_median, local_error_ratio_p90, local_error_ratio_max = (
+            _compute_local_linearization_error_ratio()
+        )
+    except Exception as e:
+        # This diagnostic is best-effort; failure should not break analysis.
+        logger.warning(
+            "Failed to compute local linearization error ratio diagnostic: %s",
+            str(e),
+        )
+        local_error_ratio_median = math.nan
+        local_error_ratio_p90 = math.nan
+        local_error_ratio_max = math.nan
     debug_pieces_dict = {
         "theta_est": theta_est,
         "adjusted_sandwich_var_estimate": adjusted_sandwich_var_estimate,
         "classical_sandwich_var_estimate": classical_sandwich_var_estimate,
-        "raw_joint_bread_inverse_matrix": raw_joint_adjusted_bread_inverse_matrix,
-        "stabilized_joint_bread_inverse_matrix": stabilized_joint_adjusted_bread_inverse_matrix,
+        "raw_joint_bread_matrix": raw_joint_bread_matrix,
+        "stabilized_joint_bread_matrix": stabilized_joint_bread_matrix,
         "joint_meat_matrix": joint_adjusted_meat_matrix,
-        "classical_bread_inverse_matrix": classical_bread_inverse_matrix,
+        "classical_bread_matrix": classical_bread_matrix,
         "classical_meat_matrix": classical_meat_matrix,
         "all_estimating_function_stacks": per_subject_estimating_function_stacks,
-        "joint_bread_inverse_condition_number": joint_adjusted_bread_inverse_cond,
+        "joint_bread_condition_number": joint_bread_cond,
+        "max_eigenvalue_joint_sandwich": max_eig_joint,
+        "all_eigenvalues_joint_sandwich": eigvals_joint_sandwich,
+        "max_to_median_ratio_joint_sandwich": max_to_median_ratio_joint_sandwich,
+        "max_eigenvalue_theta_only_adjusted_sandwich": max_eig_theta,
+        "all_eigenvalues_theta_only_adjusted_sandwich": eigvals_theta_only_adjusted_sandwich,
+        "max_to_median_ratio_theta_only_adjusted_sandwich": max_to_median_ratio_theta_only_adjusted_sandwich,
+        "local_linearization_error_ratio_median": local_error_ratio_median,
+        "local_linearization_error_ratio_p90": local_error_ratio_p90,
+        "local_linearization_error_ratio_max": local_error_ratio_max,
         "all_post_update_betas": all_post_update_betas,
         "per_subject_adjusted_corrections": per_subject_adjusted_corrections,
         "per_subject_classical_corrections": per_subject_classical_corrections,
@@ -606,8 +808,8 @@ def analyze_dataset(
     if collect_data_for_blowup_supervised_learning:
         datum_and_label_dict = get_datum_for_blowup_supervised_learning.get_datum_for_blowup_supervised_learning(
-            raw_joint_adjusted_bread_inverse_matrix,
-            joint_adjusted_bread_inverse_cond,
+            raw_joint_bread_matrix,
+            joint_bread_cond,
             avg_estimating_function_stack,
             per_subject_estimating_function_stacks,
             all_post_update_betas,
@@ -752,12 +954,16 @@ def single_subject_weighted_estimating_function_stacker(
     policy_num_by_decision_time: dict[collections.abc.Hashable, dict[int, int | float]],
     action_by_decision_time: dict[collections.abc.Hashable, dict[int, int]],
     beta_index_by_policy_num: dict[int | float, int],
-) -> tuple[
-    jnp.ndarray[jnp.float32],
-    jnp.ndarray[jnp.float32],
-    jnp.ndarray[jnp.float32],
-    jnp.ndarray[jnp.float32],
-]:
+    include_auxiliary_outputs: bool = True,
+) -> (
+    tuple[
+        jnp.ndarray[jnp.float32],
+        jnp.ndarray[jnp.float32],
+        jnp.ndarray[jnp.float32],
+        jnp.ndarray[jnp.float32],
+    ]
+    | jnp.ndarray[jnp.float32]
+):
     """
     Computes a weighted estimating function stack for a given algorithm estimating function
     and arguments, inference estimating function and arguments, and action probability function and
@@ -821,12 +1027,23 @@ def single_subject_weighted_estimating_function_stacker(
             A dictionary mapping policy numbers to the index of the corresponding beta in
             all_post_update_betas. Note that this is only for non-initial, non-fallback policies.
+        include_auxiliary_outputs (bool):
+            If True, returns a 4-tuple of the weighted estimating function stack and the adjusted
+            meat, classical meat, and classical bread contributions. If False, only returns the
+            weighted estimating function stack.
     Returns:
         jnp.ndarray: A 1-D JAX NumPy array representing the subject's weighted estimating function
             stack.
-        jnp.ndarray: A 2-D JAX NumPy matrix representing the subject's adaptive meat contribution.
+        jnp.ndarray: A 2-D JAX NumPy matrix representing the subject's adjusted meat contribution.
         jnp.ndarray: A 2-D JAX NumPy matrix representing the subject's classical meat contribution.
         jnp.ndarray: A 2-D JAX NumPy matrix representing the subject's classical bread contribution.
+        or
+        jnp.ndarray: A 1-D JAX NumPy array representing the subject's weighted estimating function
+            stack.
+        depending on the value of include_auxiliary_outputs.
     """
     logger.info(
@@ -1008,22 +1225,26 @@ def single_subject_weighted_estimating_function_stacker(
     # 6. Return the following outputs:
     # a. The first is simply the weighted estimating function stack for this subject. The average
-    # of these is what we differentiate with respect to theta to form the inverse adaptive joint
+    # of these is what we differentiate with respect to theta to form the joint
     # bread matrix, and we also compare that average to zero to check the estimating functions'
     # fidelity.
-    # b. The average outer product of these per-subject stacks across subjects is the adaptive joint meat
+    # b. The average outer product of these per-subject stacks across subjects is the adjusted joint meat
     # matrix, hence the second output.
     # c. The third output is averaged across subjects to obtain the classical meat matrix.
     # d. The fourth output is averaged across subjects to obtain the inverse classical bread
     # matrix.
-    return (
-        weighted_stack,
-        jnp.outer(weighted_stack, weighted_stack),
-        jnp.outer(inference_component, inference_component),
-        jax.jacrev(inference_estimating_func, argnums=inference_func_args_theta_index)(
-            *threaded_inference_func_args
-        ),
-    )
+    if include_auxiliary_outputs:
+        return (
+            weighted_stack,
+            jnp.outer(weighted_stack, weighted_stack),
+            jnp.outer(inference_component, inference_component),
+            jax.jacrev(
+                inference_estimating_func, argnums=inference_func_args_theta_index
+            )(*threaded_inference_func_args),
+        )
+    else:
+        return weighted_stack
 def get_avg_weighted_estimating_function_stacks_and_aux_values(
@@ -1063,12 +1284,13 @@ def get_avg_weighted_estimating_function_stacks_and_aux_values(
     ],
     suppress_all_data_checks: bool,
     suppress_interactive_data_checks: bool,
+    include_auxiliary_outputs: bool = True,
 ) -> tuple[
     jnp.ndarray, tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray]
 ]:
     """
     Computes the average weighted estimating function stack across all subjects, along with
-    auxiliary values used to construct the adaptive and classical sandwich variances.
+    auxiliary values used to construct the adjusted and classical sandwich variances.
     Args:
         flattened_betas_and_theta (jnp.ndarray):
@@ -1137,18 +1359,26 @@ def get_avg_weighted_estimating_function_stacks_and_aux_values(
             If True, suppresses interactive data checks that would otherwise be performed to ensure
             the correctness of the threaded arguments. The checks are still performed, but
             any interactive prompts are suppressed.
+        include_auxiliary_outputs (bool):
+            If True, returns the adjusted meat, classical meat, and classical bread contributions in addition to the average weighted estimating function stack.
+            If False, returns only the average weighted estimating function stack.
     Returns:
         jnp.ndarray:
             A 2D JAX NumPy array holding the average weighted estimating function stack.
         tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray, jnp.ndarray]:
             A tuple containing
             1. the average weighted estimating function stack
-            2. the subject-level adaptive meat matrix contributions
+            2. the subject-level adjusted meat matrix contributions
             3. the subject-level classical meat matrix contributions
             4. the subject-level inverse classical bread matrix contributions
             5. raw per-subject weighted estimating function
             stacks.
+        or jnp.ndarray:
+            A JAX NumPy array holding only the average weighted estimating function stack
+            across subjects.
+        depending on the value of include_auxiliary_outputs.
     """
     # 1. Collect estimating functions by differentiating the loss functions if needed.
@@ -1248,7 +1478,7 @@ def get_avg_weighted_estimating_function_stacks_and_aux_values(
         )
     # 5. Now we can compute the weighted estimating function stacks for all subjects
-    # as well as collect related values used to construct the adaptive and classical
+    # as well as collect related values used to construct the adjusted and classical
     # sandwich variances.
     results = [
         single_subject_weighted_estimating_function_stacker(
@@ -1271,16 +1501,21 @@ def get_avg_weighted_estimating_function_stacks_and_aux_values(
     ]
     stacks = jnp.array([result[0] for result in results])
+    if not include_auxiliary_outputs:
+        return jnp.mean(stacks, axis=0)
     outer_products = jnp.array([result[1] for result in results])
     inference_only_outer_products = jnp.array([result[2] for result in results])
     inference_hessians = jnp.array([result[3] for result in results])
     # 6. Note this strange return structure! We will differentiate the first output,
     # but the second tuple will be passed along without modification via has_aux=True and then used
-    # for the adaptive meat matrix, estimating functions sum check, and classical meat and inverse
-    # bread matrices. The raw per-subject stacks are also returned for debugging purposes.
+    # for the estimating functions sum check, per_subject_classical_bread_contributions, and
+    # classical meat and inverse bread matrices. The raw per-subject stacks are also returned for
+    # debugging purposes.
-    # Note that returning the raw stacks here as the first arguments is potentially
+    # Note that returning the raw stacks here as the first argument is potentially
     # memory-intensive when combined with differentiation. Keep this in mind if the per-subject bread
     # inverse contributions are needed for something like CR2/CR3 small-sample corrections.
     return jnp.mean(stacks, axis=0), (
@@ -1330,7 +1565,7 @@ def construct_classical_and_adjusted_sandwiches(
     suppress_interactive_data_checks: bool,
     small_sample_correction: str,
     form_adjusted_meat_adjustments_explicitly: bool,
-    stabilize_joint_adjusted_bread_inverse: bool,
+    stabilize_joint_bread: bool,
     analysis_df: pd.DataFrame | None,
     active_col_name: str | None,
     action_col_name: str | None,
@@ -1352,11 +1587,11 @@ def construct_classical_and_adjusted_sandwiches(
     jnp.ndarray[jnp.float32],
 ]:
     """
-    Constructs the classical and adaptive sandwich matrices, as well as various
+    Constructs the classical and adjusted sandwich matrices, as well as various
     intermediate pieces in their construction.
     This is done by computing and differentiating the average weighted estimating function stack
-    with respect to the betas and theta, using the resulting Jacobian to compute the inverse bread
+    with respect to the betas and theta, using the resulting Jacobian to compute the bread
     and meat matrices, and then stably computing sandwiches.
     Args:
@@ -1426,13 +1661,13 @@ def construct_classical_and_adjusted_sandwiches(
             The type of small sample correction to apply. See SmallSampleCorrections class for
             options.
         form_adjusted_meat_adjustments_explicitly (bool):
-            If True, explicitly forms the per-subject meat adjustments that differentiate the adaptive
+            If True, explicitly forms the per-subject meat adjustments that differentiate the adjusted
             sandwich from the classical sandwich. This is for diagnostic purposes, as the
-            adaptive sandwich is formed without doing this.
-        stabilize_joint_adjusted_bread_inverse (bool):
-            If True, will apply various techniques to stabilize the joint adaptive bread inverse if necessary.
+            adjusted sandwich is formed without doing this.
+        stabilize_joint_bread (bool):
+            If True, will apply various techniques to stabilize the joint bread if necessary.
         analysis_df (pd.DataFrame):
-            The full analysis dataframe, needed if forming the adaptive meat adjustments explicitly.
+            The full analysis dataframe, needed if forming the adjusted meat adjustments explicitly.
         active_col_name (str):
             The name of the column in analysis_df indicating whether a subject is active at a given decision time.
         action_col_name (str):
@@ -1443,25 +1678,25 @@ def construct_classical_and_adjusted_sandwiches(
             The name of the column in analysis_df indicating the subject ID.
         action_prob_func_args (tuple):
             The arguments to be passed to the action probability function, needed if forming the
-            adaptive meat adjustments explicitly.
+            adjusted meat adjustments explicitly.
         action_prob_col_name (str):
             The name of the column in analysis_df indicating the action probability of the action taken,
-            needed if forming the adaptive meat adjustments explicitly.
+            needed if forming the adjusted meat adjustments explicitly.
     Returns:
         tuple[jnp.ndarray[jnp.float32], jnp.ndarray[jnp.float32], jnp.ndarray[jnp.float32], jnp.ndarray[jnp.float32], jnp.ndarray[jnp.float32]]:
             A tuple containing:
-            - The raw joint adaptive inverse bread matrix.
-            - The (possibly) stabilized joint adaptive inverse bread matrix.
-            - The joint adaptive meat matrix.
-            - The joint adaptive sandwich matrix.
-            - The classical inverse bread matrix.
+            - The raw joint bread matrix.
+            - The (possibly) stabilized joint bread matrix.
+            - The joint meat matrix.
+            - The joint sandwich matrix.
+            - The classical bread matrix.
             - The classical meat matrix.
             - The classical sandwich matrix.
             - The average weighted estimating function stack.
             - All per-subject weighted estimating function stacks.
-            - The per-subject adaptive meat small-sample corrections.
+            - The per-subject adjusted meat small-sample corrections.
             - The per-subject classical meat small-sample corrections.
-            - The per-subject adaptive meat adjustments, if form_adjusted_meat_adjustments_explicitly
+            - The per-subject adjusted meat adjustments, if form_adjusted_meat_adjustments_explicitly
               is True, otherwise an array of NaNs.
     """
     logger.info(
@@ -1470,11 +1705,11 @@ def construct_classical_and_adjusted_sandwiches(
     theta_dim = theta_est.shape[0]
     beta_dim = all_post_update_betas.shape[1]
     # Note that these "contributions" are per-subject Jacobians of the weighted estimating function stack.
-    raw_joint_adjusted_bread_inverse_matrix, (
+    raw_joint_bread_matrix, (
         avg_estimating_function_stack,
         per_subject_joint_adjusted_meat_contributions,
         per_subject_classical_meat_contributions,
-        per_subject_classical_bread_inverse_contributions,
+        per_subject_classical_bread_contributions,
         per_subject_estimating_function_stacks,
     ) = jax.jacrev(
         get_avg_weighted_estimating_function_stacks_and_aux_values, has_aux=True
@@ -1521,40 +1756,38 @@ def construct_classical_and_adjusted_sandwiches(
         small_sample_correction,
         per_subject_joint_adjusted_meat_contributions,
         per_subject_classical_meat_contributions,
-        per_subject_classical_bread_inverse_contributions,
+        per_subject_classical_bread_contributions,
         num_subjects,
         theta_dim,
     )
     # Increase diagonal block dominance and possibly improve conditioning of diagonal
-    # blocks as necessary, to ensure mathematical stability of joint bread inverse
-    stabilized_joint_adjusted_bread_inverse_matrix = (
+    # blocks as necessary, to ensure mathematical stability of joint bread
+    stabilized_joint_bread_matrix = (
         (
-            stabilize_joint_adjusted_bread_inverse_if_necessary(
-                raw_joint_adjusted_bread_inverse_matrix,
+            stabilize_joint_bread_if_necessary(
+                raw_joint_bread_matrix,
                 beta_dim,
                 theta_dim,
             )
         )
-        if stabilize_joint_adjusted_bread_inverse
-        else raw_joint_adjusted_bread_inverse_matrix
+        if stabilize_joint_bread
+        else raw_joint_bread_matrix
     )
     # Now stably (no explicit inversion) form our sandwiches.
-    joint_adjusted_sandwich = form_sandwich_from_bread_inverse_and_meat(
-        stabilized_joint_adjusted_bread_inverse_matrix,
+    joint_sandwich = form_sandwich_from_bread_and_meat(
+        stabilized_joint_bread_matrix,
         joint_adjusted_meat_matrix,
         num_subjects,
-        method=SandwichFormationMethods.BREAD_INVERSE_T_QR,
-    )
-    classical_bread_inverse_matrix = jnp.mean(
-        per_subject_classical_bread_inverse_contributions, axis=0
+        method=SandwichFormationMethods.BREAD_T_QR,
     )
-    classical_sandwich = form_sandwich_from_bread_inverse_and_meat(
-        classical_bread_inverse_matrix,
+    classical_bread_matrix = jnp.mean(per_subject_classical_bread_contributions, axis=0)
+    classical_sandwich = form_sandwich_from_bread_and_meat(
+        classical_bread_matrix,
         classical_meat_matrix,
         num_subjects,
-        method=SandwichFormationMethods.BREAD_INVERSE_T_QR,
+        method=SandwichFormationMethods.BREAD_T_QR,
     )
     per_subject_adjusted_meat_adjustments = jnp.full(
@@ -1565,7 +1798,7 @@ def construct_classical_and_adjusted_sandwiches(
             form_adjusted_meat_adjustments_directly(
                 theta_dim,
                 all_post_update_betas.shape[1],
-                stabilized_joint_adjusted_bread_inverse_matrix,
+                stabilized_joint_bread_matrix,
                 per_subject_estimating_function_stacks,
                 analysis_df,
                 active_col_name,
@@ -1582,9 +1815,9 @@ def construct_classical_and_adjusted_sandwiches(
                 action_prob_col_name,
             )
         )
-        # Validate that the adaptive meat adjustments we just formed are accurate by constructing
-        # the theta-only adaptive sandwich from them and checking that it matches the standard result
-        # we get by taking a subset of the joint adaptive sandwich.
+        # Validate that the adjusted meat adjustments we just formed are accurate by constructing
+        # the theta-only adjusted sandwich from them and checking that it matches the standard result
+        # we get by taking a subset of the joint sandwich.
         # First just apply any small-sample correction for parity.
         (
             _,
@@ -1595,19 +1828,19 @@ def construct_classical_and_adjusted_sandwiches(
             small_sample_correction,
             per_subject_joint_adjusted_meat_contributions,
             per_subject_adjusted_classical_meat_contributions,
-            per_subject_classical_bread_inverse_contributions,
+            per_subject_classical_bread_contributions,
             num_subjects,
             theta_dim,
         )
         theta_only_adjusted_sandwich_from_adjustments = (
-            form_sandwich_from_bread_inverse_and_meat(
-                classical_bread_inverse_matrix,
+            form_sandwich_from_bread_and_meat(
+                classical_bread_matrix,
                 theta_only_adjusted_meat_matrix_v2,
                 num_subjects,
-                method=SandwichFormationMethods.BREAD_INVERSE_T_QR,
+                method=SandwichFormationMethods.BREAD_T_QR,
             )
         )
-        theta_only_adjusted_sandwich = joint_adjusted_sandwich[-theta_dim:, -theta_dim:]
+        theta_only_adjusted_sandwich = joint_sandwich[-theta_dim:, -theta_dim:]
         if not np.allclose(
             theta_only_adjusted_sandwich,
@@ -1615,17 +1848,17 @@ def construct_classical_and_adjusted_sandwiches(
             rtol=3e-2,
         ):
             logger.warning(
-                "There may be a bug in the explicit meat adjustment calculation (this doesn't affect the actual calculation, just diagnostics). We've calculated the theta-only adaptive sandwich two different ways and they do not match sufficiently."
+                "There may be a bug in the explicit meat adjustment calculation (this doesn't affect the actual calculation, just diagnostics). We've calculated the theta-only adjusted sandwich two different ways and they do not match sufficiently."
             )
-    # Stack the joint adaptive inverse bread pieces together horizontally and return the auxiliary
-    # values too. The joint adaptive bread inverse should always be block lower triangular.
+    # Stack the joint bread pieces together horizontally and return the auxiliary
+    # values too. The joint bread should always be block lower triangular.
     return (
-        raw_joint_adjusted_bread_inverse_matrix,
-        stabilized_joint_adjusted_bread_inverse_matrix,
+        raw_joint_bread_matrix,
+        stabilized_joint_bread_matrix,
         joint_adjusted_meat_matrix,
-        joint_adjusted_sandwich,
-        classical_bread_inverse_matrix,
+        joint_sandwich,
+        classical_bread_matrix,
         classical_meat_matrix,
         classical_sandwich,
         avg_estimating_function_stack,
@@ -1639,25 +1872,25 @@ def construct_classical_and_adjusted_sandwiches(
 # TODO: I think there should be an interactive prompt to confirm stabilization. It is
 # important for the user to know if this is happening. Even if enabled, it is important
 # that the user knows when it actually kicks in.
-def stabilize_joint_adjusted_bread_inverse_if_necessary(
-    joint_adjusted_bread_inverse_matrix: jnp.ndarray,
+def stabilize_joint_bread_if_necessary(
+    joint_bread_matrix: jnp.ndarray,
     beta_dim: int,
     theta_dim: int,
 ) -> jnp.ndarray:
     """
-    Stabilizes the joint adaptive bread inverse matrix if necessary by increasing diagonal block
+    Stabilizes the joint bread matrix if necessary by increasing diagonal block
     dominance and/or adding a small ridge penalty to the diagonal blocks.
     Args:
-        joint_adjusted_bread_inverse_matrix (jnp.ndarray):
-            A 2-D JAX NumPy array representing the joint adaptive bread inverse matrix.
+        joint_bread_matrix (jnp.ndarray):
+            A 2-D JAX NumPy array representing the joint bread matrix.
         beta_dim (int):
             The dimension of each beta parameter.
         theta_dim (int):
             The dimension of the theta parameter.
     Returns:
         jnp.ndarray:
-            A 2-D NumPy array representing the stabilized joint adaptive bread inverse matrix.
+            A 2-D NumPy array representing the stabilized joint bread matrix.
     """
     # TODO: come up with more sophisticated settings here. These are maybe a little loose,
@@ -1670,7 +1903,7 @@ def stabilize_joint_adjusted_bread_inverse_if_necessary(
     # Grab just the RL block and convert it to a NumPy array for easier manipulation.
     RL_stack_beta_derivatives_block = np.array(
-        joint_adjusted_bread_inverse_matrix[:-theta_dim, :-theta_dim]
+        joint_bread_matrix[:-theta_dim, :-theta_dim]
     )
     num_updates = RL_stack_beta_derivatives_block.shape[0] // beta_dim
     for i in range(1, num_updates + 1):
@@ -1789,31 +2022,31 @@ def stabilize_joint_adjusted_bread_inverse_if_necessary(
         [
             [
                 RL_stack_beta_derivatives_block,
-                joint_adjusted_bread_inverse_matrix[:-theta_dim, -theta_dim:],
+                joint_bread_matrix[:-theta_dim, -theta_dim:],
             ],
             [
-                joint_adjusted_bread_inverse_matrix[-theta_dim:, :-theta_dim],
-                joint_adjusted_bread_inverse_matrix[-theta_dim:, -theta_dim:],
+                joint_bread_matrix[-theta_dim:, :-theta_dim],
+                joint_bread_matrix[-theta_dim:, -theta_dim:],
             ],
         ]
     )
-def form_sandwich_from_bread_inverse_and_meat(
-    bread_inverse: jnp.ndarray,
+def form_sandwich_from_bread_and_meat(
+    bread: jnp.ndarray,
     meat: jnp.ndarray,
     num_subjects: int,
-    method: str = SandwichFormationMethods.BREAD_INVERSE_T_QR,
+    method: str = SandwichFormationMethods.BREAD_T_QR,
 ) -> jnp.ndarray:
     """
-    Forms a sandwich variance matrix from the provided bread inverse and meat matrices.
+    Forms a sandwich variance matrix from the provided bread and meat matrices.
-    Attempts to do so STABLY without ever forming the bread matrix itself
+    Attempts to do so STABLY without ever forming the bread inverse matrix itself
     (except with naive option).
     Args:
-        bread_inverse (jnp.ndarray):
-            A 2-D JAX NumPy array representing the bread inverse matrix.
+        bread (jnp.ndarray):
+            A 2-D JAX NumPy array representing the bread matrix.
         meat (jnp.ndarray):
             A 2-D JAX NumPy array representing the meat matrix.
         num_subjects (int):
@@ -1821,12 +2054,12 @@ def form_sandwich_from_bread_inverse_and_meat(
         method (str):
             The method to use for forming the sandwich.
-            SandwichFormationMethods.BREAD_INVERSE_T_QR uses the QR decomposition of the transpose
-            of the bread inverse matrix.
+            SandwichFormationMethods.BREAD_T_QR uses the QR decomposition of the transpose
+            of the bread matrix.
             SandwichFormationMethods.MEAT_SVD_SOLVE uses a decomposition of the meat matrix.
-            SandwichFormationMethods.NAIVE simply inverts the bread inverse and forms the sandwich.
+            SandwichFormationMethods.NAIVE simply inverts the bread and forms the sandwich.
     Returns:
@@ -1834,9 +2067,9 @@ def form_sandwich_from_bread_inverse_and_meat(
             A 2-D JAX NumPy array representing the sandwich variance matrix.
     """
-    if method == SandwichFormationMethods.BREAD_INVERSE_T_QR:
+    if method == SandwichFormationMethods.BREAD_T_QR:
         # QR of B^T → Q orthogonal, R upper triangular; L = R^T lower triangular
-        Q, R = np.linalg.qr(bread_inverse.T, mode="reduced")
+        Q, R = np.linalg.qr(bread.T, mode="reduced")
         L = R.T
         new_meat = scipy.linalg.solve_triangular(
@@ -1854,21 +2087,21 @@ def form_sandwich_from_bread_inverse_and_meat(
         C_right = Vh.T * np.sqrt(s)
         # Solve B W_left = C_left and B W_right = C_right (no explicit inverses).
-        W_left = scipy.linalg.solve(bread_inverse, C_left)
-        W_right = scipy.linalg.solve(bread_inverse, C_right)
+        W_left = scipy.linalg.solve(bread, C_left)
+        W_right = scipy.linalg.solve(bread, C_right)
         # Return the exact sandwich: V = (B^{-1} C_left) (B^{-1} C_right)^T / num_subjects
         return W_left @ W_right.T / num_subjects
     elif method == SandwichFormationMethods.NAIVE:
-        # Simply invert the bread inverse and form the sandwich directly.
+        # Simply invert the bread and form the sandwich directly.
         # This is NOT numerically stable and is only included for comparison purposes.
-        bread = np.linalg.inv(bread_inverse)
-        return bread @ meat @ meat.T / num_subjects
+        bread_inverse = np.linalg.inv(bread)
+        return bread_inverse @ meat @ bread_inverse.T / num_subjects
     else:
         raise ValueError(
-            f"Unknown sandwich method: {method}. Please use 'bread_inverse_t_qr' or 'meat_decomposition_solve'."
+            f"Unknown sandwich method: {method}. Please use 'bread_t_qr' or 'meat_decomposition_solve'."
         )

lifejacket 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

lifejacket 1.0.0py3-none-any.whl → 1.1.0py3-none-any.whl