lifejacket 0.2.1.tar.gz → 1.0.2.tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- lifejacket-1.0.2/PKG-INFO +56 -0
- lifejacket-1.0.2/README.md +37 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/arg_threading_helpers.py +75 -69
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/calculate_derivatives.py +19 -23
- lifejacket-1.0.2/lifejacket/constants.py +16 -0
- lifejacket-0.2.1/lifejacket/trial_conditioning_monitor.py → lifejacket-1.0.2/lifejacket/deployment_conditioning_monitor.py +163 -138
- lifejacket-0.2.1/lifejacket/form_adaptive_meat_adjustments_directly.py → lifejacket-1.0.2/lifejacket/form_adjusted_meat_adjustments_directly.py +32 -34
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/get_datum_for_blowup_supervised_learning.py +341 -339
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/helper_functions.py +60 -186
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/input_checks.py +303 -302
- lifejacket-0.2.1/lifejacket/after_study_analysis.py → lifejacket-1.0.2/lifejacket/post_deployment_analysis.py +470 -457
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/small_sample_corrections.py +49 -49
- lifejacket-1.0.2/lifejacket.egg-info/PKG-INFO +56 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket.egg-info/SOURCES.txt +3 -3
- lifejacket-1.0.2/lifejacket.egg-info/entry_points.txt +2 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/pyproject.toml +3 -3
- lifejacket-0.2.1/PKG-INFO +0 -100
- lifejacket-0.2.1/README.md +0 -81
- lifejacket-0.2.1/lifejacket/constants.py +0 -28
- lifejacket-0.2.1/lifejacket.egg-info/PKG-INFO +0 -100
- lifejacket-0.2.1/lifejacket.egg-info/entry_points.txt +0 -2
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/__init__.py +0 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/vmap_helpers.py +0 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket.egg-info/dependency_links.txt +0 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket.egg-info/requires.txt +0 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket.egg-info/top_level.txt +0 -0
- {lifejacket-0.2.1 → lifejacket-1.0.2}/setup.cfg +0 -0

lifejacket-1.0.2/PKG-INFO
@@ -0,0 +1,56 @@
+Metadata-Version: 2.4
+Name: lifejacket
+Version: 1.0.2
+Summary: Consistent standard errors for longitudinal data collected under pooling online decision policies.
+Author-email: Nowell Closser <nowellclosser@gmail.com>
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: click>=8.0
+Requires-Dist: jax>=0.4.0
+Requires-Dist: jaxlib>=0.4.0
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: pandas>=1.3.0
+Requires-Dist: scipy>=1.7.0
+Requires-Dist: plotext>=5.0.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: black>=22.0; extra == "dev"
+Requires-Dist: flake8>=4.0; extra == "dev"
+
+```python
+ _ _ __ _ _ _
+| (_)/ _| (_) | | | |
+| |_| |_ ___ _ __ _ ___| | _____| |_
+| | | _/ _ \ |/ _` |/ __| |/ / _ \ __|
+| | | || __/ | (_| | (__| < __/ |_
+|_|_|_| \___| |\__,_|\___|_|\_\___|\__|
+ _/ |
+ |__/
+```
+
+Save your standard errors from pooling in online decision-making algorithms.
+
+## Setup (if not using conda)
+### Create and activate a virtual environment
+- `python3 -m venv .venv; source .venv/bin/activate`
+
+### Adding a package
+- Add to `requirements.txt` with a specific version, or with no version if you want the latest stable release
+- Run `pip freeze > requirements.txt` to lock the versions of your package and all its subpackages
+
+## Running the code
+- `export PYTHONPATH` set to the absolute path of this repository on your computer
+- `./run_local_synthetic.sh`, which outputs to `simulated_data/` by default. See all the possible flags to be toggled in the script code.
+
+## Linting/Formatting
+
+## Testing
+python -m pytest
+python -m pytest tests/unit_tests
+python -m pytest tests/integration_tests
+
+
+
+## TODO
+1. Add precommit hooks (pip freeze, linting, formatting)
+
lifejacket-1.0.2/README.md
@@ -0,0 +1,37 @@
+```python
+ _ _ __ _ _ _
+| (_)/ _| (_) | | | |
+| |_| |_ ___ _ __ _ ___| | _____| |_
+| | | _/ _ \ |/ _` |/ __| |/ / _ \ __|
+| | | || __/ | (_| | (__| < __/ |_
+|_|_|_| \___| |\__,_|\___|_|\_\___|\__|
+ _/ |
+ |__/
+```
+
+Save your standard errors from pooling in online decision-making algorithms.
+
+## Setup (if not using conda)
+### Create and activate a virtual environment
+- `python3 -m venv .venv; source .venv/bin/activate`
+
+### Adding a package
+- Add to `requirements.txt` with a specific version, or with no version if you want the latest stable release
+- Run `pip freeze > requirements.txt` to lock the versions of your package and all its subpackages
+
+## Running the code
+- `export PYTHONPATH` set to the absolute path of this repository on your computer
+- `./run_local_synthetic.sh`, which outputs to `simulated_data/` by default. See all the possible flags to be toggled in the script code.
+
+## Linting/Formatting
+
+## Testing
+python -m pytest
+python -m pytest tests/unit_tests
+python -m pytest tests/integration_tests
+
+
+
+## TODO
+1. Add precommit hooks (pip freeze, linting, formatting)
+
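The PKG-INFO and README added above become the installed package's metadata. As an aside (not part of the diff), those fields can be read back at runtime with the standard library; a minimal sketch, assuming lifejacket 1.0.2 is installed from the registry:

```python
# Read the PKG-INFO fields shown above from an installed copy of the package.
from importlib.metadata import metadata, version

print(version("lifejacket"))          # "1.0.2"
meta = metadata("lifejacket")         # parsed PKG-INFO (an email.message.Message)
print(meta["Summary"])                # the Summary line above
print(meta.get_all("Requires-Dist"))  # click, jax, jaxlib, numpy, pandas, ...
```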

{lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/arg_threading_helpers.py
@@ -20,10 +20,10 @@ def replace_tuple_index(tupl, index, value):
 
 
 def thread_action_prob_func_args(
-    action_prob_func_args_by_user_id_by_decision_time: dict[
+    action_prob_func_args_by_subject_id_by_decision_time: dict[
         int, dict[collections.abc.Hashable, tuple[Any, ...]]
     ],
-    policy_num_by_decision_time_by_user_id: dict[
+    policy_num_by_decision_time_by_subject_id: dict[
         collections.abc.Hashable, dict[int, int | float]
     ],
     initial_policy_num: int | float,
@@ -39,12 +39,12 @@ def thread_action_prob_func_args(
         decision time to enable correct differentiation.
 
     Args:
-        action_prob_func_args_by_user_id_by_decision_time (dict[int, dict[collections.abc.Hashable, tuple[Any, ...]]]):
+        action_prob_func_args_by_subject_id_by_decision_time (dict[int, dict[collections.abc.Hashable, tuple[Any, ...]]]):
             A map from decision times to maps of user ids to tuples of arguments for the action
             probability function. This is for all decision times for all users (args are an empty
             tuple if they are not in the study). Should be sorted by decision time.
 
-        policy_num_by_decision_time_by_user_id (dict[collections.abc.Hashable, dict[int, int | float]]):
+        policy_num_by_decision_time_by_subject_id (dict[collections.abc.Hashable, dict[int, int | float]]):
             A dictionary mapping decision times to the policy number in use. This may be user-specific.
             Should be sorted by decision time.
 
@@ -69,56 +69,58 @@ def thread_action_prob_func_args(
         A map from user ids to maps of decision times to action probability function
         argument tuples with the shared betas threaded in. Note the key order switch.
     """
-    threaded_action_prob_func_args_by_decision_time_by_user_id = (
+    threaded_action_prob_func_args_by_decision_time_by_subject_id = (
         collections.defaultdict(dict)
     )
-    action_prob_func_args_by_decision_time_by_user_id = collections.defaultdict(dict)
+    action_prob_func_args_by_decision_time_by_subject_id = collections.defaultdict(dict)
     for (
         decision_time,
-        action_prob_func_args_by_user_id,
-    ) in action_prob_func_args_by_user_id_by_decision_time.items():
-        for user_id, args in action_prob_func_args_by_user_id.items():
+        action_prob_func_args_by_subject_id,
+    ) in action_prob_func_args_by_subject_id_by_decision_time.items():
+        for subject_id, args in action_prob_func_args_by_subject_id.items():
             # Always add a contribution to the reversed key order dictionary.
-            action_prob_func_args_by_decision_time_by_user_id[user_id][
+            action_prob_func_args_by_decision_time_by_subject_id[subject_id][
                 decision_time
             ] = args
 
             # Now proceed with the threading, if necessary.
             if not args:
-                threaded_action_prob_func_args_by_decision_time_by_user_id[user_id][
-                    decision_time
-                ] = ()
+                threaded_action_prob_func_args_by_decision_time_by_subject_id[
+                    subject_id
+                ][decision_time] = ()
                 continue
 
-            policy_num = policy_num_by_decision_time_by_user_id[user_id][decision_time]
+            policy_num = policy_num_by_decision_time_by_subject_id[subject_id][
+                decision_time
+            ]
 
             # The expectation is that fallback policies have empty args, and the only other
             # policy not represented in beta_index_by_policy_num is the initial policy.
             if policy_num == initial_policy_num:
-                threaded_action_prob_func_args_by_decision_time_by_user_id[user_id][
-                    decision_time
-                ] = action_prob_func_args_by_user_id[user_id]
+                threaded_action_prob_func_args_by_decision_time_by_subject_id[
+                    subject_id
+                ][decision_time] = action_prob_func_args_by_subject_id[subject_id]
                 continue
 
             beta_to_introduce = all_post_update_betas[
                 beta_index_by_policy_num[policy_num]
             ]
-            threaded_action_prob_func_args_by_decision_time_by_user_id[user_id][
+            threaded_action_prob_func_args_by_decision_time_by_subject_id[subject_id][
                 decision_time
             ] = replace_tuple_index(
-                action_prob_func_args_by_user_id[user_id],
+                action_prob_func_args_by_subject_id[subject_id],
                 action_prob_func_args_beta_index,
                 beta_to_introduce,
             )
 
     return (
-        threaded_action_prob_func_args_by_decision_time_by_user_id,
-        action_prob_func_args_by_decision_time_by_user_id,
+        threaded_action_prob_func_args_by_decision_time_by_subject_id,
+        action_prob_func_args_by_decision_time_by_subject_id,
    )
 
 
 def thread_update_func_args(
-    update_func_args_by_by_user_id_by_policy_num: dict[
+    update_func_args_by_by_subject_id_by_policy_num: dict[
         int | float, dict[collections.abc.Hashable, tuple[Any, ...]]
     ],
     all_post_update_betas: jnp.ndarray,
@@ -127,7 +129,7 @@ def thread_update_func_args(
     alg_update_func_args_action_prob_index: int,
     alg_update_func_args_action_prob_times_index: int,
     alg_update_func_args_previous_betas_index: int,
-    threaded_action_prob_func_args_by_decision_time_by_user_id: dict[
+    threaded_action_prob_func_args_by_decision_time_by_subject_id: dict[
         collections.abc.Hashable, dict[int, tuple[Any, ...]]
     ],
     action_prob_func: callable,
@@ -139,7 +141,7 @@ def thread_update_func_args(
     with reconstructed action probabilities computed using the shared betas.
 
     Args:
-        update_func_args_by_by_user_id_by_policy_num (dict[int | float, dict[collections.abc.Hashable, tuple[Any, ...]]]):
+        update_func_args_by_by_subject_id_by_policy_num (dict[int | float, dict[collections.abc.Hashable, tuple[Any, ...]]]):
             A dictionary where keys are policy
             numbers and values are dictionaries mapping user IDs to their respective update function
             arguments.
@@ -170,7 +172,7 @@ def thread_update_func_args(
         alg_update_func_args_previous_betas_index (int):
             The index in the update function with previous beta parameters
 
-        threaded_action_prob_func_args_by_decision_time_by_user_id (dict[collections.abc.Hashable, dict[int, tuple[Any, ...]]]):
+        threaded_action_prob_func_args_by_decision_time_by_subject_id (dict[collections.abc.Hashable, dict[int, tuple[Any, ...]]]):
             A dictionary mapping decision times to the function arguments required to compute action
             probabilities for this user, and with the shared betas threaded in.
 
@@ -183,49 +185,51 @@ def thread_update_func_args(
         argument tuples for the specified user with the shared betas threaded in. Note the key
         order switch relative to the supplied args!
     """
-    threaded_update_func_args_by_policy_num_by_user_id = collections.defaultdict(dict)
+    threaded_update_func_args_by_policy_num_by_subject_id = collections.defaultdict(
+        dict
+    )
     for (
         policy_num,
-        update_func_args_by_user_id,
-    ) in update_func_args_by_by_user_id_by_policy_num.items():
-        for user_id, args in update_func_args_by_user_id.items():
+        update_func_args_by_subject_id,
+    ) in update_func_args_by_by_subject_id_by_policy_num.items():
+        for subject_id, args in update_func_args_by_subject_id.items():
             if not args:
-                threaded_update_func_args_by_policy_num_by_user_id[user_id][
+                threaded_update_func_args_by_policy_num_by_subject_id[subject_id][
                     policy_num
                 ] = ()
                 continue
 
             logger.debug(
                 "Threading in shared betas to update function arguments for user %s and policy number %s.",
-                user_id,
+                subject_id,
                 policy_num,
             )
 
             beta_to_introduce = all_post_update_betas[
                 beta_index_by_policy_num[policy_num]
             ]
-            threaded_update_func_args_by_policy_num_by_user_id[user_id][
-                policy_num
-            ] = replace_tuple_index(
-                update_func_args_by_user_id[user_id],
-                alg_update_func_args_beta_index,
-                beta_to_introduce,
+            threaded_update_func_args_by_policy_num_by_subject_id[subject_id][
+                policy_num
+            ] = replace_tuple_index(
+                update_func_args_by_subject_id[subject_id],
+                alg_update_func_args_beta_index,
+                beta_to_introduce,
             )
             if alg_update_func_args_previous_betas_index >= 0:
                 previous_betas_to_introduce = all_post_update_betas[
                     : len(
-                        update_func_args_by_user_id[user_id][
+                        update_func_args_by_subject_id[subject_id][
                             alg_update_func_args_previous_betas_index
                         ]
                     )
                 ]
                 if previous_betas_to_introduce.size > 0:
-                    threaded_update_func_args_by_policy_num_by_user_id[user_id][
+                    threaded_update_func_args_by_policy_num_by_subject_id[subject_id][
                         policy_num
                     ] = replace_tuple_index(
-                        threaded_update_func_args_by_policy_num_by_user_id[user_id][
-                            policy_num
-                        ],
+                        threaded_update_func_args_by_policy_num_by_subject_id[
+                            subject_id
+                        ][policy_num],
                         alg_update_func_args_previous_betas_index,
                         previous_betas_to_introduce,
                     )
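Every hunk above routes through replace_tuple_index, whose signature appears in the first hunk header but whose body the diff never shows. From its call sites it takes a tuple, an index, and a value, and returns a copy of the tuple with that one slot swapped out (tuples being immutable). A minimal sketch consistent with that usage, not the package's verbatim source:

```python
def replace_tuple_index(tupl, index, value):
    """Return a copy of `tupl` with the element at `index` replaced by `value`."""
    return tupl[:index] + (value,) + tupl[index + 1 :]


# E.g. threading a freshly updated beta into an args tuple at index 2
# (names here are illustrative only):
args = ("subject_features", "decision_time", "old_beta")
assert replace_tuple_index(args, 2, "new_beta") == (
    "subject_features",
    "decision_time",
    "new_beta",
)
```

The remaining hunks for this file continue below.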
@@ -234,20 +238,20 @@ def thread_update_func_args(
             logger.debug(
                 "Action probabilities are used in the algorithm update function. Reconstructing them using the shared betas."
             )
-            action_prob_times = update_func_args_by_user_id[user_id][
+            action_prob_times = update_func_args_by_subject_id[subject_id][
                 alg_update_func_args_action_prob_times_index
             ]
             # Vectorized computation of action_probs_to_introduce using jax.vmap
             flattened_times = action_prob_times.flatten()
             args_list = [
-                threaded_action_prob_func_args_by_decision_time_by_user_id[user_id][
-                    int(t)
-                ]
+                threaded_action_prob_func_args_by_decision_time_by_subject_id[
+                    subject_id
+                ][int(t)]
                 for t in flattened_times.tolist()
             ]
             if len(args_list) == 0:
                 action_probs_to_introduce = jnp.array([]).reshape(
-                    update_func_args_by_user_id[user_id][
+                    update_func_args_by_subject_id[subject_id][
                         alg_update_func_args_action_prob_index
                     ].shape
                 )
@@ -264,31 +268,31 @@ def thread_update_func_args(
                 action_prob_func, in_axes=tuple(0 for _ in batched_tensors)
             )
             action_probs_to_introduce = vmapped_func(*batched_tensors).reshape(
-                update_func_args_by_user_id[user_id][
+                update_func_args_by_subject_id[subject_id][
                     alg_update_func_args_action_prob_index
                 ].shape
             )
-            threaded_update_func_args_by_policy_num_by_user_id[user_id][
+            threaded_update_func_args_by_policy_num_by_subject_id[subject_id][
                 policy_num
             ] = replace_tuple_index(
-                threaded_update_func_args_by_policy_num_by_user_id[user_id][
+                threaded_update_func_args_by_policy_num_by_subject_id[subject_id][
                     policy_num
                 ],
                 alg_update_func_args_action_prob_index,
                 action_probs_to_introduce,
             )
-    return threaded_update_func_args_by_policy_num_by_user_id
+    return threaded_update_func_args_by_policy_num_by_subject_id
 
 
 def thread_inference_func_args(
-    inference_func_args_by_user_id: dict[collections.abc.Hashable, tuple[Any, ...]],
+    inference_func_args_by_subject_id: dict[collections.abc.Hashable, tuple[Any, ...]],
     inference_func_args_theta_index: int,
     theta: jnp.ndarray,
     inference_func_args_action_prob_index: int,
-    threaded_action_prob_func_args_by_decision_time_by_user_id: dict[
+    threaded_action_prob_func_args_by_decision_time_by_subject_id: dict[
         collections.abc.Hashable, dict[int, tuple[Any, ...]]
     ],
-    inference_action_prob_decision_times_by_user_id: dict[
+    inference_action_prob_decision_times_by_subject_id: dict[
         collections.abc.Hashable, list[int]
     ],
     action_prob_func: callable,
@@ -300,7 +304,7 @@ def thread_inference_func_args(
     probabilities computed using the shared betas.
 
     Args:
-        inference_func_args_by_user_id (dict[collections.abc.Hashable, tuple[Any, ...]]):
+        inference_func_args_by_subject_id (dict[collections.abc.Hashable, tuple[Any, ...]]):
             A dictionary mapping user IDs to their respective inference function arguments.
 
         inference_func_args_theta_index (int):
@@ -315,11 +319,11 @@ def thread_inference_func_args(
             tuple where new beta-threaded action probabilities should be inserted, if applicable.
             -1 otherwise.
 
-        threaded_action_prob_func_args_by_decision_time_by_user_id (dict[collections.abc.Hashable, dict[int, tuple[Any, ...]]]):
+        threaded_action_prob_func_args_by_decision_time_by_subject_id (dict[collections.abc.Hashable, dict[int, tuple[Any, ...]]]):
             A dictionary mapping decision times to the function arguments required to compute action
             probabilities for this user, and with the shared betas threaded in.
 
-        inference_action_prob_decision_times_by_user_id (dict[collections.abc.Hashable, list[int]]):
+        inference_action_prob_decision_times_by_subject_id (dict[collections.abc.Hashable, list[int]]):
             For each user, a list of decision times to which action probabilities correspond if
             provided. Typically just in-study times if action probabilities are used in the inference
             loss or estimating function.
@@ -332,9 +336,9 @@ def thread_inference_func_args(
         threaded in.
     """
 
-    threaded_inference_func_args_by_user_id = {}
-    for user_id, args in inference_func_args_by_user_id.items():
-        threaded_inference_func_args_by_user_id[user_id] = replace_tuple_index(
+    threaded_inference_func_args_by_subject_id = {}
+    for subject_id, args in inference_func_args_by_subject_id.items():
+        threaded_inference_func_args_by_subject_id[subject_id] = replace_tuple_index(
             args,
             inference_func_args_theta_index,
             theta,
@@ -343,12 +347,12 @@ def thread_inference_func_args(
         if inference_func_args_action_prob_index >= 0:
             # Use a vmap-like pattern to compute action probabilities in batch.
             action_prob_times_flattened = (
-                inference_action_prob_decision_times_by_user_id[user_id].flatten()
+                inference_action_prob_decision_times_by_subject_id[subject_id].flatten()
             )
             args_list = [
-                threaded_action_prob_func_args_by_decision_time_by_user_id[user_id][
-                    int(t)
-                ]
+                threaded_action_prob_func_args_by_decision_time_by_subject_id[
+                    subject_id
+                ][int(t)]
                 for t in action_prob_times_flattened.tolist()
             ]
             if len(args_list) == 0:
@@ -369,9 +373,11 @@ def thread_inference_func_args(
             action_probs_to_introduce = vmapped_func(*batched_tensors).reshape(
                 args[inference_func_args_action_prob_index].shape
             )
-            threaded_inference_func_args_by_user_id[user_id] = replace_tuple_index(
-                threaded_inference_func_args_by_user_id[user_id],
-                inference_func_args_action_prob_index,
-                action_probs_to_introduce,
+            threaded_inference_func_args_by_subject_id[subject_id] = (
+                replace_tuple_index(
+                    threaded_inference_func_args_by_subject_id[subject_id],
+                    inference_func_args_action_prob_index,
+                    action_probs_to_introduce,
+                )
             )
-    return threaded_inference_func_args_by_user_id
+    return threaded_inference_func_args_by_subject_id
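The hunks above batch the per-decision-time argument tuples into stacked tensors and map action_prob_func over the leading axis with jax.vmap. A self-contained sketch of that pattern, with a toy logistic policy and made-up shapes standing in for the real action_prob_func and batched_tensors:

```python
import jax
import jax.numpy as jnp


def action_prob_func(beta, features):
    # Toy stand-in: treatment probability under a logistic policy.
    return jax.nn.sigmoid(jnp.dot(beta, features))


# One argument tuple per decision time, stacked along a leading batch axis.
betas = jnp.ones((5, 3))     # 5 decision times, 3 policy parameters each
features = jnp.ones((5, 3))  # 5 decision times, 3 context features each
batched_tensors = (betas, features)

# Mirrors the diff: vmap over axis 0 of every batched argument.
vmapped_func = jax.vmap(action_prob_func, in_axes=tuple(0 for _ in batched_tensors))
action_probs = vmapped_func(*batched_tensors)  # shape (5,)
```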

{lifejacket-0.2.1 → lifejacket-1.0.2}/lifejacket/calculate_derivatives.py
@@ -18,8 +18,6 @@ logging.basicConfig(
     level=logging.INFO,
 )
 
-# TODO: Consolidate function loading logic
-
 
 def get_batched_arg_lists_and_involved_user_ids(func, sorted_user_ids, args_by_user_id):
     """
@@ -198,10 +196,10 @@ def pad_in_study_derivatives_with_zeros(
 
 def calculate_pi_and_weight_gradients(
     study_df,
-    in_study_col_name,
+    active_col_name,
     action_col_name,
     calendar_t_col_name,
-    user_id_col_name,
+    subject_id_col_name,
     action_prob_func,
     action_prob_func_args,
     action_prob_func_args_beta_index,
@@ -226,10 +224,10 @@ def calculate_pi_and_weight_gradients(
 
     pi_gradients, weight_gradients = calculate_pi_and_weight_gradients_specific_t(
         study_df,
-        in_study_col_name,
+        active_col_name,
         action_col_name,
         calendar_t_col_name,
-        user_id_col_name,
+        subject_id_col_name,
         action_prob_func,
         action_prob_func_args_beta_index,
         calendar_t,
@@ -252,10 +250,10 @@ def calculate_pi_and_weight_gradients(
 
 def calculate_pi_and_weight_gradients_specific_t(
     study_df,
-    in_study_col_name,
+    active_col_name,
     action_col_name,
     calendar_t_col_name,
-    user_id_col_name,
+    subject_id_col_name,
     action_prob_func,
     action_prob_func_args_beta_index,
     calendar_t,
@@ -320,10 +318,10 @@ def calculate_pi_and_weight_gradients_specific_t(
         study_df,
         calendar_t,
         sorted_user_ids,
-        in_study_col_name,
+        active_col_name,
         action_col_name,
         calendar_t_col_name,
-        user_id_col_name,
+        subject_id_col_name,
     )
     # Note the first argument here: we extract the betas to pass in
     # again as the "target" denominator betas, whereas we differentiate with
@@ -382,10 +380,10 @@ def collect_batched_in_study_actions(
     study_df,
     calendar_t,
     sorted_user_ids,
-    in_study_col_name,
+    active_col_name,
     action_col_name,
     calendar_t_col_name,
-    user_id_col_name,
+    subject_id_col_name,
 ):
 
     # TODO: This for loop can be removed, just grabbing the actions col after
@@ -394,9 +392,9 @@ def collect_batched_in_study_actions(
     batched_actions_list = []
     for user_id in sorted_user_ids:
         filtered_user_data = study_df.loc[
-            (study_df[user_id_col_name] == user_id)
+            (study_df[subject_id_col_name] == user_id)
             & (study_df[calendar_t_col_name] == calendar_t)
-            & (study_df[in_study_col_name] == 1)
+            & (study_df[active_col_name] == 1)
         ]
         if not filtered_user_data.empty:
             batched_actions_list.append(filtered_user_data[action_col_name].values[0])
@@ -785,9 +783,9 @@ def calculate_inference_loss_derivatives(
     inference_func,
     inference_func_args_theta_index,
     user_ids,
-    user_id_col_name,
+    subject_id_col_name,
     action_prob_col_name,
-    in_study_col_name,
+    active_col_name,
     calendar_t_col_name,
     inference_func_type=FunctionTypes.LOSS,
 ):
@@ -819,18 +817,18 @@ def calculate_inference_loss_derivatives(
     max_calendar_time = study_df[calendar_t_col_name].max()
     for user_id in user_ids:
         user_args_list = []
-        filtered_user_data = study_df.loc[study_df[user_id_col_name] == user_id]
+        filtered_user_data = study_df.loc[study_df[subject_id_col_name] == user_id]
         for idx, col_name in enumerate(inference_func_arg_names):
             if idx == inference_func_args_theta_index:
                 user_args_list.append(theta_est)
             else:
                 user_args_list.append(
-                    get_study_df_column(filtered_user_data, col_name, in_study_col_name)
+                    get_study_df_column(filtered_user_data, col_name, active_col_name)
                 )
         args_by_user_id[user_id] = tuple(user_args_list)
         if using_action_probs:
             action_prob_decision_times_by_user_id[user_id] = get_study_df_column(
-                filtered_user_data, calendar_t_col_name, in_study_col_name
+                filtered_user_data, calendar_t_col_name, active_col_name
             )
 
     # Get a list of subdicts of the user args dict, with each united by having
@@ -957,9 +955,7 @@ def calculate_inference_loss_derivatives(
     return loss_gradients, loss_hessians, loss_gradient_pi_derivatives
 
 
-def get_study_df_column(study_df, col_name, in_study_col_name):
+def get_study_df_column(study_df, col_name, active_col_name):
     return jnp.array(
-        study_df.loc[study_df[in_study_col_name] == 1, col_name]
-        .to_numpy()
-        .reshape(-1, 1)
+        study_df.loc[study_df[active_col_name] == 1, col_name].to_numpy().reshape(-1, 1)
     )
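The rewritten get_study_df_column at the end of this file filters the frame down to rows where the active flag is 1, pulls a single column, and returns it as an (n, 1) JAX array. The same operation on toy data, with hypothetical column names:

```python
import jax.numpy as jnp
import pandas as pd

study_df = pd.DataFrame({"active": [1, 0, 1], "reward": [0.5, 0.1, 0.9]})

# Keep active rows only, then reshape the column to (n, 1) as the diff does.
col = jnp.array(
    study_df.loc[study_df["active"] == 1, "reward"].to_numpy().reshape(-1, 1)
)
print(col.shape)  # (2, 1) -- the active == 0 row is dropped
```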

lifejacket-1.0.2/lifejacket/constants.py
@@ -0,0 +1,16 @@
+class SmallSampleCorrections:
+    NONE = "none"
+    Z1theta = "Z1theta"
+    Z2theta = "Z2theta"
+    Z3theta = "Z3theta"
+
+
+class FunctionTypes:
+    LOSS = "loss"
+    ESTIMATING = "estimating"
+
+
+class SandwichFormationMethods:
+    BREAD_T_QR = "bread_T_qr"
+    MEAT_SVD_SOLVE = "meat_svd_solve"
+    NAIVE = "naive"
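The new constants.py, which replaces the 28-line 0.2.1 version removed in this release, reduces to three namespace classes of string-valued options. A hedged sketch of how such option constants are typically consumed downstream; choose_correction is hypothetical, not part of lifejacket's public API:

```python
from lifejacket.constants import SandwichFormationMethods, SmallSampleCorrections


def choose_correction(name: str) -> str:
    # Hypothetical dispatcher: validate a user-supplied option string
    # against the constants defined in the hunk above.
    valid = {
        SmallSampleCorrections.NONE,
        SmallSampleCorrections.Z1theta,
        SmallSampleCorrections.Z2theta,
        SmallSampleCorrections.Z3theta,
    }
    if name not in valid:
        raise ValueError(f"Unknown small-sample correction: {name!r}")
    return name


assert choose_correction("Z2theta") == SmallSampleCorrections.Z2theta
assert SandwichFormationMethods.NAIVE == "naive"
```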