pySEQTarget 0.10.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/PKG-INFO +11 -5
  2. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/README.md +5 -4
  3. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/SEQopts.py +17 -2
  4. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/SEQuential.py +23 -10
  5. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/analysis/__init__.py +1 -0
  6. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/analysis/_hazard.py +9 -4
  7. pyseqtarget-0.12.0/pySEQTarget/analysis/_risk_estimates.py +138 -0
  8. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/analysis/_survival_pred.py +21 -12
  9. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/error/__init__.py +1 -1
  10. pyseqtarget-0.10.0/pySEQTarget/error/_datachecker.py → pyseqtarget-0.12.0/pySEQTarget/error/_data_checker.py +1 -1
  11. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/expansion/_mapper.py +4 -11
  12. pyseqtarget-0.12.0/pySEQTarget/expansion/_selection.py +44 -0
  13. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/__init__.py +1 -0
  14. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/_bootstrap.py +41 -5
  15. pyseqtarget-0.12.0/pySEQTarget/helpers/_fix_categories.py +21 -0
  16. pyseqtarget-0.12.0/pySEQTarget/helpers/_offloader.py +82 -0
  17. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/_output_files.py +2 -4
  18. pyseqtarget-0.12.0/pySEQTarget/helpers/_predict_model.py +57 -0
  19. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/weighting/__init__.py +1 -0
  20. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/weighting/_weight_bind.py +3 -0
  21. pyseqtarget-0.12.0/pySEQTarget/weighting/_weight_fit.py +137 -0
  22. pyseqtarget-0.12.0/pySEQTarget/weighting/_weight_offload.py +19 -0
  23. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/weighting/_weight_pred.py +74 -52
  24. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget.egg-info/PKG-INFO +11 -5
  25. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget.egg-info/SOURCES.txt +5 -1
  26. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget.egg-info/requires.txt +6 -0
  27. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pyproject.toml +10 -8
  28. pyseqtarget-0.12.0/tests/test_offload.py +41 -0
  29. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_survival.py +13 -0
  30. pyseqtarget-0.10.0/pySEQTarget/analysis/_risk_estimates.py +0 -136
  31. pyseqtarget-0.10.0/pySEQTarget/expansion/_selection.py +0 -31
  32. pyseqtarget-0.10.0/pySEQTarget/helpers/_predict_model.py +0 -9
  33. pyseqtarget-0.10.0/pySEQTarget/weighting/_weight_fit.py +0 -99
  34. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/LICENSE +0 -0
  35. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/SEQoutput.py +0 -0
  36. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/__init__.py +0 -0
  37. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/analysis/_outcome_fit.py +0 -0
  38. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/analysis/_subgroup_fit.py +0 -0
  39. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/data/__init__.py +0 -0
  40. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/error/_param_checker.py +0 -0
  41. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/expansion/__init__.py +0 -0
  42. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/expansion/_binder.py +0 -0
  43. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/expansion/_diagnostics.py +0 -0
  44. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/expansion/_dynamic.py +0 -0
  45. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/_col_string.py +0 -0
  46. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/_format_time.py +0 -0
  47. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/_pad.py +0 -0
  48. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/helpers/_prepare_data.py +0 -0
  49. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/initialization/__init__.py +0 -0
  50. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/initialization/_censoring.py +0 -0
  51. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/initialization/_denominator.py +0 -0
  52. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/initialization/_numerator.py +0 -0
  53. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/initialization/_outcome.py +0 -0
  54. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/plot/__init__.py +0 -0
  55. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/plot/_survival_plot.py +0 -0
  56. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/weighting/_weight_data.py +0 -0
  57. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget/weighting/_weight_stats.py +0 -0
  58. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget.egg-info/dependency_links.txt +0 -0
  59. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/pySEQTarget.egg-info/top_level.txt +0 -0
  60. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/setup.cfg +0 -0
  61. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_accessor.py +0 -0
  62. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_coefficients.py +0 -0
  63. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_covariates.py +0 -0
  64. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_followup_options.py +0 -0
  65. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_hazard.py +0 -0
  66. {pyseqtarget-0.10.0 → pyseqtarget-0.12.0}/tests/test_parallel.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pySEQTarget
3
- Version: 0.10.0
3
+ Version: 0.12.0
4
4
  Summary: Sequentially Nested Target Trial Emulation
5
5
  Author-email: Ryan O'Dea <ryan.odea@psi.ch>, Alejandro Szmulewicz <aszmulewicz@hsph.harvard.edu>, Tom Palmer <tom.palmer@bristol.ac.uk>, Miguel Hernan <mhernan@hsph.harvard.edu>
6
6
  Maintainer-email: Ryan O'Dea <ryan.odea@psi.ch>
@@ -33,6 +33,11 @@ Requires-Dist: statsmodels
33
33
  Requires-Dist: matplotlib
34
34
  Requires-Dist: pyarrow
35
35
  Requires-Dist: lifelines
36
+ Requires-Dist: joblib
37
+ Provides-Extra: output
38
+ Requires-Dist: markdown; extra == "output"
39
+ Requires-Dist: weasyprint; extra == "output"
40
+ Requires-Dist: tabulate; extra == "output"
36
41
  Dynamic: license-file
37
42
 
38
43
  # pySEQTarget - Sequentially Nested Target Trial Emulation
@@ -68,8 +73,9 @@ From the user side, this amounts to creating a dataclass, `SEQopts`, and then fe
68
73
  ```python
69
74
  import polars as pl
70
75
  from pySEQTarget import SEQuential, SEQopts
76
+ from pySEQTarget.data import load_data
71
77
 
72
- data = pl.from_pandas(SEQdata)
78
+ data = load_data("SEQdata")
73
79
  options = SEQopts(km_curves = True)
74
80
 
75
81
  # Initiate the class
@@ -77,17 +83,18 @@ model = SEQuential(data,
77
83
  id_col = "ID",
78
84
  time_col = "time",
79
85
  eligible_col = "eligible",
86
+ treatment_col = "tx_init",
87
+ outcome_col = "outcome",
80
88
  time_varying_cols = ["N", "L", "P"],
81
89
  fixed_cols = ["sex"],
82
90
  method = "ITT",
83
- options = options)
91
+ parameters = options)
84
92
  model.expand() # Construct the nested structure
85
93
  model.bootstrap(bootstrap_nboot = 20) # Run 20 bootstrap samples
86
94
  model.fit() # Fit the model
87
95
  model.survival() # Create survival curves
88
96
  model.plot() # Create and show a plot of the survival curves
89
97
  model.collect() # Collection of important information
90
-
91
98
  ```
92
99
 
93
100
  ## Assumptions
@@ -95,4 +102,3 @@ There are several key assumptions in this package -
95
102
  1. User provided `time_col` begins at 0 per unique `id_col`, we also assume this column contains only integers and continues by 1 for every time step, e.g. (0, 1, 2, 3, 4, ...) is allowed and (0, 1, 2, 2.5, ...) or (0, 1, 4, 5) are not
96
103
  1. Provided `time_col` entries may be out of order at intake as a sort is enforced at expansion.
97
104
  2. `eligible_col` and elements of `excused_colnames` are once 1, only 1 (with respect to `time_col`) flag variables.
98
-
@@ -31,8 +31,9 @@ From the user side, this amounts to creating a dataclass, `SEQopts`, and then fe
31
31
  ```python
32
32
  import polars as pl
33
33
  from pySEQTarget import SEQuential, SEQopts
34
+ from pySEQTarget.data import load_data
34
35
 
35
- data = pl.from_pandas(SEQdata)
36
+ data = load_data("SEQdata")
36
37
  options = SEQopts(km_curves = True)
37
38
 
38
39
  # Initiate the class
@@ -40,17 +41,18 @@ model = SEQuential(data,
40
41
  id_col = "ID",
41
42
  time_col = "time",
42
43
  eligible_col = "eligible",
44
+ treatment_col = "tx_init",
45
+ outcome_col = "outcome",
43
46
  time_varying_cols = ["N", "L", "P"],
44
47
  fixed_cols = ["sex"],
45
48
  method = "ITT",
46
- options = options)
49
+ parameters = options)
47
50
  model.expand() # Construct the nested structure
48
51
  model.bootstrap(bootstrap_nboot = 20) # Run 20 bootstrap samples
49
52
  model.fit() # Fit the model
50
53
  model.survival() # Create survival curves
51
54
  model.plot() # Create and show a plot of the survival curves
52
55
  model.collect() # Collection of important information
53
-
54
56
  ```
55
57
 
56
58
  ## Assumptions
@@ -58,4 +60,3 @@ There are several key assumptions in this package -
58
60
  1. User provided `time_col` begins at 0 per unique `id_col`, we also assume this column contains only integers and continues by 1 for every time step, e.g. (0, 1, 2, 3, 4, ...) is allowed and (0, 1, 2, 2.5, ...) or (0, 1, 4, 5) are not
59
61
  1. Provided `time_col` entries may be out of order at intake as a sort is enforced at expansion.
60
62
  2. `eligible_col` and elements of `excused_colnames` are once 1, only 1 (with respect to `time_col`) flag variables.
61
-
@@ -1,4 +1,5 @@
1
1
  import multiprocessing
2
+ import os
2
3
  from dataclasses import dataclass, field
3
4
  from typing import List, Literal, Optional
4
5
 
@@ -18,7 +19,7 @@ class SEQopts:
18
19
  :type bootstrap_CI_method: str
19
20
  :param cense_colname: Column name for censoring effect (LTFU, etc.)
20
21
  :type cense_colname: str
21
- :param cense_denominator: Override to specify denominator patsy formula for censoring models
22
+ :param cense_denominator: Override to specify denominator patsy formula for censoring models; "1" or "" indicate intercept only model
22
23
  :type cense_denominator: Optional[str] or None
23
24
  :param cense_numerator: Override to specify numerator patsy formula for censoring models
24
25
  :type cense_numerator: Optional[str] or None
@@ -54,8 +55,12 @@ class SEQopts:
54
55
  :type km_curves: bool
55
56
  :param ncores: Number of cores to use if running in parallel
56
57
  :type ncores: int
57
- :param numerator: Override to specify the outcome patsy formula for numerator models
58
+ :param numerator: Override to specify the outcome patsy formula for numerator models; "1" or "" indicate intercept only model
58
59
  :type numerator: str
60
+ :param offload: Boolean to offload intermediate model data to disk
61
+ :type offload: bool
62
+ :param offload_dir: Directory to offload intermediate model data
63
+ :type offload_dir: str
59
64
  :param parallel: Boolean to run model fitting in parallel
60
65
  :type parallel: bool
61
66
  :param plot_colors: List of colors for KM plots, if applicable
@@ -80,8 +85,12 @@ class SEQopts:
80
85
  :type treatment_level: List[int]
81
86
  :param trial_include: Boolean to force trial values into model covariates
82
87
  :type trial_include: bool
88
+ :param visit_colname: Column name specifying visit number
89
+ :type visit_colname: str
83
90
  :param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting
84
91
  :type weight_eligible_colnames: List[str]
92
+ :param weight_fit_method: The fitting method to be used ["newton", "bfgs", "lbfgs", "nm"], default "newton"
93
+ :type weight_fit_method: str
85
94
  :param weight_min: Minimum weight
86
95
  :type weight_min: float
87
96
  :param weight_max: Maximum weight
@@ -120,6 +129,8 @@ class SEQopts:
120
129
  km_curves: bool = False
121
130
  ncores: int = multiprocessing.cpu_count()
122
131
  numerator: Optional[str] = None
132
+ offload: bool = False
133
+ offload_dir: str = "_seq_models"
123
134
  parallel: bool = False
124
135
  plot_colors: List[str] = field(
125
136
  default_factory=lambda: ["#F8766D", "#00BFC4", "#555555"]
@@ -136,6 +147,7 @@ class SEQopts:
136
147
  trial_include: bool = True
137
148
  visit_colname: str = None
138
149
  weight_eligible_colnames: List[str] = field(default_factory=lambda: [])
150
+ weight_fit_method: Literal["newton", "bfgs", "lbfgs", "nm"] = "newton"
139
151
  weight_min: float = 0.0
140
152
  weight_max: float = None
141
153
  weight_lag_condition: bool = True
@@ -195,3 +207,6 @@ class SEQopts:
195
207
  attr = getattr(self, i)
196
208
  if attr is not None and not isinstance(attr, list):
197
209
  setattr(self, i, "".join(attr.split()))
210
+
211
+ if self.offload:
212
+ os.makedirs(self.offload_dir, exist_ok=True)
@@ -7,19 +7,20 @@ from typing import List, Literal, Optional
7
7
  import numpy as np
8
8
  import polars as pl
9
9
 
10
- from .analysis import (_calculate_hazard, _calculate_survival, _outcome_fit,
11
- _pred_risk, _risk_estimates, _subgroup_fit)
12
- from .error import _datachecker, _param_checker
10
+ from .analysis import (_calculate_hazard, _calculate_survival, _clamp,
11
+ _outcome_fit, _pred_risk, _risk_estimates,
12
+ _subgroup_fit)
13
+ from .error import _data_checker, _param_checker
13
14
  from .expansion import _binder, _diagnostics, _dynamic, _random_selection
14
- from .helpers import _col_string, _format_time, bootstrap_loop
15
+ from .helpers import Offloader, _col_string, _format_time, bootstrap_loop
15
16
  from .initialization import (_cense_denominator, _cense_numerator,
16
17
  _denominator, _numerator, _outcome)
17
18
  from .plot import _survival_plot
18
19
  from .SEQopts import SEQopts
19
20
  from .SEQoutput import SEQoutput
20
21
  from .weighting import (_fit_denominator, _fit_LTFU, _fit_numerator,
21
- _fit_visit, _weight_bind, _weight_predict,
22
- _weight_setup, _weight_stats)
22
+ _fit_visit, _offload_weights, _weight_bind,
23
+ _weight_predict, _weight_setup, _weight_stats)
23
24
 
24
25
 
25
26
  class SEQuential:
@@ -83,6 +84,8 @@ class SEQuential:
83
84
  np.random.RandomState(self.seed) if self.seed is not None else np.random
84
85
  )
85
86
 
87
+ self._offloader = Offloader(enabled=self.offload, dir=self.offload_dir)
88
+
86
89
  if self.covariates is None:
87
90
  self.covariates = _outcome(self)
88
91
 
@@ -101,7 +104,7 @@ class SEQuential:
101
104
  self.cense_denominator = _cense_denominator(self)
102
105
 
103
106
  _param_checker(self)
104
- _datachecker(self)
107
+ _data_checker(self)
105
108
 
106
109
  def expand(self) -> None:
107
110
  """
@@ -190,7 +193,6 @@ class SEQuential:
190
193
  )
191
194
  id_counts = Counter(sampled_IDs)
192
195
  self._boot_samples.append(id_counts)
193
- return self
194
196
 
195
197
  @bootstrap_loop
196
198
  def fit(self) -> None:
@@ -201,6 +203,9 @@ class SEQuential:
201
203
  raise ValueError(
202
204
  "Bootstrap sampling not found. Please run the 'bootstrap' method before fitting with bootstrapping."
203
205
  )
206
+ boot_idx = None
207
+ if hasattr(self, "_current_boot_idx"):
208
+ boot_idx = self._current_boot_idx
204
209
 
205
210
  if self.weighted:
206
211
  WDT = _weight_setup(self)
@@ -217,6 +222,9 @@ class SEQuential:
217
222
  _fit_numerator(self, WDT)
218
223
  _fit_denominator(self, WDT)
219
224
 
225
+ if self.offload:
226
+ _offload_weights(self, boot_idx)
227
+
220
228
  WDT = pl.from_pandas(WDT)
221
229
  WDT = _weight_predict(self, WDT)
222
230
  _weight_bind(self, WDT)
@@ -244,6 +252,11 @@ class SEQuential:
244
252
  self.weighted,
245
253
  "weight",
246
254
  )
255
+ if self.offload:
256
+ offloaded_models = {}
257
+ for key, model in models.items():
258
+ offloaded_models[key] = self._offloader.save_model(model, key, boot_idx)
259
+ return offloaded_models
247
260
  return models
248
261
 
249
262
  def survival(self, **kwargs) -> None:
@@ -266,7 +279,7 @@ class SEQuential:
266
279
 
267
280
  risk_data = _pred_risk(self)
268
281
  surv_data = _calculate_survival(self, risk_data)
269
- self.km_data = pl.concat([risk_data, surv_data])
282
+ self.km_data = _clamp(pl.concat([risk_data, surv_data]))
270
283
  self.risk_estimates = _risk_estimates(self)
271
284
 
272
285
  end = time.perf_counter()
@@ -342,7 +355,7 @@ class SEQuential:
342
355
  }
343
356
 
344
357
  if self.compevent_colname is not None:
345
- compevent_models = [model["compevent"] for model in self.outcome_models]
358
+ compevent_models = [model["compevent"] for model in self.outcome_model]
346
359
  else:
347
360
  compevent_models = None
348
361
 
@@ -3,6 +3,7 @@ from ._outcome_fit import _outcome_fit as _outcome_fit
3
3
  from ._risk_estimates import _risk_estimates as _risk_estimates
4
4
  from ._subgroup_fit import _subgroup_fit as _subgroup_fit
5
5
  from ._survival_pred import _calculate_survival as _calculate_survival
6
+ from ._survival_pred import _clamp as _clamp
6
7
  from ._survival_pred import \
7
8
  _get_outcome_predictions as _get_outcome_predictions
8
9
  from ._survival_pred import _pred_risk as _pred_risk
@@ -4,6 +4,8 @@ import numpy as np
4
4
  import polars as pl
5
5
  from lifelines import CoxPHFitter
6
6
 
7
+ from ..helpers._predict_model import _safe_predict
8
+
7
9
 
8
10
  def _calculate_hazard(self):
9
11
  if self.subgroup_colname is None:
@@ -93,8 +95,10 @@ def _hazard_handler(self, data, idx, boot_idx, rng):
93
95
  else:
94
96
  model_dict = self.outcome_model[boot_idx]
95
97
 
96
- outcome_model = model_dict["outcome"]
97
- ce_model = model_dict.get("compevent", None) if self.compevent_colname else None
98
+ outcome_model = self._offloader.load_model(model_dict["outcome"])
99
+ ce_model = None
100
+ if self.compevent_colname and "compevent" in model_dict:
101
+ ce_model = self._offloader.load_model(model_dict["compevent"])
98
102
 
99
103
  all_treatments = []
100
104
  for val in self.treatment_level:
@@ -103,13 +107,14 @@ def _hazard_handler(self, data, idx, boot_idx, rng):
103
107
  )
104
108
 
105
109
  tmp_pd = tmp.to_pandas()
106
- outcome_prob = outcome_model.predict(tmp_pd)
110
+ outcome_prob = _safe_predict(outcome_model, tmp_pd)
107
111
  outcome_sim = rng.binomial(1, outcome_prob)
108
112
 
109
113
  tmp = tmp.with_columns([pl.Series("outcome", outcome_sim)])
110
114
 
111
115
  if ce_model is not None:
112
- ce_prob = ce_model.predict(tmp_pd)
116
+ ce_tmp_pd = tmp.to_pandas()
117
+ ce_prob = _safe_predict(ce_model, ce_tmp_pd)
113
118
  ce_sim = rng.binomial(1, ce_prob)
114
119
  tmp = tmp.with_columns([pl.Series("ce", ce_sim)])
115
120
 
@@ -0,0 +1,138 @@
1
+ import polars as pl
2
+ from scipy import stats
3
+
4
+
5
+ def _compute_rd_rr(comp, has_bootstrap, z=None, group_cols=None):
6
+ """
7
+ Compute Risk Difference and Risk Ratio from a comparison dataframe.
8
+ Consolidates the repeated calculation logic.
9
+ """
10
+ if group_cols is None:
11
+ group_cols = []
12
+
13
+ if has_bootstrap:
14
+ rd_se = (pl.col("se_x").pow(2) + pl.col("se_y").pow(2)).sqrt()
15
+ rd_comp = comp.with_columns(
16
+ [
17
+ (pl.col("risk_x") - pl.col("risk_y")).alias("Risk Difference"),
18
+ (pl.col("risk_x") - pl.col("risk_y") - z * rd_se).alias("RD 95% LCI"),
19
+ (pl.col("risk_x") - pl.col("risk_y") + z * rd_se).alias("RD 95% UCI"),
20
+ ]
21
+ )
22
+ rd_comp = rd_comp.drop(["risk_x", "risk_y", "se_x", "se_y"])
23
+ col_order = group_cols + [
24
+ "A_x",
25
+ "A_y",
26
+ "Risk Difference",
27
+ "RD 95% LCI",
28
+ "RD 95% UCI",
29
+ ]
30
+ rd_comp = rd_comp.select([c for c in col_order if c in rd_comp.columns])
31
+
32
+ rr_log_se = (
33
+ (pl.col("se_x") / pl.col("risk_x")).pow(2)
34
+ + (pl.col("se_y") / pl.col("risk_y")).pow(2)
35
+ ).sqrt()
36
+ rr_comp = comp.with_columns(
37
+ [
38
+ (pl.col("risk_x") / pl.col("risk_y")).alias("Risk Ratio"),
39
+ ((pl.col("risk_x") / pl.col("risk_y")) * (-z * rr_log_se).exp()).alias(
40
+ "RR 95% LCI"
41
+ ),
42
+ ((pl.col("risk_x") / pl.col("risk_y")) * (z * rr_log_se).exp()).alias(
43
+ "RR 95% UCI"
44
+ ),
45
+ ]
46
+ )
47
+ rr_comp = rr_comp.drop(["risk_x", "risk_y", "se_x", "se_y"])
48
+ col_order = group_cols + [
49
+ "A_x",
50
+ "A_y",
51
+ "Risk Ratio",
52
+ "RR 95% LCI",
53
+ "RR 95% UCI",
54
+ ]
55
+ rr_comp = rr_comp.select([c for c in col_order if c in rr_comp.columns])
56
+ else:
57
+ rd_comp = comp.with_columns(
58
+ (pl.col("risk_x") - pl.col("risk_y")).alias("Risk Difference")
59
+ )
60
+ rd_comp = rd_comp.drop(["risk_x", "risk_y"])
61
+ col_order = group_cols + ["A_x", "A_y", "Risk Difference"]
62
+ rd_comp = rd_comp.select([c for c in col_order if c in rd_comp.columns])
63
+
64
+ rr_comp = comp.with_columns(
65
+ (pl.col("risk_x") / pl.col("risk_y")).alias("Risk Ratio")
66
+ )
67
+ rr_comp = rr_comp.drop(["risk_x", "risk_y"])
68
+ col_order = group_cols + ["A_x", "A_y", "Risk Ratio"]
69
+ rr_comp = rr_comp.select([c for c in col_order if c in rr_comp.columns])
70
+
71
+ return rd_comp, rr_comp
72
+
73
+
74
+ def _risk_estimates(self):
75
+ last_followup = self.km_data["followup"].max()
76
+ risk = self.km_data.filter(
77
+ (pl.col("followup") == last_followup) & (pl.col("estimate") == "risk")
78
+ )
79
+
80
+ group_cols = [self.subgroup_colname] if self.subgroup_colname else []
81
+ has_bootstrap = self.bootstrap_nboot > 0
82
+
83
+ if has_bootstrap:
84
+ alpha = 1 - self.bootstrap_CI
85
+ z = stats.norm.ppf(1 - alpha / 2)
86
+ else:
87
+ z = None
88
+
89
+ # Pre-extract data for each treatment level once (avoid repeated filtering)
90
+ risk_by_level = {}
91
+ for tx in self.treatment_level:
92
+ level_data = risk.filter(pl.col(self.treatment_col) == tx)
93
+ risk_by_level[tx] = {
94
+ "pred": level_data.select(group_cols + ["pred"]),
95
+ }
96
+ if has_bootstrap:
97
+ risk_by_level[tx]["SE"] = level_data.select(group_cols + ["SE"])
98
+
99
+ rd_comparisons = []
100
+ rr_comparisons = []
101
+
102
+ for tx_x in self.treatment_level:
103
+ for tx_y in self.treatment_level:
104
+ if tx_x == tx_y:
105
+ continue
106
+
107
+ # Use pre-extracted data instead of filtering again
108
+ risk_x = risk_by_level[tx_x]["pred"].rename({"pred": "risk_x"})
109
+ risk_y = risk_by_level[tx_y]["pred"].rename({"pred": "risk_y"})
110
+
111
+ if group_cols:
112
+ comp = risk_x.join(risk_y, on=group_cols, how="left")
113
+ else:
114
+ comp = risk_x.join(risk_y, how="cross")
115
+
116
+ comp = comp.with_columns(
117
+ [pl.lit(tx_x).alias("A_x"), pl.lit(tx_y).alias("A_y")]
118
+ )
119
+
120
+ if has_bootstrap:
121
+ se_x = risk_by_level[tx_x]["SE"].rename({"SE": "se_x"})
122
+ se_y = risk_by_level[tx_y]["SE"].rename({"SE": "se_y"})
123
+
124
+ if group_cols:
125
+ comp = comp.join(se_x, on=group_cols, how="left")
126
+ comp = comp.join(se_y, on=group_cols, how="left")
127
+ else:
128
+ comp = comp.join(se_x, how="cross")
129
+ comp = comp.join(se_y, how="cross")
130
+
131
+ rd_comp, rr_comp = _compute_rd_rr(comp, has_bootstrap, z, group_cols)
132
+ rd_comparisons.append(rd_comp)
133
+ rr_comparisons.append(rr_comp)
134
+
135
+ risk_difference = pl.concat(rd_comparisons) if rd_comparisons else pl.DataFrame()
136
+ risk_ratio = pl.concat(rr_comparisons) if rr_comparisons else pl.DataFrame()
137
+
138
+ return {"risk_difference": risk_difference, "risk_ratio": risk_ratio}
@@ -1,5 +1,7 @@
1
1
  import polars as pl
2
2
 
3
+ from ..helpers._predict_model import _safe_predict
4
+
3
5
 
4
6
  def _get_outcome_predictions(self, TxDT, idx=None):
5
7
  data = TxDT.to_pandas()
@@ -9,9 +11,12 @@ def _get_outcome_predictions(self, TxDT, idx=None):
9
11
 
10
12
  for boot_model in self.outcome_model:
11
13
  model_dict = boot_model[idx] if idx is not None else boot_model
12
- predictions["outcome"].append(model_dict["outcome"].predict(data))
14
+ outcome_model = self._offloader.load_model(model_dict["outcome"])
15
+ predictions["outcome"].append(_safe_predict(outcome_model, data.copy()))
16
+
13
17
  if self.compevent_colname is not None:
14
- predictions["compevent"].append(model_dict["compevent"].predict(data))
18
+ compevent_model = self._offloader.load_model(model_dict["compevent"])
19
+ predictions["compevent"].append(_safe_predict(compevent_model, data.copy()))
15
20
 
16
21
  return predictions
17
22
 
@@ -41,24 +46,20 @@ def _calculate_risk(self, data, idx=None, val=None):
41
46
  lci = a / 2
42
47
  uci = 1 - lci
43
48
 
49
+ # Pre-compute the followup range once (starts at 1, not 0)
50
+ followup_range = list(range(1, self.followup_max + 1))
51
+
44
52
  SDT = (
45
53
  data.with_columns(
46
- [
47
- (
48
- pl.col(self.id_col).cast(pl.Utf8) + pl.col("trial").cast(pl.Utf8)
49
- ).alias("TID")
50
- ]
54
+ [pl.concat_str([pl.col(self.id_col), pl.col("trial")]).alias("TID")]
51
55
  )
52
56
  .group_by("TID")
53
57
  .first()
54
58
  .drop(["followup", f"followup{self.indicator_squared}"])
55
- .with_columns([pl.lit(list(range(self.followup_max))).alias("followup")])
59
+ .with_columns([pl.lit(followup_range).alias("followup")])
56
60
  .explode("followup")
57
61
  .with_columns(
58
- [
59
- (pl.col("followup") + 1).alias("followup"),
60
- (pl.col("followup") ** 2).alias(f"followup{self.indicator_squared}"),
61
- ]
62
+ [(pl.col("followup") ** 2).alias(f"followup{self.indicator_squared}")]
62
63
  )
63
64
  ).sort([self.id_col, "trial", "followup"])
64
65
 
@@ -370,3 +371,11 @@ def _calculate_survival(self, risk_data):
370
371
  [(1 - pl.col("pred")).alias("pred"), pl.lit("survival").alias("estimate")]
371
372
  )
372
373
  return surv
374
+
375
+
376
+ def _clamp(data):
377
+ """Clamp prediction and CI columns to [0, 1] bounds."""
378
+ cols = ["pred", "LCI", "UCI"]
379
+ exists = [c for c in cols if c in data.columns]
380
+
381
+ return data.with_columns([pl.col(col).clip(0.0, 1.0) for col in exists])
@@ -1,2 +1,2 @@
1
- from ._datachecker import _datachecker as _datachecker
1
+ from ._data_checker import _data_checker as _data_checker
2
2
  from ._param_checker import _param_checker as _param_checker
@@ -1,7 +1,7 @@
1
1
  import polars as pl
2
2
 
3
3
 
4
- def _datachecker(self):
4
+ def _data_checker(self):
5
5
  check = self.data.group_by(self.id_col).agg(
6
6
  [pl.len().alias("row_count"), pl.col(self.time_col).max().alias("max_time")]
7
7
  )
@@ -13,17 +13,10 @@ def _mapper(data, id_col, time_col, min_followup=-math.inf, max_followup=math.in
13
13
  .with_columns([pl.col(id_col).cum_count().over(id_col).sub(1).alias("trial")])
14
14
  .with_columns(
15
15
  [
16
- pl.struct(
17
- [
18
- pl.col(time_col),
19
- pl.col(time_col).max().over(id_col).alias("max_time"),
20
- ]
21
- )
22
- .map_elements(
23
- lambda x: list(range(x[time_col], x["max_time"] + 1)),
24
- return_dtype=pl.List(pl.Int64),
25
- )
26
- .alias("period")
16
+ pl.int_ranges(
17
+ pl.col(time_col),
18
+ pl.col(time_col).max().over(id_col) + 1,
19
+ ).alias("period")
27
20
  ]
28
21
  )
29
22
  .explode("period")
@@ -0,0 +1,44 @@
1
+ import polars as pl
2
+
3
+
4
+ def _random_selection(self):
5
+ """
6
+ Handles the case where random selection is applied for data from
7
+ the __mapper -> __binder -> optionally __dynamic pipeline
8
+ """
9
+ UIDs = (
10
+ self.DT.select(
11
+ [self.id_col, "trial", f"{self.treatment_col}{self.indicator_baseline}"]
12
+ )
13
+ .with_columns(
14
+ (
15
+ pl.col(self.id_col).cast(pl.Utf8) + "_" + pl.col("trial").cast(pl.Utf8)
16
+ ).alias("trialID")
17
+ )
18
+ .filter(
19
+ pl.col(f"{self.treatment_col}{self.indicator_baseline}")
20
+ == self.treatment_level[0]
21
+ )
22
+ .unique("trialID")
23
+ .get_column("trialID")
24
+ .to_list()
25
+ )
26
+
27
+ NIDs = len(UIDs)
28
+ sample = self._rng.choice(
29
+ UIDs, size=int(self.selection_sample * NIDs), replace=False
30
+ )
31
+
32
+ self.DT = (
33
+ self.DT.with_columns(
34
+ (
35
+ pl.col(self.id_col).cast(pl.Utf8) + "_" + pl.col("trial").cast(pl.Utf8)
36
+ ).alias("trialID")
37
+ )
38
+ .filter(
39
+ pl.col("trialID").is_in(sample)
40
+ | pl.col(f"{self.treatment_col}{self.indicator_baseline}")
41
+ != self.treatment_level[0]
42
+ )
43
+ .drop("trialID")
44
+ )
@@ -1,6 +1,7 @@
1
1
  from ._bootstrap import bootstrap_loop as bootstrap_loop
2
2
  from ._col_string import _col_string as _col_string
3
3
  from ._format_time import _format_time as _format_time
4
+ from ._offloader import Offloader as Offloader
4
5
  from ._output_files import _build_md as _build_md
5
6
  from ._output_files import _build_pdf as _build_pdf
6
7
  from ._pad import _pad as _pad