pyrollmatch 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ """
2
+ pyrollmatch — Fast rolling entry matching for staggered adoption studies.
3
+
4
+ A Python reimplementation of the R ``rollmatch`` package (RTI International)
5
+ using polars and numpy for scalable matching on large panel datasets (100K+ units).
6
+
7
+ Rolling entry matching (REM) explicitly handles staggered treatment adoption
8
+ by matching each treated unit to controls at the treated unit's specific entry
9
+ time, using accumulated (rolling-window) covariates.
10
+
11
+ Quick Start
12
+ -----------
13
+ >>> import polars as pl
14
+ >>> from pyrollmatch import rollmatch, alpha_sweep
15
+ >>>
16
+ >>> # data: panel with columns [unit_id, time, treat, entry_time, x1, x2, ...]
17
+ >>> result = rollmatch(
18
+ ... data, treat="treat", tm="time", entry="entry_time", id="unit_id",
19
+ ... covariates=["x1", "x2", "x3"],
20
+ ... alpha=0.1, num_matches=3,
21
+ ... )
22
+ >>> result.balance # SMD table
23
+ >>> result.weights # unit_id -> matching weight
24
+
25
+ References
26
+ ----------
27
+ - Witman et al. (2018). "Comparison Group Selection in the Presence of Rolling Entry."
28
+ Health Services Research, 54(1), 262-270. doi:10.1111/1475-6773.13086
29
+ - RTI International rollmatch R package: https://github.com/RTIInternational/rollmatch
30
+ """
31
+
32
+ from .core import rollmatch, alpha_sweep, RollmatchResult
33
+ from .reduce import reduce_data
34
+ from .score import score_data, ScoredResult
35
+ from .balance import compute_balance, smd_table
36
+ from .diagnostics import balance_test, equivalence_test
37
+
38
+ __version__ = "0.0.3"
39
+ __all__ = [
40
+ "rollmatch",
41
+ "alpha_sweep",
42
+ "RollmatchResult",
43
+ "reduce_data",
44
+ "score_data",
45
+ "ScoredResult",
46
+ "compute_balance",
47
+ "smd_table",
48
+ "balance_test",
49
+ "equivalence_test",
50
+ ]
pyrollmatch/balance.py ADDED
@@ -0,0 +1,121 @@
1
+ """
2
+ balance — Covariate balance computation and SMD table.
3
+ """
4
+
5
+ import polars as pl
6
+ import numpy as np
7
+
8
+
9
def _group_stats(vals_t: np.ndarray, vals_c: np.ndarray) -> tuple[float, float, float, float, float]:
    """Return (mean_t, mean_c, sd_t, sd_c, smd) for one covariate.

    SMD uses the pooled SD ``sqrt((sd_t**2 + sd_c**2) / 2)``.  Any statistic
    that cannot be computed (empty sample, singleton sample for an SD, zero
    or undefined pooled SD) is returned as NaN.
    """
    mean_t = np.mean(vals_t) if len(vals_t) > 0 else np.nan
    mean_c = np.mean(vals_c) if len(vals_c) > 0 else np.nan
    sd_t = np.std(vals_t, ddof=1) if len(vals_t) > 1 else np.nan
    sd_c = np.std(vals_c, ddof=1) if len(vals_c) > 1 else np.nan
    if np.isnan(sd_t) or np.isnan(sd_c):
        pooled = np.nan
    else:
        pooled = np.sqrt((sd_t**2 + sd_c**2) / 2)
    # NaN pooled falls through to NaN SMD (nan > 0 is False); 0 pooled too.
    smd = (mean_t - mean_c) / pooled if pooled and pooled > 0 else np.nan
    return mean_t, mean_c, sd_t, sd_c, smd


def compute_balance(
    scored_data: pl.DataFrame,
    matches: pl.DataFrame,
    treat: str,
    id: str,
    tm: str,
    covariates: list[str],
) -> pl.DataFrame:
    """Compute covariate balance before and after matching.

    Returns a table with means, SDs, and SMDs for each covariate,
    both in the full sample and the matched sample.

    Parameters
    ----------
    scored_data : pl.DataFrame
        Reduced data with treatment indicator and covariates.
    matches : pl.DataFrame
        Match results with treat_id and control_id columns.
    treat : str
        Treatment indicator column.
    id : str
        Unit identifier column.
    tm : str
        Time period column.
    covariates : list[str]
        Covariate column names.

    Returns
    -------
    pl.DataFrame with columns:
        covariate, full_mean_t, full_mean_c, full_sd_t, full_sd_c,
        full_smd, matched_mean_t, matched_mean_c, matched_sd_t,
        matched_sd_c, matched_smd
    """
    # Pre-compute matched data ONCE (not per covariate)
    treat_matches = matches.select(tm, "treat_id").unique().rename({"treat_id": id})
    control_matches = matches.select(tm, "control_id").unique().rename({"control_id": id})
    matched_ids_df = pl.concat([treat_matches, control_matches])
    matched_data = scored_data.join(matched_ids_df, on=[tm, id], how="semi")

    # Pre-split by treatment group
    full_t = scored_data.filter(pl.col(treat) == 1)
    full_c = scored_data.filter(pl.col(treat) == 0)
    match_t = matched_data.filter(pl.col(treat) == 1)
    match_c = matched_data.filter(pl.col(treat) == 0)

    rows = []

    for cov in covariates:
        # Identical statistics for the full and matched samples; the shared
        # helper keeps the two computations from drifting apart.
        f_mean_t, f_mean_c, f_sd_t, f_sd_c, f_smd = _group_stats(
            full_t[cov].drop_nulls().to_numpy(),
            full_c[cov].drop_nulls().to_numpy(),
        )
        m_mean_t, m_mean_c, m_sd_t, m_sd_c, m_smd = _group_stats(
            match_t[cov].drop_nulls().to_numpy(),
            match_c[cov].drop_nulls().to_numpy(),
        )

        rows.append({
            "covariate": cov,
            "full_mean_t": round(f_mean_t, 4),
            "full_mean_c": round(f_mean_c, 4),
            "full_sd_t": round(f_sd_t, 4),
            "full_sd_c": round(f_sd_c, 4),
            "full_smd": round(f_smd, 4),
            "matched_mean_t": round(m_mean_t, 4),
            "matched_mean_c": round(m_mean_c, 4),
            "matched_sd_t": round(m_sd_t, 4),
            "matched_sd_c": round(m_sd_c, 4),
            "matched_smd": round(m_smd, 4),
        })

    return pl.DataFrame(rows)
94
+
95
+
96
def smd_table(balance: pl.DataFrame, threshold: float = 0.1) -> None:
    """Print a formatted SMD table with pass/fail indicators.

    Parameters
    ----------
    balance : pl.DataFrame
        Output from compute_balance().
    threshold : float
        |SMD| threshold for pass/fail (default 0.1).
    """
    max_smd = balance["matched_smd"].abs().max()
    # polars max() returns None on an empty / all-null column; map to NaN so
    # the comparison and the :.4f format below don't raise TypeError.
    if max_smd is None:
        max_smd = float("nan")
    all_pass = max_smd < threshold  # NaN compares False -> reported as FAIL

    print(f"\n{'='*70}")
    print(f" Standardized Mean Differences (threshold: |SMD| < {threshold})")
    print(f" Max |SMD| = {max_smd:.4f} {'✓ ALL PASS' if all_pass else '✗ SOME FAIL'}")
    print(f"{'='*70}\n")
    print(f" {'Covariate':<30} {'Full SMD':>10} {'Matched SMD':>12} {'Pass':>6}")
    print(f" {'-'*30} {'-'*10} {'-'*12} {'-'*6}")

    for row in balance.iter_rows(named=True):
        smd = row["matched_smd"]
        full_smd = row["full_smd"]
        passed = abs(smd) < threshold if smd is not None else False
        # A null SMD must not reach the float format spec (TypeError); render
        # a placeholder of the same width instead.
        smd_txt = f"{smd:>12.4f}" if smd is not None else f"{'--':>12}"
        full_txt = f"{full_smd:>10.4f}" if full_smd is not None else f"{'--':>10}"
        print(f" {row['covariate']:<30} {full_txt} {smd_txt} {'✓' if passed else '✗':>6}")

    print()
pyrollmatch/core.py ADDED
@@ -0,0 +1,301 @@
1
+ """
2
+ core — Main rollmatch orchestration and alpha sweep.
3
+ """
4
+
5
+ import polars as pl
6
+ import numpy as np
7
+ from dataclasses import dataclass, field
8
+
9
+ from .reduce import reduce_data
10
+ from .score import score_data
11
+ from .match import match_all_periods
12
+ from .balance import compute_balance, smd_table
13
+
14
+
15
@dataclass
class RollmatchResult:
    """Result from rollmatch."""
    # Matched pairs: the DataFrame returned by match_all_periods
    # (one row per treat_id/control_id pairing).
    matched_data: pl.DataFrame
    # Covariate balance table from compute_balance() (full vs. matched SMDs).
    balance: pl.DataFrame
    # Distinct treated units present in the scored data.
    n_treated_total: int
    # Distinct treated units that received at least one match.
    n_treated_matched: int
    # Distinct control units used as matches.
    n_controls_matched: int
    # Caliper multiplier used for this run (0 = no caliper).
    alpha: float
    weights: pl.DataFrame  # id -> weight
25
+
26
+
27
def _compute_weights(matches: pl.DataFrame, id: str, num_matches: int) -> pl.DataFrame:
    """Derive per-unit matching weights from the matched pairs.

    Following the R rollmatch convention:
    - each matched treated unit gets weight 1.0;
    - each control gets the sum, over every treated unit it serves, of
      1 / (that treated unit's actual number of matches).

    This yields proper inverse-probability-style weighting when treated
    units end up with different numbers of matches (e.g., tight calipers).
    ``num_matches`` is unused here (weights are based on *actual* match
    counts) and is retained only for interface compatibility.
    """
    # Share of each treated unit's weight flowing to every one of its pairs:
    # a window count over treat_id gives that unit's actual match count.
    pair_shares = matches.with_columns(
        (1.0 / pl.len().over("treat_id")).alias("treatment_weight")
    )

    # Treated side: every matched treated unit carries a flat weight of 1.0.
    treated_side = (
        matches.select("treat_id").unique()
        .rename({"treat_id": id})
        .with_columns(pl.lit(1.0).alias("weight"))
    )

    # Control side: accumulate the shares received across all pairings.
    control_side = (
        pair_shares
        .group_by("control_id")
        .agg(pl.col("treatment_weight").sum().alias("weight"))
        .rename({"control_id": id})
    )

    # A unit appearing on both sides (possible across entry periods) gets
    # the sum of its role-specific weights.
    stacked = pl.concat([treated_side, control_side])
    return stacked.group_by(id).agg(pl.col("weight").sum())
61
+
62
+
63
def rollmatch(
    data: pl.DataFrame,
    treat: str,
    tm: str,
    entry: str,
    id: str,
    covariates: list[str],
    lookback: int = 1,
    alpha: float = 0,
    num_matches: int = 3,
    replacement: bool = True,
    standard_deviation: str = "average",
    model_type: str = "logistic",
    match_on: str = "logit",
    block_size: int = 2000,
    verbose: bool = True,
) -> RollmatchResult | None:
    """Run the full rolling entry matching pipeline.

    Pipeline: reduce_data -> drop-null covariates -> score_data ->
    match_all_periods -> compute_balance -> weights.  Returns None (rather
    than raising) if any stage leaves nothing to work with.

    Parameters
    ----------
    data : pl.DataFrame
        Panel data with unit × time observations.
    treat : str
        Binary treatment column (1=treated, 0=control).
    tm : str
        Time period column (integer).
    entry : str
        Entry period column. Treatment onset for treated units; null or
        any value > max(tm) for controls.
    id : str
        Unit identifier column.
    covariates : list[str]
        Covariate column names for matching.
    lookback : int
        Periods to look back from entry for baseline covariates.
    alpha : float
        Caliper multiplier (0 = no caliper).
    num_matches : int
        Number of control matches per treated unit.
    replacement : bool
        Allow control reuse within time period.
    standard_deviation : str
        Method for pooled SD in caliper.
    model_type : str
        Propensity model type ("logistic").
    match_on : str
        Score type ("logit" or "pscore").
    block_size : int
        Block size for memory-efficient matching.
    verbose : bool
        Print progress.

    Returns
    -------
    RollmatchResult or None if matching fails.
    """
    if verbose:
        n_treat = data.filter(pl.col(treat) == 1)[id].n_unique()
        n_ctrl = data.filter(pl.col(treat) == 0)[id].n_unique()
        print(f"rollmatch: {n_treat} treated, {n_ctrl} controls, alpha={alpha}")

    # Step 1: Reduce data
    if verbose:
        print("  Step 1: reduce_data...")
    reduced = reduce_data(data, treat, tm, entry, id, lookback)
    if verbose:
        print(f"    Reduced: {reduced.height} rows")

    # Drop rows with NaN in covariates; the propensity model cannot use them.
    reduced = reduced.drop_nulls(subset=covariates)
    if verbose:
        print(f"    After dropping NaN: {reduced.height} rows")

    # Bail out early: nothing left to score or match.
    if reduced.height == 0:
        if verbose:
            print("    ERROR: No valid rows after NaN removal")
        return None

    # Step 2: Score data (fit propensity model, attach score column)
    if verbose:
        print("  Step 2: score_data...")
    scored = score_data(reduced, covariates, treat, model_type, match_on)
    if verbose:
        print(f"    Scored: {scored.height} rows")

    # Step 3: Match treated to controls within each entry period
    if verbose:
        print(f"  Step 3: matching (alpha={alpha}, num_matches={num_matches})...")
    matches = match_all_periods(
        scored, treat, tm, entry, id,
        alpha=alpha, num_matches=num_matches,
        replacement=replacement, standard_deviation=standard_deviation,
        block_size=block_size,
    )

    # A too-tight caliper (or degenerate data) can leave zero matches.
    if matches is None or matches.height == 0:
        if verbose:
            print("    No matches found!")
        return None

    n_treated_matched = matches["treat_id"].n_unique()
    n_controls_matched = matches["control_id"].n_unique()
    n_treated_total = scored.filter(pl.col(treat) == 1)[id].n_unique()

    if verbose:
        print(f"    Matched: {matches.height} pairs")
        print(f"    Treated matched: {n_treated_matched}/{n_treated_total} "
              f"({100*n_treated_matched/n_treated_total:.1f}%)")
        print(f"    Controls used: {n_controls_matched}")

    # Step 4: Balance diagnostics on the matched sample
    if verbose:
        print("  Step 4: balance...")
    balance = compute_balance(scored, matches, treat, id, tm, covariates)

    # Step 5: Compute weights (treated = 1, controls = summed 1/k shares)
    weights = _compute_weights(matches, id, num_matches)

    if verbose:
        smd_table(balance)

    return RollmatchResult(
        matched_data=matches,
        balance=balance,
        n_treated_total=n_treated_total,
        n_treated_matched=n_treated_matched,
        n_controls_matched=n_controls_matched,
        alpha=alpha,
        weights=weights,
    )
194
+
195
+
196
def alpha_sweep(
    data: pl.DataFrame,
    treat: str,
    tm: str,
    entry: str,
    id: str,
    covariates: list[str],
    alphas: list[float] | None = None,
    lookback: int = 1,
    num_matches: int = 3,
    replacement: bool = True,
    standard_deviation: str = "average",
    model_type: str = "logistic",
    match_on: str = "logit",
    block_size: int = 2000,
    smd_threshold: float = 0.1,
) -> tuple[pl.DataFrame, RollmatchResult | None]:
    """Run rollmatch across multiple alpha values and select the best.

    Best = fully balanced (all |SMD| < threshold) with highest match rate.
    If none fully balance, select the one with lowest max|SMD|.

    Parameters
    ----------
    data : pl.DataFrame
        Panel data.
    alphas : list[float]
        Caliper multipliers to try. Default: [0.01, 0.02, 0.05, 0.1, 0.15, 0.2]
    smd_threshold : float
        |SMD| threshold for "balanced" (default 0.1).
    (other params same as rollmatch)

    Returns
    -------
    (summary_df, best_result)
    """
    if alphas is None:
        alphas = [0.01, 0.02, 0.05, 0.1, 0.15, 0.2]

    # Pre-compute reduce + score once: neither step depends on alpha, so
    # sharing them avoids refitting the propensity model per alpha.
    reduced = reduce_data(data, treat, tm, entry, id, lookback)
    reduced = reduced.drop_nulls(subset=covariates)
    scored = score_data(reduced, covariates, treat, model_type, match_on)

    results = []
    best_result = None
    best_score = (-1, -np.inf)  # (all_pass, match_rate), lexicographic

    for alpha in alphas:
        print(f"  alpha={alpha:.2f} ... ", end="", flush=True)

        matches = match_all_periods(
            scored, treat, tm, entry, id,
            alpha=alpha, num_matches=num_matches,
            replacement=replacement, standard_deviation=standard_deviation,
            block_size=block_size,
        )

        if matches is None or matches.height == 0:
            print("no matches")
            continue

        balance = compute_balance(scored, matches, treat, id, tm, covariates)
        max_smd = balance["matched_smd"].abs().max()
        # polars max() returns None when every matched_smd is null; treat
        # that as "unbalanced" (NaN) instead of crashing on the comparison,
        # round(), and :.4f format below.
        if max_smd is None:
            max_smd = float("nan")
        all_pass = max_smd < smd_threshold  # NaN compares False

        n_treat_total = scored.filter(pl.col(treat) == 1)[id].n_unique()
        n_treat_matched = matches["treat_id"].n_unique()
        match_rate = n_treat_matched / n_treat_total

        results.append({
            "alpha": alpha,
            "n_matched_pairs": matches.height,
            "n_treated_matched": n_treat_matched,
            "pct_treated": round(100 * match_rate, 1),
            "max_abs_smd": round(max_smd, 4),
            "all_pass": all_pass,
        })

        print(f"matched={n_treat_matched}/{n_treat_total} ({100*match_rate:.0f}%), "
              f"max|SMD|={max_smd:.4f} {'✓' if all_pass else '✗'}")

        # Track best: full balance dominates; match rate breaks ties.
        score = (int(all_pass), match_rate)
        if score > best_score:
            best_score = score
            weights = _compute_weights(matches, id, num_matches)

            best_result = RollmatchResult(
                matched_data=matches,
                balance=balance,
                n_treated_total=n_treat_total,
                n_treated_matched=n_treat_matched,
                n_controls_matched=matches["control_id"].n_unique(),
                alpha=alpha,
                weights=weights,
            )

    summary = pl.DataFrame(results) if results else pl.DataFrame()

    if best_result:
        # Same None-guard as above for the winner's max|SMD| display.
        best_max = best_result.balance["matched_smd"].abs().max()
        if best_max is None:
            best_max = float("nan")
        print(f"\n  Best: alpha={best_result.alpha} "
              f"(matched={best_result.n_treated_matched}/{best_result.n_treated_total}, "
              f"max|SMD|={best_max:.4f})")

    return summary, best_result
@@ -0,0 +1,207 @@
1
+ """
2
+ diagnostics — Post-matching diagnostic tests.
3
+
4
+ Includes t-tests, SMD tests, variance ratio tests, and equivalence tests
5
+ for assessing matching quality.
6
+ """
7
+
8
+ import numpy as np
9
+ import polars as pl
10
+ from scipy import stats
11
+
12
+
13
def balance_test(
    scored_data: pl.DataFrame,
    matches: pl.DataFrame,
    treat: str,
    id: str,
    tm: str,
    covariates: list[str],
    threshold: float = 0.1,
) -> pl.DataFrame:
    """Run comprehensive balance diagnostics on matched sample.

    For each covariate, computes:
    - Standardized mean difference (SMD)
    - Two-sample t-test (H0: means are equal)
    - Variance ratio (treat/control)
    - Kolmogorov-Smirnov test (H0: distributions are equal)

    Covariates with fewer than 2 observations in either arm are skipped.

    Parameters
    ----------
    scored_data : pl.DataFrame
        Reduced data with treatment indicator and covariates.
    matches : pl.DataFrame
        Match results with treat_id, control_id, tm columns.
    treat : str
        Treatment indicator column.
    id : str
        Unit identifier column.
    tm : str
        Time period column.
    covariates : list[str]
        Covariate column names.
    threshold : float
        SMD threshold for pass/fail (default 0.1).

    Returns
    -------
    pl.DataFrame with diagnostics per covariate (empty if no covariate
    had enough data).
    """
    # Get matched units (semi-join keeps only rows present in the match set)
    treat_matches = matches.select(tm, "treat_id").unique().rename({"treat_id": id})
    control_matches = matches.select(tm, "control_id").unique().rename({"control_id": id})
    matched_ids = pl.concat([treat_matches, control_matches])
    matched_data = scored_data.join(matched_ids, on=[tm, id], how="semi")

    rows = []
    for cov in covariates:
        vals_t = matched_data.filter(pl.col(treat) == 1)[cov].drop_nulls().to_numpy().astype(float)
        vals_c = matched_data.filter(pl.col(treat) == 0)[cov].drop_nulls().to_numpy().astype(float)

        # Each test below needs at least 2 observations per arm.
        if len(vals_t) < 2 or len(vals_c) < 2:
            continue

        # SMD with pooled SD sqrt((sd_t^2 + sd_c^2) / 2)
        sd_t, sd_c = np.std(vals_t, ddof=1), np.std(vals_c, ddof=1)
        pooled_sd = np.sqrt((sd_t**2 + sd_c**2) / 2)
        smd = (np.mean(vals_t) - np.mean(vals_c)) / pooled_sd if pooled_sd > 0 else np.nan

        # Two-sample t-test (Welch's)
        t_stat, t_pvalue = stats.ttest_ind(vals_t, vals_c, equal_var=False)

        # Variance ratio (treat/control); NaN when control variance is 0
        var_ratio = np.var(vals_t, ddof=1) / np.var(vals_c, ddof=1) if np.var(vals_c, ddof=1) > 0 else np.nan

        # KS test
        ks_stat, ks_pvalue = stats.ks_2samp(vals_t, vals_c)

        rows.append({
            "covariate": cov,
            "mean_treated": round(np.mean(vals_t), 4),
            "mean_control": round(np.mean(vals_c), 4),
            "smd": round(smd, 4),
            "smd_pass": bool(abs(smd) < threshold),
            "t_stat": round(t_stat, 4),
            "t_pvalue": round(t_pvalue, 4),
            "var_ratio": round(var_ratio, 4),
            "var_ratio_pass": bool(0.5 < var_ratio < 2.0) if not np.isnan(var_ratio) else False,
            "ks_stat": round(ks_stat, 4),
            "ks_pvalue": round(ks_pvalue, 4),
        })

    result = pl.DataFrame(rows)

    # Guard: with zero qualifying covariates, pl.DataFrame([]) has NO columns,
    # so filtering on "smd_pass" below would raise ColumnNotFoundError.
    if result.height == 0:
        print("\n balance_test: no covariates with enough matched data to test")
        return result

    # Print summary
    n_pass_smd = result.filter(pl.col("smd_pass")).height
    n_pass_var = result.filter(pl.col("var_ratio_pass")).height
    n_total = result.height

    print(f"\n{'='*70}")
    print(" Post-Matching Balance Diagnostics")
    print(f"{'='*70}")
    print(f" SMD < {threshold}: {n_pass_smd}/{n_total} pass")
    print(f" Variance ratio in (0.5, 2.0): {n_pass_var}/{n_total} pass")
    print(f"{'='*70}\n")

    print(f" {'Covariate':<25} {'SMD':>8} {'t-test p':>10} {'VR':>8} {'KS p':>8}")
    print(f" {'-'*25} {'-'*8} {'-'*10} {'-'*8} {'-'*8}")
    for row in result.iter_rows(named=True):
        smd_flag = "✓" if row["smd_pass"] else "✗"
        vr_flag = "✓" if row["var_ratio_pass"] else "✗"
        print(f" {row['covariate']:<25} {row['smd']:>7.4f}{smd_flag} {row['t_pvalue']:>10.4f} {row['var_ratio']:>7.3f}{vr_flag} {row['ks_pvalue']:>8.4f}")

    return result
115
+
116
+
117
def equivalence_test(
    scored_data: pl.DataFrame,
    matches: pl.DataFrame,
    treat: str,
    id: str,
    tm: str,
    covariates: list[str],
    multiplier: float = 0.36,
) -> pl.DataFrame:
    """TOST equivalence test for covariate balance.

    Tests H0: |SMD| >= delta (non-equivalence).
    Rejection = GOOD (positive evidence of negligible difference).
    Uses Hartman & Hidalgo (2018) approach: delta = multiplier × pooled_SD.

    Covariates with fewer than 2 observations in either arm are skipped.

    Parameters
    ----------
    scored_data : pl.DataFrame
        Reduced data.
    matches : pl.DataFrame
        Match results.
    treat, id, tm : str
        Column names.
    covariates : list[str]
        Covariate names.
    multiplier : float
        Equivalence bound as fraction of pooled SD (default 0.36).

    Returns
    -------
    pl.DataFrame with TOST results per covariate (empty if no covariate
    had enough data).
    """
    treat_matches = matches.select(tm, "treat_id").unique().rename({"treat_id": id})
    control_matches = matches.select(tm, "control_id").unique().rename({"control_id": id})
    matched_ids = pl.concat([treat_matches, control_matches])
    matched_data = scored_data.join(matched_ids, on=[tm, id], how="semi")

    rows = []
    for cov in covariates:
        vals_t = matched_data.filter(pl.col(treat) == 1)[cov].drop_nulls().to_numpy().astype(float)
        vals_c = matched_data.filter(pl.col(treat) == 0)[cov].drop_nulls().to_numpy().astype(float)

        # Variances below need ddof=1, i.e. at least 2 observations per arm.
        if len(vals_t) < 2 or len(vals_c) < 2:
            continue

        m, n = len(vals_t), len(vals_c)
        diff = np.mean(vals_t) - np.mean(vals_c)
        var_t = np.var(vals_t, ddof=1)
        var_c = np.var(vals_c, ddof=1)

        # Pooled SD: weighted formula matching Hartman & Hidalgo (2018)
        # equivtest R package: sqrt(((m-1)*var_x + (n-1)*var_y) / (m+n-2))
        pooled_sd = np.sqrt(((m - 1) * var_t + (n - 1) * var_c) / (m + n - 2))
        delta = multiplier * pooled_sd

        # Two one-sided t-tests following equivtest::tost()
        # Uses Welch's t-test (unequal variances); df via Welch–Satterthwaite
        # (note se**4 == (var_t/m + var_c/n)**2).
        se = np.sqrt(var_t / m + var_c / n)
        df_welch = se**4 / ((var_t/m)**2/(m-1) + (var_c/n)**2/(n-1)) if se > 0 else 1

        # Upper test: H0: diff >= delta, alt: diff < delta
        t_upper = (diff - delta) / se if se > 0 else np.inf
        p_upper = stats.t.cdf(t_upper, df=df_welch)

        # Lower test: H0: diff <= -delta, alt: diff > -delta
        t_lower = (diff + delta) / se if se > 0 else -np.inf
        p_lower = 1 - stats.t.cdf(t_lower, df=df_welch)

        # TOST rejects only if BOTH one-sided tests reject -> take the max p.
        tost_p = max(p_upper, p_lower)

        rows.append({
            "covariate": cov,
            "diff": round(diff, 6),
            "se": round(se, 6),
            "delta": round(delta, 4),
            "tost_p_upper": round(p_upper, 4),
            "tost_p_lower": round(p_lower, 4),
            "tost_p": round(tost_p, 4),
            "equivalent": bool(tost_p < 0.05),
        })

    result = pl.DataFrame(rows)

    # Guard: with zero qualifying covariates, pl.DataFrame([]) has NO columns,
    # so filtering on "equivalent" below would raise ColumnNotFoundError.
    if result.height == 0:
        print(f"\n TOST Equivalence Test (bound = {multiplier}σ)")
        print(" No covariates with enough matched data to test.")
        return result

    n_equiv = result.filter(pl.col("equivalent")).height
    print(f"\n TOST Equivalence Test (bound = {multiplier}σ)")
    print(f" Equivalent: {n_equiv}/{result.height} covariates (p < 0.05 = GOOD)")
    for row in result.iter_rows(named=True):
        flag = "✓ EQUIV" if row["equivalent"] else " not equiv"
        print(f" {row['covariate']:<25} p={row['tost_p']:.4f} {flag}")

    return result