PyPI - diff-diff - Versions diffs - 3.5.3__tar.gz → 3.6.0__tar.gz - Mend

diff-diff 3.5.3__tar.gz → 3.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

{diff_diff-3.5.3 → diff_diff-3.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diff-diff
-Version: 3.5.3
+Version: 3.6.0
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: Operating System :: OS Independent
@@ -170,6 +170,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
 - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
 - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
 - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
+- [LPDiD](https://diff-diff.readthedocs.io/en/stable/api/lpdid.html) - Dube, Girardi, Jorda & Taylor (2025) Local Projections DiD: per-horizon long-difference event study on clean controls (no negative weighting), variance- or equally-weighted ATT, for absorbing treatment
 - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
 ## Diagnostics & Sensitivity

{diff_diff-3.5.3 → diff_diff-3.6.0}/README.md RENAMED Viewed

@@ -117,6 +117,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
 - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
 - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
 - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
+- [LPDiD](https://diff-diff.readthedocs.io/en/stable/api/lpdid.html) - Dube, Girardi, Jorda & Taylor (2025) Local Projections DiD: per-horizon long-difference event study on clean controls (no negative weighting), variance- or equally-weighted ATT, for absorbing treatment
 - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
 ## Diagnostics & Sensitivity

{diff_diff-3.5.3 → diff_diff-3.6.0}/diff_diff/__init__.py RENAMED Viewed

@@ -230,6 +230,8 @@ from diff_diff.synthetic_control import (
 from diff_diff.synthetic_control_results import SyntheticControlResults
 from diff_diff.wooldridge import WooldridgeDiD
 from diff_diff.wooldridge_results import WooldridgeDiDResults
+from diff_diff.lpdid import LPDiD
+from diff_diff.lpdid_results import LPDiDResults
 from diff_diff.utils import (
     WildBootstrapResults,
     check_parallel_trends,
@@ -299,7 +301,7 @@ ETWFE = WooldridgeDiD
 DCDH = ChaisemartinDHaultfoeuille
 HAD = HeterogeneousAdoptionDiD
-__version__ = "3.5.3"
+__version__ = "3.6.0"
 __all__ = [
     # Estimators
     "DifferenceInDifferences",
@@ -381,6 +383,9 @@ __all__ = [
     "WooldridgeDiD",
     "WooldridgeDiDResults",
     "ETWFE",
+    # LPDiD (Local Projections DiD)
+    "LPDiD",
+    "LPDiDResults",
     # Visualization
     "plot_bacon",
     "plot_event_study",

diff_diff-3.6.0/diff_diff/bootstrap_chunking.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""Memory-bounded chunking for multiplier-bootstrap weight matrices.
+The multiplier bootstrap perturbs cached influence functions with a dense
+``(n_bootstrap, n_units)`` weight matrix. At large ``n_units`` that matrix
+dominates peak memory (e.g. ``999 x 5_000_000 x 8`` bytes is ~40 GB). Every
+consumer is a left-multiply ``weights @ influence_vector`` whose result is small
+(``(n_bootstrap,)`` or ``(n_bootstrap, n_gt)``), so the bootstrap can be tiled
+over the *draw* dimension: generate and consume the weights in row-blocks of
+``B``, capping the live intermediate at ``(B, n_units)``. FLOPs are identical to
+the un-chunked path -- only the draw axis is tiled. The generated weight stream
+is *bit-identical* to the un-chunked matrix (see below); the downstream
+``weights @ influence`` matmuls go through BLAS, whose reduction order depends on
+the operand row-count, so the resulting statistics match the un-chunked path to
+within floating-point reassociation (typically <~1 ULP), far below bootstrap
+Monte-Carlo error -- not bit-for-bit.
+Bit-identity of the weight *generation* is preserved on **both** backends:
+- **Rust** seeds each row absolutely as ``base_seed + row_index``
+  (``rust/src/bootstrap.rs``), so calling the generator per block with base seed
+  ``base_seed + chunk_start`` reproduces the exact un-chunked rows. Exactly one
+  ``rng.integers`` draw is consumed, matching the un-chunked wrapper.
+- The **NumPy** fallback draws the matrix row-major from the ``Generator``
+  stream, so consuming it in contiguous, in-order blocks from the same generator
+  reproduces the identical sequence.
+"""
+from __future__ import annotations
+from typing import Iterator, Optional, Tuple
+import numpy as np
+from diff_diff._backend import HAS_RUST_BACKEND, _rust_bootstrap_weights
+from diff_diff.bootstrap_utils import generate_bootstrap_weights_batch_numpy
+# Byte ceiling for a single ``(B, n_units)`` float64 weight block. 256 MB keeps
+# the live intermediate small at millions of units while staying large enough
+# that the per-block matmuls remain BLAS-efficient and chunk overhead (a handful
+# of extra Python iterations / FFI calls) is negligible.
+_TARGET_BLOCK_BYTES = 256 * 1024 * 1024
+def compute_block_size(
+    n_units: int, n_bootstrap: int, target_bytes: int = _TARGET_BLOCK_BYTES
+) -> int:
+    """Number of bootstrap rows per block so a ``(B, n_units)`` float64 block
+    stays under ``target_bytes``. Always in ``[1, n_bootstrap]``."""
+    if n_units <= 0:
+        return max(1, n_bootstrap)
+    b = target_bytes // (n_units * 8)
+    return int(max(1, min(max(1, n_bootstrap), b)))
+def iter_weight_blocks(
+    n_bootstrap: int,
+    n_gen: int,
+    weight_type: str,
+    rng: np.random.Generator,
+    *,
+    expand_index: Optional[np.ndarray] = None,
+    block_size: Optional[int] = None,
+) -> Iterator[Tuple[int, np.ndarray]]:
+    """Yield ``(chunk_start, block)`` pairs covering all ``n_bootstrap`` draws.
+    ``block`` has shape ``(B, width)`` where ``width = len(expand_index)`` when
+    ``expand_index`` is given, else ``n_gen``. Weights are generated at width
+    ``n_gen`` (unit / cluster / PSU level) and, when ``expand_index`` is given,
+    expanded to unit level via ``block[:, expand_index]`` (cluster->unit or
+    PSU->unit fan-out). The concatenation of all yielded blocks is bit-identical
+    to a single ``generate_bootstrap_weights_batch(n_bootstrap, n_gen, ...)``
+    followed by the same expansion.
+    Generation is in-order and stateful on ``rng`` (NumPy fallback) -- the caller
+    must consume the iterator sequentially, which the chunk loop does.
+    """
+    width = n_gen if expand_index is None else int(len(expand_index))
+    if block_size is None:
+        block_size = compute_block_size(width, n_bootstrap)
+    if block_size < 1:
+        raise ValueError(f"block_size must be >= 1, got {block_size}")
+    rust_gen = (
+        _rust_bootstrap_weights
+        if (HAS_RUST_BACKEND and _rust_bootstrap_weights is not None)
+        else None
+    )
+    # Draw exactly one base seed (matching the un-chunked Rust wrapper); the
+    # NumPy fallback consumes the rng stream directly per block instead.
+    base_seed = int(rng.integers(0, 2**63 - 1)) if rust_gen is not None else 0
+    for chunk_start in range(0, n_bootstrap, block_size):
+        rows = min(block_size, n_bootstrap - chunk_start)
+        if rust_gen is not None:
+            block = rust_gen(rows, n_gen, weight_type, base_seed + chunk_start)
+        else:
+            block = generate_bootstrap_weights_batch_numpy(rows, n_gen, weight_type, rng)
+        if expand_index is not None:
+            block = block[:, expand_index]
+        yield chunk_start, block
+def iter_survey_multiplier_weight_blocks(
+    n_bootstrap: int,
+    resolved_survey: object,
+    weight_type: str,
+    rng: np.random.Generator,
+    *,
+    block_size: int,
+) -> Tuple[np.ndarray, Iterator[Tuple[int, np.ndarray]]]:
+    """Chunked PSU-level multiplier weights for the survey-aware bootstrap.
+    Returns ``(psu_ids, blocks)`` where ``blocks`` yields
+    ``(chunk_start, (B, n_psu))`` PSU-weight blocks covering all draws.
+    For UNSTRATIFIED designs (``strata is None``, ``n_psu >= 2``) the
+    ``(n_bootstrap, n_psu)`` matrix is generated one draw-block at a time via
+    :func:`iter_weight_blocks` plus the unstratified FPC scalar -- bit-identical
+    to the unstratified branch of
+    :func:`diff_diff.bootstrap_utils.generate_survey_multiplier_weights_batch`,
+    but the full matrix is never materialized. This is the path taken by
+    ``cluster="unit"`` (each unit its own PSU, ``n_psu == n_units``), the case
+    that otherwise dominates bootstrap memory at large n_units.
+    Stratified designs (and the ``n_psu < 2`` degenerate case) fall back to full
+    generation + sliced blocks: per-stratum / lonely-PSU generation is not tiled
+    here, but stratified designs have few PSUs so the full matrix is small.
+    """
+    from diff_diff.bootstrap_utils import generate_survey_multiplier_weights_batch
+    if block_size < 1:
+        raise ValueError(f"block_size must be >= 1, got {block_size}")
+    psu = getattr(resolved_survey, "psu", None)
+    strata = getattr(resolved_survey, "strata", None)
+    if psu is None:
+        n_psu = len(resolved_survey.weights)  # type: ignore[attr-defined]
+        psu_ids = np.arange(n_psu)
+    else:
+        psu_ids = np.unique(psu)
+        n_psu = len(psu_ids)
+    if strata is not None or n_psu < 2:
+        # Stratified or degenerate single-PSU: full generation (small here).
+        weights, psu_ids = generate_survey_multiplier_weights_batch(
+            n_bootstrap, resolved_survey, weight_type, rng
+        )
+        def _sliced() -> Iterator[Tuple[int, np.ndarray]]:
+            for chunk_start in range(0, n_bootstrap, block_size):
+                yield chunk_start, weights[chunk_start : chunk_start + block_size]
+        return psu_ids, _sliced()
+    # Unstratified, n_psu >= 2: tile the generation over draws. Mirror the
+    # unstratified FPC scaling from generate_survey_multiplier_weights_batch.
+    fpc = getattr(resolved_survey, "fpc", None)
+    fpc_scale = 1.0
+    fpc_zero = False
+    if fpc is not None:
+        # psu=None already sets n_psu = len(weights), so n_units_for_fpc == n_psu
+        # on both branches of the original generator.
+        n_units_for_fpc = n_psu
+        if fpc[0] < n_units_for_fpc:
+            raise ValueError(
+                f"FPC ({fpc[0]}) is less than the number of PSUs "
+                f"({n_units_for_fpc}). FPC must be >= number of PSUs."
+            )
+        f = n_units_for_fpc / fpc[0]
+        if f < 1.0:
+            fpc_scale = float(np.sqrt(1.0 - f))
+        else:
+            fpc_zero = True
+    def _generated() -> Iterator[Tuple[int, np.ndarray]]:
+        for chunk_start, block in iter_weight_blocks(
+            n_bootstrap, n_psu, weight_type, rng, block_size=block_size
+        ):
+            if fpc_zero:
+                block = np.zeros_like(block)
+            elif fpc_scale != 1.0:
+                block = block * fpc_scale
+            yield chunk_start, block
+    return psu_ids, _generated()

{diff_diff-3.5.3 → diff_diff-3.6.0}/diff_diff/conley.py RENAMED Viewed

@@ -1104,16 +1104,37 @@ def _compute_conley_vcov(
         _conley_sparse=_conley_sparse,
     )
-    # Sandwich via two solves (mirrors _compute_cr2_bm pattern in linalg.py)
-    try:
-        temp = np.linalg.solve(bread_matrix, meat)
-        vcov = np.linalg.solve(bread_matrix, temp.T).T
-    except np.linalg.LinAlgError as e:
-        if "Singular" in str(e):
-            raise ValueError(
-                "Design matrix is rank-deficient (singular X'X matrix). "
-                "Cannot compute Conley spatial HAC variance."
-            ) from e
-        raise
+    # Sandwich via the shared rank-guarded inverse of the design Gram.
+    # np.linalg.solve only raises on an *exactly* singular bread, so a *near*-
+    # singular X'WX would otherwise flow a garbage inverse (~1e13) straight into
+    # the spatial-HAC variance. `_rank_guarded_inv` truncates redundant
+    # directions on the equilibrated Gram -> a finite SE on the identified
+    # subspace (NaN only at rank 0), matching the covariate IF rank-guard and the
+    # other structural bread inversions (ContinuousDiD / TwoStageDiD /
+    # SpilloverDiD). Lazy import: `linalg` imports this module, so a top-level
+    # `from diff_diff.linalg import ...` would be circular; resolving at call time
+    # is safe (linalg is already loaded by the time this runs).
+    from diff_diff.linalg import _rank_guarded_inv
+    bread_inv, n_dropped, _, dropped = _rank_guarded_inv(bread_matrix, return_dropped=True)
+    if n_dropped:
+        warnings.warn(
+            "Conley spatial HAC variance: the design Gram (X'WX) is "
+            f"rank-deficient ({n_dropped} redundant direction(s) dropped); "
+            "rank-reducing to a finite SE on the identified subspace "
+            "(NaN if rank 0). This usually indicates collinear regressors.",
+            UserWarning,
+            stacklevel=2,
+        )
+    # vcov = bread^{-1} @ meat @ bread^{-1}; algebraically identical to the prior
+    # two symmetric solves given `bread` symmetric (holds for any meat).
+    vcov = bread_inv @ meat @ bread_inv
+    # A dropped (unidentified) coefficient is zero-filled in bread_inv, which would
+    # otherwise report se=0 for that named coefficient. NaN its row/col in the
+    # FINAL vcov so per-coefficient SE extraction yields NaN (not 0) for the
+    # unidentified directions, while the identified coefficients stay finite.
+    if dropped.any():
+        vcov[dropped, :] = np.nan
+        vcov[:, dropped] = np.nan
     return vcov

{diff_diff-3.5.3 → diff_diff-3.6.0}/diff_diff/continuous_did.py RENAMED Viewed

@@ -29,7 +29,7 @@ from diff_diff.continuous_did_results import (
     ContinuousDiDResults,
     DoseResponseCurve,
 )
-from diff_diff.linalg import solve_ols
+from diff_diff.linalg import _rank_guarded_inv, solve_ols
 from diff_diff.survey import (
     ResolvedSurveyDesign,
     _resolve_survey_for_fit,
@@ -1047,21 +1047,32 @@ class ContinuousDiD:
         # Store bootstrap info for influence function computation
         # bread = (Psi'WPsi / n_treated)^{-1} when survey, (Psi'Psi / n_treated)^{-1} otherwise
+        # Bread = (Psi'WPsi / mass)^{-1} via the shared rank-guarded inverse:
+        # np.linalg.inv only raises on an *exactly* singular Gram, so a *near*-
+        # singular B-spline design (clustered doses / near-duplicate knots)
+        # previously returned a garbage inverse (~1e13) -> garbage SE. The prior
+        # `pinv` fallback was both minimum-norm (not the column-drop / near-
+        # collinear limit) and *silent*. `_rank_guarded_inv` truncates redundant
+        # directions on the equilibrated Gram -> finite SE on the identified
+        # subspace (NaN only at rank 0), matching the covariate IF rank-guard.
         if w_treated is not None:
             w_treated_sum = float(np.sum(w_treated))
             PtWP = Psi.T @ (Psi * w_treated[:, np.newaxis])
             # Normalize bread by weighted mass (not raw count) for consistency
             # with downstream IF score denominators that also use weighted mass
-            try:
-                bread = np.linalg.inv(PtWP / w_treated_sum)
-            except np.linalg.LinAlgError:
-                bread = np.linalg.pinv(PtWP / w_treated_sum)
+            bread, n_dropped, _ = _rank_guarded_inv(PtWP / w_treated_sum)
         else:
             PtP = Psi.T @ Psi
-            try:
-                bread = np.linalg.inv(PtP / n_treated)
-            except np.linalg.LinAlgError:
-                bread = np.linalg.pinv(PtP / n_treated)
+            bread, n_dropped, _ = _rank_guarded_inv(PtP / n_treated)
+        if n_dropped:
+            warnings.warn(
+                "ContinuousDiD ACRT variance: the B-spline design Gram is "
+                f"rank-deficient ({n_dropped} redundant direction(s) dropped); "
+                "rank-reducing to a finite SE on the identified subspace. "
+                "Analytical SEs reflect the reduced rank (NaN if rank 0).",
+                UserWarning,
+                stacklevel=2,
+            )
         # ee_treated: per-unit estimating equation vectors (K-vector per unit)
         # For WLS (survey weights), the score is w_i * X_i * u_i to match the

{diff_diff-3.5.3 → diff_diff-3.6.0}/diff_diff/diagnostics.py RENAMED Viewed

@@ -19,7 +19,7 @@ import pandas as pd
 from diff_diff.estimators import DifferenceInDifferences
 from diff_diff.results import _get_significance_stars
-from diff_diff.utils import safe_inference
+from diff_diff.utils import safe_inference, validate_binary
 @dataclass
@@ -228,7 +228,7 @@ def run_placebo_test(
     test_type : str, default="fake_timing"
         Type of placebo test:
         - "fake_timing": Assign treatment at a fake (earlier) time period
-        - "fake_group": Run DiD designating some control units as "fake treated"
+        - "fake_group": Designate control units as "fake treated" (real-treated units, per the ``treatment`` column, are dropped first)
         - "permutation": Randomly reassign treatment and compute distribution
         - "leave_one_out": Drop each treated unit and re-estimate
     fake_treatment_period : any, optional
@@ -313,6 +313,7 @@ def run_placebo_test(
             fake_treated_units=fake_treatment_group,
             post_periods=post_periods,
             alpha=alpha,
+            treatment=treatment,
             **estimator_kwargs,
         )
@@ -445,14 +446,20 @@ def placebo_group_test(
     fake_treated_units: List[Any],
     post_periods: Optional[List[Any]] = None,
     alpha: float = 0.05,
+    treatment: Optional[str] = None,
     **estimator_kwargs,
 ) -> PlaceboTestResults:
     """
-    Test for differential trends among never-treated units.
+    Test for differential trends by designating control units as "fake treated".
+    Designates ``fake_treated_units`` as fake-treated and estimates a DiD on the
+    resulting panel. A significant effect suggests heterogeneous trends in the
+    control group (a parallel-trends red flag).
-    Assigns some never-treated units as "fake treated" and estimates a
-    DiD model using only never-treated data. A significant effect suggests
-    heterogeneous trends in the control group.
+    If ``treatment`` is provided, units that are *ever* really treated are dropped
+    first, so the placebo runs on never-treated units only (the recommended,
+    uncontaminated design). If ``treatment`` is ``None``, the test runs on whatever
+    data is supplied, so the caller must pass control-only data for a valid placebo.
     Parameters
     ----------
@@ -470,6 +477,11 @@ def placebo_group_test(
         List of post-treatment period values.
     alpha : float, default=0.05
         Significance level.
+    treatment : str, optional
+        Real treatment-indicator column. When given, units that are ever
+        real-treated (``data.groupby(unit)[treatment].max() == 1``) are dropped
+        before the placebo, so it runs on never-treated units only. When ``None``
+        (default), no filtering is done and the caller must pass control-only data.
     **estimator_kwargs
         Arguments passed to DifferenceInDifferences.
@@ -481,7 +493,35 @@ def placebo_group_test(
     if fake_treated_units is None or len(fake_treated_units) == 0:
         raise ValueError("fake_treated_units must be a non-empty list")
-    all_periods = sorted(data[time].unique())
+    fake_data = data.copy()
+    # Optionally restrict to never-treated units so the placebo is not contaminated
+    # by the real treatment effect (the BDM 2004 placebo-law design on controls).
+    if treatment is not None:
+        # Fail closed: a missing column or non-0/1 values would otherwise silently
+        # skip the ever-treated filter (groupby().max() drops NaN), running the
+        # placebo on contaminated data.
+        if treatment not in fake_data.columns:
+            raise ValueError(f"treatment column '{treatment}' not found in data")
+        if fake_data[treatment].isna().any():
+            raise ValueError(f"treatment column '{treatment}' contains missing values")
+        validate_binary(fake_data[treatment].to_numpy(), "treatment")
+        ever_treated = fake_data.groupby(unit)[treatment].max()
+        ever_treated_units = set(ever_treated[ever_treated == 1].index)
+        misused = [u for u in fake_treated_units if u in ever_treated_units]
+        if misused:
+            import warnings
+            warnings.warn(
+                f"{len(misused)} of fake_treated_units are themselves ever real-treated "
+                f"and will be dropped with the other real-treated units: {misused}. "
+                f"Pass only never-treated units as fake_treated_units for a valid placebo.",
+                UserWarning,
+                stacklevel=2,
+            )
+        fake_data = fake_data[~fake_data[unit].isin(ever_treated_units)].copy()
+    all_periods = sorted(fake_data[time].unique())
     # Infer post periods if not provided
     if post_periods is None:
@@ -489,14 +529,31 @@ def placebo_group_test(
         post_periods = all_periods[mid:]
     # Create fake treatment indicator
-    fake_data = data.copy()
     fake_data["_fake_treated"] = fake_data[unit].isin(fake_treated_units).astype(int)
     fake_data["_post"] = fake_data[time].isin(post_periods).astype(int)
+    # Guard degenerate designs (e.g., all fake_treated_units were dropped as
+    # real-treated, or no controls remain) before they surface as a cryptic
+    # LinAlgError inside the estimator.
+    if fake_data["_fake_treated"].sum() == 0:
+        raise ValueError(
+            "No fake-treated observations remain (all fake_treated_units were "
+            "dropped as real-treated, or are absent from the data). Pass "
+            "never-treated units as fake_treated_units."
+        )
+    if (fake_data["_fake_treated"] == 0).sum() == 0:
+        raise ValueError("No control (non-fake-treated) units remain for the placebo comparison.")
     # Fit DiD
     did = DifferenceInDifferences(**estimator_kwargs)
     results = did.fit(fake_data, outcome=outcome, treatment="_fake_treated", time="_post")
+    # Record the fake-treated units actually used (after any never-treated
+    # filtering), not just the originally requested list, to avoid metadata drift.
+    # Preserve the caller's order (sorting could raise TypeError on mixed-type IDs).
+    retained = set(fake_data.loc[fake_data["_fake_treated"] == 1, unit].unique())
+    used_fake_treated = [u for u in fake_treated_units if u in retained]
     return PlaceboTestResults(
         test_type="fake_group",
         placebo_effect=results.att,
@@ -507,7 +564,7 @@ def placebo_group_test(
         n_obs=results.n_obs,
         is_significant=bool(results.p_value < alpha),
         alpha=alpha,
-        fake_group=list(fake_treated_units),
+        fake_group=used_fake_treated,
     )
@@ -526,8 +583,12 @@ def permutation_test(
     Compute permutation-based p-value for DiD estimate.
     Randomly reassigns treatment status at the unit level and computes the
-    DiD estimate for each permutation. The p-value is the proportion of
-    permuted estimates at least as extreme as the original.
+    DiD estimate for each permutation. The p-value is the randomization-inference
+    value ``(1 + count) / (B + 1)`` (Phipson & Smyth 2010), where ``count`` is the
+    number of permuted estimates at least as extreme as the observed and ``B`` is
+    the number of valid permutations. With ``B`` sampled permutations this is a
+    Monte-Carlo approximation that converges to the exact full-enumeration value
+    ``count / total`` as ``B`` grows.
     Parameters
     ----------
@@ -557,8 +618,17 @@ def permutation_test(
     Notes
     -----
-    The permutation test is exact and does not rely on asymptotic
-    approximations, making it valid with any sample size.
+    This is a randomization-inference (permutation) test of the sharp null of no
+    effect for any unit; it does not rely on asymptotic approximations. Treatment
+    assignments are drawn independently each iteration (Monte-Carlo sampling *with
+    replacement* from the assignment space), so the reported p-value
+    ``(1 + count) / (B + 1)`` (Phipson & Smyth 2010) is a **valid but slightly
+    conservative** estimator -- the ``+1`` adds the observed assignment and
+    prevents a zero p-value. Here ``count`` is the number of permutations at least
+    as extreme as the observed estimate and ``B`` is the number of valid
+    permutations. As ``B`` grows it converges to the *exact* p-value obtained by
+    full enumeration of all assignments (the R-parity reference). "Exact" is
+    reserved for that full enumeration; the sampled value approximates it.
     """
     rng = np.random.default_rng(seed)
@@ -620,11 +690,12 @@ def permutation_test(
                 stacklevel=2,
             )
-    # Compute p-value: proportion of |permuted| >= |original|
-    p_value = np.mean(np.abs(valid_effects) >= np.abs(original_att))
-    # Ensure p-value is at least 1/(n_permutations + 1)
-    p_value = max(p_value, 1 / (len(valid_effects) + 1))
+    # Randomization-inference p-value (Phipson & Smyth 2010): include the observed
+    # statistic in both numerator and denominator. The 1/(B+1) floor is intrinsic
+    # (count == 0 -> 1/(B+1)), so no separate clamp is needed. With sampled
+    # permutations this converges to the exact full-enumeration value count/total.
+    count = int(np.sum(np.abs(valid_effects) >= np.abs(original_att)))
+    p_value = (1 + count) / (len(valid_effects) + 1)
     # Compute SE and CI from permutation distribution
     se = np.std(valid_effects, ddof=1)

{diff_diff-3.5.3 → diff_diff-3.6.0}/diff_diff/efficient_did.py RENAMED Viewed

@@ -789,6 +789,11 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
         m_hat_cache: Dict[Tuple, np.ndarray] = {}
         r_hat_cache: Dict[Tuple[float, float], np.ndarray] = {}
         s_hat_cache: Dict[float, np.ndarray] = {}  # inverse propensities per group
+        # Per-fit cache of the polynomial sieve basis, keyed (id(X), degree). The three
+        # sieve nuisance helpers all build the basis from the same fit-level
+        # `covariate_matrix`, so this shares each distinct degree's basis across them
+        # instead of rebuilding it per helper. Lives only for this fit() call.
+        sieve_basis_cache: Dict[Tuple[int, int], np.ndarray] = {}
         if use_covariates:
             assert covariates is not None  # for type narrowing
@@ -934,6 +939,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
                                 k_max=self.sieve_k_max,
                                 criterion=self.sieve_criterion,
                                 unit_weights=unit_level_weights,
+                                basis_cache=sieve_basis_cache,
                             )
                         # m_{g', tpre, 1}(X)
                         key_gp_tpre = (gp, tpre_col_val, effective_p1_col)
@@ -950,6 +956,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
                                 k_max=self.sieve_k_max,
                                 criterion=self.sieve_criterion,
                                 unit_weights=unit_level_weights,
+                                basis_cache=sieve_basis_cache,
                             )
                         # r_{g, inf}(X) and r_{g, g'}(X) via sieve (Eq 4.1-4.2)
                         for comp in {np.inf, gp}:
@@ -966,6 +973,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
                                     criterion=self.sieve_criterion,
                                     ratio_clip=self.ratio_clip,
                                     unit_weights=unit_level_weights,
+                                    basis_cache=sieve_basis_cache,
                                 )
                     # Per-unit DR generated outcomes: shape (n_units, H)
@@ -998,6 +1006,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
                                 k_max=self.sieve_k_max,
                                 criterion=self.sieve_criterion,
                                 unit_weights=unit_level_weights,
+                                basis_cache=sieve_basis_cache,
                             )
                     # Conditional Omega*(X) with per-unit propensities (Eq 3.12)

diff-diff 3.5.3__tar.gz → 3.6.0__tar.gz

diff-diff 3.5.3__tar.gz → 3.6.0__tar.gz