diff-diff 3.5.3__tar.gz → 3.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {diff_diff-3.5.3 → diff_diff-3.6.1}/PKG-INFO +4 -3
  2. {diff_diff-3.5.3 → diff_diff-3.6.1}/README.md +3 -2
  3. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/__init__.py +6 -1
  4. diff_diff-3.6.1/diff_diff/bootstrap_chunking.py +185 -0
  5. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/business_report.py +13 -4
  6. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/chaisemartin_dhaultfoeuille.py +13 -6
  7. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/chaisemartin_dhaultfoeuille_results.py +5 -3
  8. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/conley.py +32 -11
  9. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/continuous_did.py +20 -9
  10. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/diagnostics.py +89 -18
  11. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/efficient_did.py +9 -0
  12. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/efficient_did_bootstrap.py +77 -35
  13. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/efficient_did_covariates.py +38 -3
  14. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/estimators.py +91 -57
  15. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/guides/llms-autonomous.txt +15 -6
  16. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/guides/llms-full.txt +59 -5
  17. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/guides/llms-practitioner.txt +6 -3
  18. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/guides/llms.txt +4 -3
  19. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/had.py +32 -22
  20. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/imputation.py +154 -56
  21. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/imputation_bootstrap.py +7 -2
  22. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/linalg.py +212 -176
  23. diff_diff-3.6.1/diff_diff/lpdid.py +1607 -0
  24. diff_diff-3.6.1/diff_diff/lpdid_results.py +297 -0
  25. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/spillover.py +24 -13
  26. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/staggered.py +274 -188
  27. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/staggered_aggregation.py +16 -7
  28. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/staggered_bootstrap.py +97 -65
  29. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/staggered_results.py +9 -0
  30. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/staggered_triple_diff.py +18 -59
  31. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/trop.py +153 -22
  32. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/trop_global.py +44 -39
  33. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/trop_local.py +262 -61
  34. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/trop_results.py +33 -2
  35. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/two_stage.py +21 -14
  36. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/two_stage_bootstrap.py +34 -19
  37. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/utils.py +205 -94
  38. {diff_diff-3.5.3 → diff_diff-3.6.1}/pyproject.toml +1 -1
  39. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/Cargo.lock +15 -16
  40. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/Cargo.toml +8 -7
  41. {diff_diff-3.5.3 → diff_diff-3.6.1}/LICENSE +0 -0
  42. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/_backend.py +0 -0
  43. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/_guides_api.py +0 -0
  44. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/_nprobust_port.py +0 -0
  45. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/_reporting_helpers.py +0 -0
  46. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/agent_workflow.py +0 -0
  47. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/bacon.py +0 -0
  48. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/balancing.py +0 -0
  49. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/bootstrap_utils.py +0 -0
  50. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +0 -0
  51. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/conformal.py +0 -0
  52. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/continuous_did_bspline.py +0 -0
  53. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/continuous_did_results.py +0 -0
  54. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/datasets.py +0 -0
  55. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/diagnostic_report.py +0 -0
  56. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/efficient_did_results.py +0 -0
  57. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/efficient_did_weights.py +0 -0
  58. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/guides/__init__.py +0 -0
  59. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/had_pretests.py +0 -0
  60. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/honest_did.py +0 -0
  61. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/imputation_results.py +0 -0
  62. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/local_linear.py +0 -0
  63. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/power.py +0 -0
  64. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/practitioner.py +0 -0
  65. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/prep.py +0 -0
  66. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/prep_dgp.py +0 -0
  67. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/pretrends.py +0 -0
  68. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/profile.py +0 -0
  69. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/results.py +0 -0
  70. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/stacked_did.py +0 -0
  71. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/stacked_did_results.py +0 -0
  72. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/staggered_triple_diff_results.py +0 -0
  73. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/sun_abraham.py +0 -0
  74. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/survey.py +0 -0
  75. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/synthetic_control.py +0 -0
  76. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/synthetic_control_results.py +0 -0
  77. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/synthetic_did.py +0 -0
  78. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/triple_diff.py +0 -0
  79. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/twfe.py +0 -0
  80. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/two_stage_results.py +0 -0
  81. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/__init__.py +0 -0
  82. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_common.py +0 -0
  83. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_continuous.py +0 -0
  84. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_diagnostic.py +0 -0
  85. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_event_study.py +0 -0
  86. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_power.py +0 -0
  87. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_staggered.py +0 -0
  88. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/visualization/_synthetic.py +0 -0
  89. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/wooldridge.py +0 -0
  90. {diff_diff-3.5.3 → diff_diff-3.6.1}/diff_diff/wooldridge_results.py +0 -0
  91. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/build.rs +0 -0
  92. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/src/bootstrap.rs +0 -0
  93. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/src/lib.rs +0 -0
  94. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/src/linalg.rs +0 -0
  95. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/src/trop.rs +0 -0
  96. {diff_diff-3.5.3 → diff_diff-3.6.1}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 3.5.3
3
+ Version: 3.6.1
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -155,7 +155,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
155
155
  - [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - panel data DiD with unit and time fixed effects via within-transformation or dummies
156
156
  - [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - event study design with period-specific treatment effects for dynamic analysis
157
157
  - [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Callaway & Sant'Anna (2021) group-time ATT estimator for staggered adoption
158
- - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The only library option for treatments that switch on AND off. Alias `DCDH`.
158
+ - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The most general option for treatments that switch on AND off (see also `LPDiD`/`TROP` `non_absorbing`). Alias `DCDH`.
159
159
  - [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies
160
160
  - [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html) - Borusyak, Jaravel & Spiess (2024) imputation estimator, most efficient under homogeneous effects
161
161
  - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance
@@ -170,6 +170,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
170
170
  - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
171
171
  - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
172
172
  - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
173
+ - [LPDiD](https://diff-diff.readthedocs.io/en/stable/api/lpdid.html) - Dube, Girardi, Jordà & Taylor (2025) Local Projections DiD: per-horizon long-difference event study on clean controls (no negative weighting), variance- or equally-weighted ATT, for absorbing or non-absorbing (reversible) treatment
173
174
  - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
174
175
 
175
176
  ## Diagnostics & Sensitivity
@@ -197,7 +198,7 @@ No other Python or R DiD package offers design-based variance estimation for mod
197
198
  - Python 3.9 - 3.14
198
199
  - numpy >= 1.20
199
200
  - pandas >= 1.3
200
- - scipy >= 1.7
201
+ - scipy >= 1.10
201
202
 
202
203
  ## Development
203
204
 
@@ -102,7 +102,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
102
102
  - [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - panel data DiD with unit and time fixed effects via within-transformation or dummies
103
103
  - [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - event study design with period-specific treatment effects for dynamic analysis
104
104
  - [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Callaway & Sant'Anna (2021) group-time ATT estimator for staggered adoption
105
- - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The only library option for treatments that switch on AND off. Alias `DCDH`.
105
+ - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The most general option for treatments that switch on AND off (see also `LPDiD`/`TROP` `non_absorbing`). Alias `DCDH`.
106
106
  - [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies
107
107
  - [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html) - Borusyak, Jaravel & Spiess (2024) imputation estimator, most efficient under homogeneous effects
108
108
  - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance
@@ -117,6 +117,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
117
117
  - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
118
118
  - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
119
119
  - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
120
+ - [LPDiD](https://diff-diff.readthedocs.io/en/stable/api/lpdid.html) - Dube, Girardi, Jordà & Taylor (2025) Local Projections DiD: per-horizon long-difference event study on clean controls (no negative weighting), variance- or equally-weighted ATT, for absorbing or non-absorbing (reversible) treatment
120
121
  - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
121
122
 
122
123
  ## Diagnostics & Sensitivity
@@ -144,7 +145,7 @@ No other Python or R DiD package offers design-based variance estimation for mod
144
145
  - Python 3.9 - 3.14
145
146
  - numpy >= 1.20
146
147
  - pandas >= 1.3
147
- - scipy >= 1.7
148
+ - scipy >= 1.10
148
149
 
149
150
  ## Development
150
151
 
@@ -230,6 +230,8 @@ from diff_diff.synthetic_control import (
230
230
  from diff_diff.synthetic_control_results import SyntheticControlResults
231
231
  from diff_diff.wooldridge import WooldridgeDiD
232
232
  from diff_diff.wooldridge_results import WooldridgeDiDResults
233
+ from diff_diff.lpdid import LPDiD
234
+ from diff_diff.lpdid_results import LPDiDResults
233
235
  from diff_diff.utils import (
234
236
  WildBootstrapResults,
235
237
  check_parallel_trends,
@@ -299,7 +301,7 @@ ETWFE = WooldridgeDiD
299
301
  DCDH = ChaisemartinDHaultfoeuille
300
302
  HAD = HeterogeneousAdoptionDiD
301
303
 
302
- __version__ = "3.5.3"
304
+ __version__ = "3.6.1"
303
305
  __all__ = [
304
306
  # Estimators
305
307
  "DifferenceInDifferences",
@@ -381,6 +383,9 @@ __all__ = [
381
383
  "WooldridgeDiD",
382
384
  "WooldridgeDiDResults",
383
385
  "ETWFE",
386
+ # LPDiD (Local Projections DiD)
387
+ "LPDiD",
388
+ "LPDiDResults",
384
389
  # Visualization
385
390
  "plot_bacon",
386
391
  "plot_event_study",
@@ -0,0 +1,185 @@
1
+ """Memory-bounded chunking for multiplier-bootstrap weight matrices.
2
+
3
+ The multiplier bootstrap perturbs cached influence functions with a dense
4
+ ``(n_bootstrap, n_units)`` weight matrix. At large ``n_units`` that matrix
5
+ dominates peak memory (e.g. ``999 x 5_000_000 x 8`` bytes is ~40 GB). Every
6
+ consumer is a left-multiply ``weights @ influence_vector`` whose result is small
7
+ (``(n_bootstrap,)`` or ``(n_bootstrap, n_gt)``), so the bootstrap can be tiled
8
+ over the *draw* dimension: generate and consume the weights in row-blocks of
9
+ ``B``, capping the live intermediate at ``(B, n_units)``. FLOPs are identical to
10
+ the un-chunked path -- only the draw axis is tiled. The generated weight stream
11
+ is *bit-identical* to the un-chunked matrix (see below); the downstream
12
+ ``weights @ influence`` matmuls go through BLAS, whose reduction order depends on
13
+ the operand row-count, so the resulting statistics match the un-chunked path to
14
+ within floating-point reassociation (typically <~1 ULP), far below bootstrap
15
+ Monte-Carlo error -- not bit-for-bit.
16
+
17
+ Bit-identity of the weight *generation* is preserved on **both** backends:
18
+
19
+ - **Rust** seeds each row absolutely as ``base_seed + row_index``
20
+ (``rust/src/bootstrap.rs``), so calling the generator per block with base seed
21
+ ``base_seed + chunk_start`` reproduces the exact un-chunked rows. Exactly one
22
+ ``rng.integers`` draw is consumed, matching the un-chunked wrapper.
23
+ - The **NumPy** fallback draws the matrix row-major from the ``Generator``
24
+ stream, so consuming it in contiguous, in-order blocks from the same generator
25
+ reproduces the identical sequence.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ from typing import Iterator, Optional, Tuple
31
+
32
+ import numpy as np
33
+
34
+ from diff_diff._backend import HAS_RUST_BACKEND, _rust_bootstrap_weights
35
+ from diff_diff.bootstrap_utils import generate_bootstrap_weights_batch_numpy
36
+
37
+ # Byte ceiling for a single ``(B, n_units)`` float64 weight block. 256 MB keeps
38
+ # the live intermediate small at millions of units while staying large enough
39
+ # that the per-block matmuls remain BLAS-efficient and chunk overhead (a handful
40
+ # of extra Python iterations / FFI calls) is negligible.
41
+ _TARGET_BLOCK_BYTES = 256 * 1024 * 1024
42
+
43
+
44
+ def compute_block_size(
45
+ n_units: int, n_bootstrap: int, target_bytes: int = _TARGET_BLOCK_BYTES
46
+ ) -> int:
47
+ """Number of bootstrap rows per block so a ``(B, n_units)`` float64 block
48
+ stays under ``target_bytes``. Always in ``[1, n_bootstrap]``."""
49
+ if n_units <= 0:
50
+ return max(1, n_bootstrap)
51
+ b = target_bytes // (n_units * 8)
52
+ return int(max(1, min(max(1, n_bootstrap), b)))
53
+
54
+
55
def iter_weight_blocks(
    n_bootstrap: int,
    n_gen: int,
    weight_type: str,
    rng: np.random.Generator,
    *,
    expand_index: Optional[np.ndarray] = None,
    block_size: Optional[int] = None,
) -> Iterator[Tuple[int, np.ndarray]]:
    """Yield ``(chunk_start, block)`` pairs covering all ``n_bootstrap`` draws.

    ``block`` has shape ``(B, width)`` where ``width = len(expand_index)`` when
    ``expand_index`` is given, else ``n_gen``. Weights are generated at width
    ``n_gen`` (unit / cluster / PSU level) and, when ``expand_index`` is given,
    expanded to unit level via ``block[:, expand_index]`` (cluster->unit or
    PSU->unit fan-out). The concatenation of all yielded blocks is intended to
    be bit-identical to a single
    ``generate_bootstrap_weights_batch(n_bootstrap, n_gen, ...)`` followed by
    the same expansion.

    Arguments are validated when this function is *called* (matching
    :func:`iter_survey_multiplier_weight_blocks`), so a bad ``block_size`` fails
    at the call site rather than at the first ``next()``. Weight generation
    itself stays lazy: ``rng`` is not touched until the returned iterator is
    first advanced, and it is then consumed in order, so the caller must
    iterate sequentially.

    Parameters
    ----------
    n_bootstrap : int
        Total number of bootstrap draws (rows) to produce.
    n_gen : int
        Width at which weights are generated.
    weight_type : str
        Multiplier distribution name, forwarded to the weight generator.
    rng : numpy.random.Generator
        Source of randomness (one seed draw on the Rust path, the raw stream
        on the NumPy fallback).
    expand_index : numpy.ndarray, optional
        Column index used to fan generated weights out to unit level.
    block_size : int, optional
        Rows per block. Defaults to ``compute_block_size(width, n_bootstrap)``.

    Returns
    -------
    Iterator[Tuple[int, numpy.ndarray]]
        Lazy iterator of ``(chunk_start, block)`` pairs.

    Raises
    ------
    ValueError
        If ``block_size`` is less than 1.
    """
    width = n_gen if expand_index is None else int(len(expand_index))
    if block_size is None:
        block_size = compute_block_size(width, n_bootstrap)
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    rows_per_block = block_size

    def _blocks() -> Iterator[Tuple[int, np.ndarray]]:
        rust_gen = (
            _rust_bootstrap_weights
            if (HAS_RUST_BACKEND and _rust_bootstrap_weights is not None)
            else None
        )
        # Draw exactly one base seed (matching the un-chunked Rust wrapper); the
        # NumPy fallback consumes the rng stream directly per block instead.
        # This stays inside the generator so the draw happens on first
        # iteration, exactly as before the eager-validation change.
        base_seed = int(rng.integers(0, 2**63 - 1)) if rust_gen is not None else 0

        for chunk_start in range(0, n_bootstrap, rows_per_block):
            rows = min(rows_per_block, n_bootstrap - chunk_start)
            if rust_gen is not None:
                # Offsetting the seed by the block's first row index is what
                # lets per-block calls reproduce the un-chunked rows.
                block = rust_gen(rows, n_gen, weight_type, base_seed + chunk_start)
            else:
                block = generate_bootstrap_weights_batch_numpy(rows, n_gen, weight_type, rng)
            if expand_index is not None:
                block = block[:, expand_index]
            yield chunk_start, block

    return _blocks()
101
+
102
+
103
def iter_survey_multiplier_weight_blocks(
    n_bootstrap: int,
    resolved_survey: object,
    weight_type: str,
    rng: np.random.Generator,
    *,
    block_size: int,
) -> Tuple[np.ndarray, Iterator[Tuple[int, np.ndarray]]]:
    """Produce PSU-level multiplier weights for the survey bootstrap in blocks.

    Returns ``(psu_ids, blocks)``; ``blocks`` yields
    ``(chunk_start, (B, n_psu))`` PSU-weight blocks that together cover all
    ``n_bootstrap`` draws.

    Two paths:

    - UNSTRATIFIED designs with at least two PSUs (``strata is None``,
      ``n_psu >= 2``): weights are generated one draw-block at a time through
      :func:`iter_weight_blocks` and multiplied by the unstratified FPC scalar,
      so the full ``(n_bootstrap, n_psu)`` matrix is never held in memory. This
      mirrors the unstratified branch of
      :func:`diff_diff.bootstrap_utils.generate_survey_multiplier_weights_batch`.
      When ``psu`` is ``None`` every observation counts as its own PSU
      (``n_psu == len(resolved_survey.weights)``).
    - Stratified designs, or fewer than two PSUs: the full matrix is generated
      up front by ``generate_survey_multiplier_weights_batch`` and the returned
      iterator merely slices it.

    Raises
    ------
    ValueError
        If ``block_size`` is less than 1, or if the FPC value is smaller than
        the number of PSUs on the unstratified path.
    """
    from diff_diff.bootstrap_utils import generate_survey_multiplier_weights_batch

    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")

    psu_labels = getattr(resolved_survey, "psu", None)
    stratum_labels = getattr(resolved_survey, "strata", None)
    if psu_labels is not None:
        psu_ids = np.unique(psu_labels)
        n_psu = len(psu_ids)
    else:
        # No PSU column: one PSU per observation.
        n_psu = len(resolved_survey.weights)  # type: ignore[attr-defined]
        psu_ids = np.arange(n_psu)

    use_full_generation = stratum_labels is not None or n_psu < 2
    if use_full_generation:
        # Stratified or degenerate single-PSU: build the whole matrix now and
        # hand out row slices of it.
        full_weights, psu_ids = generate_survey_multiplier_weights_batch(
            n_bootstrap, resolved_survey, weight_type, rng
        )

        def _row_slices() -> Iterator[Tuple[int, np.ndarray]]:
            start = 0
            while start < n_bootstrap:
                yield start, full_weights[start : start + block_size]
                start += block_size

        return psu_ids, _row_slices()

    # Unstratified with n_psu >= 2: tile generation over draws and apply the
    # same FPC scaling as the un-chunked generator's unstratified branch.
    scale = 1.0
    zero_out = False
    fpc = getattr(resolved_survey, "fpc", None)
    if fpc is not None:
        # With psu=None, n_psu already equals len(weights), so n_psu is the
        # unit count used for the FPC on both branches.
        population = fpc[0]
        if population < n_psu:
            raise ValueError(
                f"FPC ({population}) is less than the number of PSUs "
                f"({n_psu}). FPC must be >= number of PSUs."
            )
        sampling_fraction = n_psu / population
        if sampling_fraction < 1.0:
            scale = float(np.sqrt(1.0 - sampling_fraction))
        else:
            # Sampling fraction of 1: every weight is zeroed.
            zero_out = True

    def _scaled_blocks() -> Iterator[Tuple[int, np.ndarray]]:
        generated = iter_weight_blocks(
            n_bootstrap, n_psu, weight_type, rng, block_size=block_size
        )
        for start, block in generated:
            if zero_out:
                yield start, np.zeros_like(block)
            elif scale != 1.0:
                yield start, block * scale
            else:
                yield start, block

    return psu_ids, _scaled_blocks()
@@ -353,12 +353,21 @@ class BusinessReport:
353
353
  """Return a structured multi-section markdown report."""
354
354
  base = _render_full_report(self.to_dict())
355
355
  if self._include_appendix:
356
+ appendix_text = None
356
357
  try:
357
358
  appendix = self._results.summary()
358
- except Exception: # noqa: BLE001
359
- appendix = None
360
- if appendix:
361
- base = base + "\n\n## Technical Appendix\n\n```\n" + str(appendix) + "\n```\n"
359
+ if appendix:
360
+ appendix_text = str(appendix)
361
+ except Exception as exc: # noqa: BLE001
362
+ appendix_error = type(exc).__name__ or "Exception"
363
+ base = (
364
+ base
365
+ + "\n\n## Technical Appendix\n\n"
366
+ + "Technical appendix unavailable: estimator summary rendering failed "
367
+ + f"({appendix_error}).\n"
368
+ )
369
+ if appendix_text:
370
+ base = base + "\n\n## Technical Appendix\n\n```\n" + appendix_text + "\n```\n"
362
371
  return base
363
372
 
364
373
  def export_markdown(self) -> str:
@@ -1,9 +1,14 @@
1
1
  """
2
2
  de Chaisemartin-D'Haultfoeuille (dCDH) estimator for reversible-treatment DiD.
3
3
 
4
- The dCDH estimator is the only modern DiD estimator in the diff-diff library
5
- that handles **non-absorbing (reversible) treatments** — treatment can switch
6
- on AND off over time. All other staggered estimators in the library
4
+ The dCDH estimator is the most general DiD estimator in the diff-diff library
5
+ for **non-absorbing (reversible) treatments** — treatment can switch on AND off
6
+ over time, switcher vs non-switcher comparisons are its primitive object, and it
7
+ allows dynamic (carryover) effects with explicit joiner/leaver (``DID_+`` /
8
+ ``DID_-``) decomposition. ``LPDiD`` (``non_absorbing="first_entry"`` /
9
+ ``"effect_stabilization"``) and ``TROP`` (``non_absorbing=True``, under a
10
+ no-dynamic-effects assumption) also accept non-absorbing treatment under stronger
11
+ assumptions. The remaining staggered estimators in the library
7
12
  (``CallawaySantAnna``, ``SunAbraham``, ``ImputationDiD``, ``TwoStageDiD``,
8
13
  ``EfficientDiD``, ``WooldridgeDiD``) assume treatment is absorbing.
9
14
 
@@ -354,9 +359,11 @@ class ChaisemartinDHaultfoeuille(ChaisemartinDHaultfoeuilleBootstrapMixin):
354
359
  """
355
360
  de Chaisemartin-D'Haultfoeuille (dCDH) estimator.
356
361
 
357
- The only modern DiD estimator in the library that handles **reversible
358
- (non-absorbing) treatments** - treatment may switch on AND off over
359
- time. Computes the contemporaneous-switch DiD ``DID_M`` from the
362
+ The most general library estimator for **reversible (non-absorbing)
363
+ treatments** - treatment may switch on AND off over time, with explicit
364
+ joiner/leaver (``DID_+`` / ``DID_-``) decomposition (``LPDiD`` and ``TROP``
365
+ also support non-absorbing treatment under stronger assumptions; see their
366
+ ``non_absorbing`` parameters). Computes the contemporaneous-switch DiD ``DID_M`` from the
360
367
  AER 2020 paper (equivalently ``DID_1`` at horizon ``l = 1`` of the
361
368
  dynamic companion paper, NBER WP 29873) plus the full multi-horizon
362
369
  event study ``DID_l`` for ``l = 1..L_max`` via the ``L_max`` parameter
@@ -4,9 +4,11 @@ Result containers for the de Chaisemartin-D'Haultfoeuille (dCDH) estimator.
4
4
  This module contains ``ChaisemartinDHaultfoeuilleResults`` and
5
5
  ``DCDHBootstrapResults`` dataclasses produced by the
6
6
  ``ChaisemartinDHaultfoeuille`` (alias ``DCDH``) estimator. The dCDH
7
- estimator is the only modern DiD estimator in the library that handles
8
- non-absorbing (reversible) treatments. Phase 1 ships the contemporaneous-
9
- switch case ``DID_M`` (= ``DID_1`` of the dynamic companion paper).
7
+ estimator is the most general library estimator for non-absorbing
8
+ (reversible) treatments (``LPDiD`` and ``TROP`` also support non-absorbing
9
+ treatment under stronger assumptions; see their ``non_absorbing`` parameters).
10
+ Phase 1 ships the contemporaneous-switch case ``DID_M`` (= ``DID_1`` of the
11
+ dynamic companion paper).
10
12
 
11
13
  References
12
14
  ----------
@@ -1104,16 +1104,37 @@ def _compute_conley_vcov(
1104
1104
  _conley_sparse=_conley_sparse,
1105
1105
  )
1106
1106
 
1107
- # Sandwich via two solves (mirrors _compute_cr2_bm pattern in linalg.py)
1108
- try:
1109
- temp = np.linalg.solve(bread_matrix, meat)
1110
- vcov = np.linalg.solve(bread_matrix, temp.T).T
1111
- except np.linalg.LinAlgError as e:
1112
- if "Singular" in str(e):
1113
- raise ValueError(
1114
- "Design matrix is rank-deficient (singular X'X matrix). "
1115
- "Cannot compute Conley spatial HAC variance."
1116
- ) from e
1117
- raise
1107
+ # Sandwich via the shared rank-guarded inverse of the design Gram.
1108
+ # np.linalg.solve only raises on an *exactly* singular bread, so a *near*-
1109
+ # singular X'WX would otherwise flow a garbage inverse (~1e13) straight into
1110
+ # the spatial-HAC variance. `_rank_guarded_inv` truncates redundant
1111
+ # directions on the equilibrated Gram -> a finite SE on the identified
1112
+ # subspace (NaN only at rank 0), matching the covariate IF rank-guard and the
1113
+ # other structural bread inversions (ContinuousDiD / TwoStageDiD /
1114
+ # SpilloverDiD). Lazy import: `linalg` imports this module, so a top-level
1115
+ # `from diff_diff.linalg import ...` would be circular; resolving at call time
1116
+ # is safe (linalg is already loaded by the time this runs).
1117
+ from diff_diff.linalg import _rank_guarded_inv
1118
+
1119
+ bread_inv, n_dropped, _, dropped = _rank_guarded_inv(bread_matrix, return_dropped=True)
1120
+ if n_dropped:
1121
+ warnings.warn(
1122
+ "Conley spatial HAC variance: the design Gram (X'WX) is "
1123
+ f"rank-deficient ({n_dropped} redundant direction(s) dropped); "
1124
+ "rank-reducing to a finite SE on the identified subspace "
1125
+ "(NaN if rank 0). This usually indicates collinear regressors.",
1126
+ UserWarning,
1127
+ stacklevel=2,
1128
+ )
1129
+ # vcov = bread^{-1} @ meat @ bread^{-1}; algebraically identical to the prior
1130
+ # two symmetric solves given `bread` symmetric (holds for any meat).
1131
+ vcov = bread_inv @ meat @ bread_inv
1132
+ # A dropped (unidentified) coefficient is zero-filled in bread_inv, which would
1133
+ # otherwise report se=0 for that named coefficient. NaN its row/col in the
1134
+ # FINAL vcov so per-coefficient SE extraction yields NaN (not 0) for the
1135
+ # unidentified directions, while the identified coefficients stay finite.
1136
+ if dropped.any():
1137
+ vcov[dropped, :] = np.nan
1138
+ vcov[:, dropped] = np.nan
1118
1139
 
1119
1140
  return vcov
@@ -29,7 +29,7 @@ from diff_diff.continuous_did_results import (
29
29
  ContinuousDiDResults,
30
30
  DoseResponseCurve,
31
31
  )
32
- from diff_diff.linalg import solve_ols
32
+ from diff_diff.linalg import _rank_guarded_inv, solve_ols
33
33
  from diff_diff.survey import (
34
34
  ResolvedSurveyDesign,
35
35
  _resolve_survey_for_fit,
@@ -1047,21 +1047,32 @@ class ContinuousDiD:
1047
1047
 
1048
1048
  # Store bootstrap info for influence function computation
1049
1049
  # bread = (Psi'WPsi / sum(w_treated))^{-1} when survey, (Psi'Psi / n_treated)^{-1} otherwise
1050
+ # Bread = (Psi'WPsi / mass)^{-1} via the shared rank-guarded inverse:
1051
+ # np.linalg.inv only raises on an *exactly* singular Gram, so a *near*-
1052
+ # singular B-spline design (clustered doses / near-duplicate knots)
1053
+ # previously returned a garbage inverse (~1e13) -> garbage SE. The prior
1054
+ # `pinv` fallback was both minimum-norm (not the column-drop / near-
1055
+ # collinear limit) and *silent*. `_rank_guarded_inv` truncates redundant
1056
+ # directions on the equilibrated Gram -> finite SE on the identified
1057
+ # subspace (NaN only at rank 0), matching the covariate IF rank-guard.
1050
1058
  if w_treated is not None:
1051
1059
  w_treated_sum = float(np.sum(w_treated))
1052
1060
  PtWP = Psi.T @ (Psi * w_treated[:, np.newaxis])
1053
1061
  # Normalize bread by weighted mass (not raw count) for consistency
1054
1062
  # with downstream IF score denominators that also use weighted mass
1055
- try:
1056
- bread = np.linalg.inv(PtWP / w_treated_sum)
1057
- except np.linalg.LinAlgError:
1058
- bread = np.linalg.pinv(PtWP / w_treated_sum)
1063
+ bread, n_dropped, _ = _rank_guarded_inv(PtWP / w_treated_sum)
1059
1064
  else:
1060
1065
  PtP = Psi.T @ Psi
1061
- try:
1062
- bread = np.linalg.inv(PtP / n_treated)
1063
- except np.linalg.LinAlgError:
1064
- bread = np.linalg.pinv(PtP / n_treated)
1066
+ bread, n_dropped, _ = _rank_guarded_inv(PtP / n_treated)
1067
+ if n_dropped:
1068
+ warnings.warn(
1069
+ "ContinuousDiD ACRT variance: the B-spline design Gram is "
1070
+ f"rank-deficient ({n_dropped} redundant direction(s) dropped); "
1071
+ "rank-reducing to a finite SE on the identified subspace. "
1072
+ "Analytical SEs reflect the reduced rank (NaN if rank 0).",
1073
+ UserWarning,
1074
+ stacklevel=2,
1075
+ )
1065
1076
 
1066
1077
  # ee_treated: per-unit estimating equation vectors (K-vector per unit)
1067
1078
  # For WLS (survey weights), the score is w_i * X_i * u_i to match the
@@ -19,7 +19,7 @@ import pandas as pd
19
19
 
20
20
  from diff_diff.estimators import DifferenceInDifferences
21
21
  from diff_diff.results import _get_significance_stars
22
- from diff_diff.utils import safe_inference
22
+ from diff_diff.utils import safe_inference, validate_binary
23
23
 
24
24
 
25
25
  @dataclass
@@ -228,7 +228,7 @@ def run_placebo_test(
228
228
  test_type : str, default="fake_timing"
229
229
  Type of placebo test:
230
230
  - "fake_timing": Assign treatment at a fake (earlier) time period
231
- - "fake_group": Run DiD designating some control units as "fake treated"
231
+ - "fake_group": Designate control units as "fake treated" (real-treated units, per the ``treatment`` column, are dropped first)
232
232
  - "permutation": Randomly reassign treatment and compute distribution
233
233
  - "leave_one_out": Drop each treated unit and re-estimate
234
234
  fake_treatment_period : any, optional
@@ -313,6 +313,7 @@ def run_placebo_test(
313
313
  fake_treated_units=fake_treatment_group,
314
314
  post_periods=post_periods,
315
315
  alpha=alpha,
316
+ treatment=treatment,
316
317
  **estimator_kwargs,
317
318
  )
318
319
 
@@ -445,14 +446,20 @@ def placebo_group_test(
445
446
  fake_treated_units: List[Any],
446
447
  post_periods: Optional[List[Any]] = None,
447
448
  alpha: float = 0.05,
449
+ treatment: Optional[str] = None,
448
450
  **estimator_kwargs,
449
451
  ) -> PlaceboTestResults:
450
452
  """
451
- Test for differential trends among never-treated units.
453
+ Test for differential trends by designating control units as "fake treated".
454
+
455
+ Designates ``fake_treated_units`` as fake-treated and estimates a DiD on the
456
+ resulting panel. A significant effect suggests heterogeneous trends in the
457
+ control group (a parallel-trends red flag).
452
458
 
453
- Assigns some never-treated units as "fake treated" and estimates a
454
- DiD model using only never-treated data. A significant effect suggests
455
- heterogeneous trends in the control group.
459
+ If ``treatment`` is provided, units that are *ever* really treated are dropped
460
+ first, so the placebo runs on never-treated units only (the recommended,
461
+ uncontaminated design). If ``treatment`` is ``None``, the test runs on whatever
462
+ data is supplied, so the caller must pass control-only data for a valid placebo.
456
463
 
457
464
  Parameters
458
465
  ----------
@@ -470,6 +477,11 @@ def placebo_group_test(
470
477
  List of post-treatment period values.
471
478
  alpha : float, default=0.05
472
479
  Significance level.
480
+ treatment : str, optional
481
+ Real treatment-indicator column. When given, units that are ever
482
+ real-treated (``data.groupby(unit)[treatment].max() == 1``) are dropped
483
+ before the placebo, so it runs on never-treated units only. When ``None``
484
+ (default), no filtering is done and the caller must pass control-only data.
473
485
  **estimator_kwargs
474
486
  Arguments passed to DifferenceInDifferences.
475
487
 
@@ -481,7 +493,35 @@ def placebo_group_test(
481
493
  if fake_treated_units is None or len(fake_treated_units) == 0:
482
494
  raise ValueError("fake_treated_units must be a non-empty list")
483
495
 
484
- all_periods = sorted(data[time].unique())
496
+ fake_data = data.copy()
497
+
498
+ # Optionally restrict to never-treated units so the placebo is not contaminated
499
+ # by the real treatment effect (the BDM 2004 placebo-law design on controls).
500
+ if treatment is not None:
501
+ # Fail closed: missing values or non-0/1 codes would otherwise silently skip
502
+ # the ever-treated filter (groupby().max() skips NaN; `== 1` misses other codes),
503
+ # running the placebo on contaminated data; a missing column gets a clear error.
504
+ if treatment not in fake_data.columns:
505
+ raise ValueError(f"treatment column '{treatment}' not found in data")
506
+ if fake_data[treatment].isna().any():
507
+ raise ValueError(f"treatment column '{treatment}' contains missing values")
508
+ validate_binary(fake_data[treatment].to_numpy(), "treatment")
509
+ ever_treated = fake_data.groupby(unit)[treatment].max()
510
+ ever_treated_units = set(ever_treated[ever_treated == 1].index)
511
+ misused = [u for u in fake_treated_units if u in ever_treated_units]
512
+ if misused:
513
+ import warnings
514
+
515
+ warnings.warn(
516
+ f"{len(misused)} of fake_treated_units are themselves ever real-treated "
517
+ f"and will be dropped with the other real-treated units: {misused}. "
518
+ f"Pass only never-treated units as fake_treated_units for a valid placebo.",
519
+ UserWarning,
520
+ stacklevel=2,
521
+ )
522
+ fake_data = fake_data[~fake_data[unit].isin(ever_treated_units)].copy()
523
+
524
+ all_periods = sorted(fake_data[time].unique())
485
525
 
486
526
  # Infer post periods if not provided
487
527
  if post_periods is None:
@@ -489,14 +529,31 @@ def placebo_group_test(
489
529
  post_periods = all_periods[mid:]
490
530
 
491
531
  # Create fake treatment indicator
492
- fake_data = data.copy()
493
532
  fake_data["_fake_treated"] = fake_data[unit].isin(fake_treated_units).astype(int)
494
533
  fake_data["_post"] = fake_data[time].isin(post_periods).astype(int)
495
534
 
535
+ # Guard degenerate designs (e.g., all fake_treated_units were dropped as
536
+ # real-treated, or no controls remain) before they surface as a cryptic
537
+ # LinAlgError inside the estimator.
538
+ if fake_data["_fake_treated"].sum() == 0:
539
+ raise ValueError(
540
+ "No fake-treated observations remain (all fake_treated_units were "
541
+ "dropped as real-treated, or are absent from the data). Pass "
542
+ "never-treated units as fake_treated_units."
543
+ )
544
+ if (fake_data["_fake_treated"] == 0).sum() == 0:
545
+ raise ValueError("No control (non-fake-treated) units remain for the placebo comparison.")
546
+
496
547
  # Fit DiD
497
548
  did = DifferenceInDifferences(**estimator_kwargs)
498
549
  results = did.fit(fake_data, outcome=outcome, treatment="_fake_treated", time="_post")
499
550
 
551
+ # Record the fake-treated units actually used (after any never-treated
552
+ # filtering), not just the originally requested list, to avoid metadata drift.
553
+ # Preserve the caller's order (sorting could raise TypeError on mixed-type IDs).
554
+ retained = set(fake_data.loc[fake_data["_fake_treated"] == 1, unit].unique())
555
+ used_fake_treated = [u for u in fake_treated_units if u in retained]
556
+
500
557
  return PlaceboTestResults(
501
558
  test_type="fake_group",
502
559
  placebo_effect=results.att,
@@ -507,7 +564,7 @@ def placebo_group_test(
507
564
  n_obs=results.n_obs,
508
565
  is_significant=bool(results.p_value < alpha),
509
566
  alpha=alpha,
510
- fake_group=list(fake_treated_units),
567
+ fake_group=used_fake_treated,
511
568
  )
512
569
 
513
570
 
@@ -526,8 +583,12 @@ def permutation_test(
526
583
  Compute permutation-based p-value for DiD estimate.
527
584
 
528
585
  Randomly reassigns treatment status at the unit level and computes the
529
- DiD estimate for each permutation. The p-value is the proportion of
530
- permuted estimates at least as extreme as the original.
586
+ DiD estimate for each permutation. The p-value is the randomization-inference
587
+ value ``(1 + count) / (B + 1)`` (Phipson & Smyth 2010), where ``count`` is the
588
+ number of permuted estimates at least as extreme as the observed and ``B`` is
589
+ the number of valid permutations. With ``B`` sampled permutations this is a
590
+ Monte-Carlo approximation that converges to the exact full-enumeration value
591
+ ``count / total`` as ``B`` grows.
531
592
 
532
593
  Parameters
533
594
  ----------
@@ -557,8 +618,17 @@ def permutation_test(
557
618
 
558
619
  Notes
559
620
  -----
560
- The permutation test is exact and does not rely on asymptotic
561
- approximations, making it valid with any sample size.
621
+ This is a randomization-inference (permutation) test of the sharp null of no
622
+ effect for any unit; it does not rely on asymptotic approximations. Treatment
623
+ assignments are drawn independently each iteration (Monte-Carlo sampling *with
624
+ replacement* from the assignment space), so the reported p-value
625
+ ``(1 + count) / (B + 1)`` (Phipson & Smyth 2010) is a **valid but slightly
626
+ conservative** estimator -- the ``+1`` adds the observed assignment and
627
+ prevents a zero p-value. Here ``count`` is the number of permutations at least
628
+ as extreme as the observed estimate and ``B`` is the number of valid
629
+ permutations. As ``B`` grows it converges to the *exact* p-value obtained by
630
+ full enumeration of all assignments (the R-parity reference). "Exact" is
631
+ reserved for that full enumeration; the sampled value approximates it.
562
632
  """
563
633
  rng = np.random.default_rng(seed)
564
634
 
@@ -620,11 +690,12 @@ def permutation_test(
620
690
  stacklevel=2,
621
691
  )
622
692
 
623
- # Compute p-value: proportion of |permuted| >= |original|
624
- p_value = np.mean(np.abs(valid_effects) >= np.abs(original_att))
625
-
626
- # Ensure p-value is at least 1/(n_permutations + 1)
627
- p_value = max(p_value, 1 / (len(valid_effects) + 1))
693
+ # Randomization-inference p-value (Phipson & Smyth 2010): include the observed
694
+ # statistic in both numerator and denominator. The 1/(B+1) floor is intrinsic
695
+ # (count == 0 -> 1/(B+1)), so no separate clamp is needed. With sampled
696
+ # permutations this converges to the exact full-enumeration value count/total.
697
+ count = int(np.sum(np.abs(valid_effects) >= np.abs(original_att)))
698
+ p_value = (1 + count) / (len(valid_effects) + 1)
628
699
 
629
700
  # Compute SE and CI from permutation distribution
630
701
  se = np.std(valid_effects, ddof=1)