diff-diff 3.5.2__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-3.5.2 → diff_diff-3.6.0}/PKG-INFO +2 -1
- {diff_diff-3.5.2 → diff_diff-3.6.0}/README.md +1 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/__init__.py +8 -1
- diff_diff-3.6.0/diff_diff/bootstrap_chunking.py +185 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/conley.py +32 -11
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/continuous_did.py +20 -9
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/diagnostics.py +89 -18
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/efficient_did.py +9 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/efficient_did_bootstrap.py +77 -35
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/efficient_did_covariates.py +38 -3
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/estimators.py +97 -14
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/guides/llms-full.txt +59 -3
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/guides/llms.txt +2 -1
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/had.py +32 -22
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/imputation.py +154 -56
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/imputation_bootstrap.py +7 -2
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/linalg.py +253 -178
- diff_diff-3.6.0/diff_diff/lpdid.py +1205 -0
- diff_diff-3.6.0/diff_diff/lpdid_results.py +256 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/power.py +214 -9
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/prep.py +1 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/prep_dgp.py +217 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/results.py +10 -3
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/spillover.py +24 -13
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/staggered.py +89 -155
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/staggered_bootstrap.py +97 -65
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/staggered_triple_diff.py +18 -59
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/trop_global.py +27 -38
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/trop_local.py +83 -42
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/twfe.py +3 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/two_stage.py +88 -43
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/two_stage_bootstrap.py +124 -61
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/two_stage_results.py +4 -3
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/utils.py +368 -191
- {diff_diff-3.5.2 → diff_diff-3.6.0}/pyproject.toml +1 -1
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/Cargo.lock +56 -323
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/Cargo.toml +8 -7
- {diff_diff-3.5.2 → diff_diff-3.6.0}/LICENSE +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/_backend.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/_guides_api.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/_nprobust_port.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/_reporting_helpers.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/agent_workflow.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/bacon.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/balancing.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/bootstrap_utils.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/business_report.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/chaisemartin_dhaultfoeuille.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/chaisemartin_dhaultfoeuille_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/conformal.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/continuous_did_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/datasets.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/diagnostic_report.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/efficient_did_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/efficient_did_weights.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/guides/__init__.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/guides/llms-autonomous.txt +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/guides/llms-practitioner.txt +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/had_pretests.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/honest_did.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/imputation_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/local_linear.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/practitioner.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/pretrends.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/profile.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/stacked_did.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/stacked_did_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/staggered_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/staggered_triple_diff_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/sun_abraham.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/survey.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/synthetic_control.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/synthetic_control_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/triple_diff.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/trop.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/trop_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/__init__.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_common.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_continuous.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_diagnostic.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_event_study.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_power.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_staggered.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/visualization/_synthetic.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/wooldridge.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/diff_diff/wooldridge_results.py +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/build.rs +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/src/bootstrap.rs +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/src/lib.rs +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/src/linalg.rs +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/src/trop.rs +0 -0
- {diff_diff-3.5.2 → diff_diff-3.6.0}/rust/src/weights.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 3.5.2
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -170,6 +170,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
|
|
|
170
170
|
- [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
|
|
171
171
|
- [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
|
|
172
172
|
- [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
|
|
173
|
+
- [LPDiD](https://diff-diff.readthedocs.io/en/stable/api/lpdid.html) - Dube, Girardi, Jorda & Taylor (2025) Local Projections DiD: per-horizon long-difference event study on clean controls (no negative weighting), variance- or equally-weighted ATT, for absorbing treatment
|
|
173
174
|
- [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
|
|
174
175
|
|
|
175
176
|
## Diagnostics & Sensitivity
|
|
@@ -117,6 +117,7 @@ Full guide: `diff_diff.get_llm_guide("practitioner")`.
|
|
|
117
117
|
- [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
|
|
118
118
|
- [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
|
|
119
119
|
- [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
|
|
120
|
+
- [LPDiD](https://diff-diff.readthedocs.io/en/stable/api/lpdid.html) - Dube, Girardi, Jorda & Taylor (2025) Local Projections DiD: per-horizon long-difference event study on clean controls (no negative weighting), variance- or equally-weighted ATT, for absorbing treatment
|
|
120
121
|
- [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
|
|
121
122
|
|
|
122
123
|
## Diagnostics & Sensitivity
|
|
@@ -133,6 +133,7 @@ from diff_diff.prep import (
|
|
|
133
133
|
generate_staggered_data,
|
|
134
134
|
generate_staggered_ddd_data,
|
|
135
135
|
generate_survey_did_data,
|
|
136
|
+
generate_synthetic_control_data,
|
|
136
137
|
make_post_indicator,
|
|
137
138
|
make_treatment_indicator,
|
|
138
139
|
rank_control_units,
|
|
@@ -229,6 +230,8 @@ from diff_diff.synthetic_control import (
|
|
|
229
230
|
from diff_diff.synthetic_control_results import SyntheticControlResults
|
|
230
231
|
from diff_diff.wooldridge import WooldridgeDiD
|
|
231
232
|
from diff_diff.wooldridge_results import WooldridgeDiDResults
|
|
233
|
+
from diff_diff.lpdid import LPDiD
|
|
234
|
+
from diff_diff.lpdid_results import LPDiDResults
|
|
232
235
|
from diff_diff.utils import (
|
|
233
236
|
WildBootstrapResults,
|
|
234
237
|
check_parallel_trends,
|
|
@@ -298,7 +301,7 @@ ETWFE = WooldridgeDiD
|
|
|
298
301
|
DCDH = ChaisemartinDHaultfoeuille
|
|
299
302
|
HAD = HeterogeneousAdoptionDiD
|
|
300
303
|
|
|
301
|
-
__version__ = "3.5.2"
|
|
304
|
+
__version__ = "3.6.0"
|
|
302
305
|
__all__ = [
|
|
303
306
|
# Estimators
|
|
304
307
|
"DifferenceInDifferences",
|
|
@@ -380,6 +383,9 @@ __all__ = [
|
|
|
380
383
|
"WooldridgeDiD",
|
|
381
384
|
"WooldridgeDiDResults",
|
|
382
385
|
"ETWFE",
|
|
386
|
+
# LPDiD (Local Projections DiD)
|
|
387
|
+
"LPDiD",
|
|
388
|
+
"LPDiDResults",
|
|
383
389
|
# Visualization
|
|
384
390
|
"plot_bacon",
|
|
385
391
|
"plot_event_study",
|
|
@@ -426,6 +432,7 @@ __all__ = [
|
|
|
426
432
|
"generate_survey_did_data",
|
|
427
433
|
"generate_continuous_did_data",
|
|
428
434
|
"generate_reversible_did_data",
|
|
435
|
+
"generate_synthetic_control_data",
|
|
429
436
|
"create_event_time",
|
|
430
437
|
"aggregate_survey",
|
|
431
438
|
"aggregate_to_cohorts",
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Memory-bounded chunking for multiplier-bootstrap weight matrices.
|
|
2
|
+
|
|
3
|
+
The multiplier bootstrap perturbs cached influence functions with a dense
|
|
4
|
+
``(n_bootstrap, n_units)`` weight matrix. At large ``n_units`` that matrix
|
|
5
|
+
dominates peak memory (e.g. ``999 x 5_000_000 x 8`` bytes is ~40 GB). Every
|
|
6
|
+
consumer is a left-multiply ``weights @ influence_vector`` whose result is small
|
|
7
|
+
(``(n_bootstrap,)`` or ``(n_bootstrap, n_gt)``), so the bootstrap can be tiled
|
|
8
|
+
over the *draw* dimension: generate and consume the weights in row-blocks of
|
|
9
|
+
``B``, capping the live intermediate at ``(B, n_units)``. FLOPs are identical to
|
|
10
|
+
the un-chunked path -- only the draw axis is tiled. The generated weight stream
|
|
11
|
+
is *bit-identical* to the un-chunked matrix (see below); the downstream
|
|
12
|
+
``weights @ influence`` matmuls go through BLAS, whose reduction order depends on
|
|
13
|
+
the operand row-count, so the resulting statistics match the un-chunked path to
|
|
14
|
+
within floating-point reassociation (typically <~1 ULP), far below bootstrap
|
|
15
|
+
Monte-Carlo error -- not bit-for-bit.
|
|
16
|
+
|
|
17
|
+
Bit-identity of the weight *generation* is preserved on **both** backends:
|
|
18
|
+
|
|
19
|
+
- **Rust** seeds each row absolutely as ``base_seed + row_index``
|
|
20
|
+
(``rust/src/bootstrap.rs``), so calling the generator per block with base seed
|
|
21
|
+
``base_seed + chunk_start`` reproduces the exact un-chunked rows. Exactly one
|
|
22
|
+
``rng.integers`` draw is consumed, matching the un-chunked wrapper.
|
|
23
|
+
- The **NumPy** fallback draws the matrix row-major from the ``Generator``
|
|
24
|
+
stream, so consuming it in contiguous, in-order blocks from the same generator
|
|
25
|
+
reproduces the identical sequence.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from typing import Iterator, Optional, Tuple
|
|
31
|
+
|
|
32
|
+
import numpy as np
|
|
33
|
+
|
|
34
|
+
from diff_diff._backend import HAS_RUST_BACKEND, _rust_bootstrap_weights
|
|
35
|
+
from diff_diff.bootstrap_utils import generate_bootstrap_weights_batch_numpy
|
|
36
|
+
|
|
37
|
+
# Byte ceiling for a single ``(B, n_units)`` float64 weight block. 256 MB keeps
|
|
38
|
+
# the live intermediate small at millions of units while staying large enough
|
|
39
|
+
# that the per-block matmuls remain BLAS-efficient and chunk overhead (a handful
|
|
40
|
+
# of extra Python iterations / FFI calls) is negligible.
|
|
41
|
+
_TARGET_BLOCK_BYTES = 256 * 1024 * 1024
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def compute_block_size(
|
|
45
|
+
n_units: int, n_bootstrap: int, target_bytes: int = _TARGET_BLOCK_BYTES
|
|
46
|
+
) -> int:
|
|
47
|
+
"""Number of bootstrap rows per block so a ``(B, n_units)`` float64 block
|
|
48
|
+
stays under ``target_bytes``. Always in ``[1, n_bootstrap]``."""
|
|
49
|
+
if n_units <= 0:
|
|
50
|
+
return max(1, n_bootstrap)
|
|
51
|
+
b = target_bytes // (n_units * 8)
|
|
52
|
+
return int(max(1, min(max(1, n_bootstrap), b)))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def iter_weight_blocks(
|
|
56
|
+
n_bootstrap: int,
|
|
57
|
+
n_gen: int,
|
|
58
|
+
weight_type: str,
|
|
59
|
+
rng: np.random.Generator,
|
|
60
|
+
*,
|
|
61
|
+
expand_index: Optional[np.ndarray] = None,
|
|
62
|
+
block_size: Optional[int] = None,
|
|
63
|
+
) -> Iterator[Tuple[int, np.ndarray]]:
|
|
64
|
+
"""Yield ``(chunk_start, block)`` pairs covering all ``n_bootstrap`` draws.
|
|
65
|
+
|
|
66
|
+
``block`` has shape ``(B, width)`` where ``width = len(expand_index)`` when
|
|
67
|
+
``expand_index`` is given, else ``n_gen``. Weights are generated at width
|
|
68
|
+
``n_gen`` (unit / cluster / PSU level) and, when ``expand_index`` is given,
|
|
69
|
+
expanded to unit level via ``block[:, expand_index]`` (cluster->unit or
|
|
70
|
+
PSU->unit fan-out). The concatenation of all yielded blocks is bit-identical
|
|
71
|
+
to a single ``generate_bootstrap_weights_batch(n_bootstrap, n_gen, ...)``
|
|
72
|
+
followed by the same expansion.
|
|
73
|
+
|
|
74
|
+
Generation is in-order and stateful on ``rng`` (NumPy fallback) -- the caller
|
|
75
|
+
must consume the iterator sequentially, which the chunk loop does.
|
|
76
|
+
"""
|
|
77
|
+
width = n_gen if expand_index is None else int(len(expand_index))
|
|
78
|
+
if block_size is None:
|
|
79
|
+
block_size = compute_block_size(width, n_bootstrap)
|
|
80
|
+
if block_size < 1:
|
|
81
|
+
raise ValueError(f"block_size must be >= 1, got {block_size}")
|
|
82
|
+
|
|
83
|
+
rust_gen = (
|
|
84
|
+
_rust_bootstrap_weights
|
|
85
|
+
if (HAS_RUST_BACKEND and _rust_bootstrap_weights is not None)
|
|
86
|
+
else None
|
|
87
|
+
)
|
|
88
|
+
# Draw exactly one base seed (matching the un-chunked Rust wrapper); the
|
|
89
|
+
# NumPy fallback consumes the rng stream directly per block instead.
|
|
90
|
+
base_seed = int(rng.integers(0, 2**63 - 1)) if rust_gen is not None else 0
|
|
91
|
+
|
|
92
|
+
for chunk_start in range(0, n_bootstrap, block_size):
|
|
93
|
+
rows = min(block_size, n_bootstrap - chunk_start)
|
|
94
|
+
if rust_gen is not None:
|
|
95
|
+
block = rust_gen(rows, n_gen, weight_type, base_seed + chunk_start)
|
|
96
|
+
else:
|
|
97
|
+
block = generate_bootstrap_weights_batch_numpy(rows, n_gen, weight_type, rng)
|
|
98
|
+
if expand_index is not None:
|
|
99
|
+
block = block[:, expand_index]
|
|
100
|
+
yield chunk_start, block
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def iter_survey_multiplier_weight_blocks(
|
|
104
|
+
n_bootstrap: int,
|
|
105
|
+
resolved_survey: object,
|
|
106
|
+
weight_type: str,
|
|
107
|
+
rng: np.random.Generator,
|
|
108
|
+
*,
|
|
109
|
+
block_size: int,
|
|
110
|
+
) -> Tuple[np.ndarray, Iterator[Tuple[int, np.ndarray]]]:
|
|
111
|
+
"""Chunked PSU-level multiplier weights for the survey-aware bootstrap.
|
|
112
|
+
|
|
113
|
+
Returns ``(psu_ids, blocks)`` where ``blocks`` yields
|
|
114
|
+
``(chunk_start, (B, n_psu))`` PSU-weight blocks covering all draws.
|
|
115
|
+
|
|
116
|
+
For UNSTRATIFIED designs (``strata is None``, ``n_psu >= 2``) the
|
|
117
|
+
``(n_bootstrap, n_psu)`` matrix is generated one draw-block at a time via
|
|
118
|
+
:func:`iter_weight_blocks` plus the unstratified FPC scalar -- bit-identical
|
|
119
|
+
to the unstratified branch of
|
|
120
|
+
:func:`diff_diff.bootstrap_utils.generate_survey_multiplier_weights_batch`,
|
|
121
|
+
but the full matrix is never materialized. This is the path taken by
|
|
122
|
+
``cluster="unit"`` (each unit its own PSU, ``n_psu == n_units``), the case
|
|
123
|
+
that otherwise dominates bootstrap memory at large n_units.
|
|
124
|
+
|
|
125
|
+
Stratified designs (and the ``n_psu < 2`` degenerate case) fall back to full
|
|
126
|
+
generation + sliced blocks: per-stratum / lonely-PSU generation is not tiled
|
|
127
|
+
here, but stratified designs have few PSUs so the full matrix is small.
|
|
128
|
+
"""
|
|
129
|
+
from diff_diff.bootstrap_utils import generate_survey_multiplier_weights_batch
|
|
130
|
+
|
|
131
|
+
if block_size < 1:
|
|
132
|
+
raise ValueError(f"block_size must be >= 1, got {block_size}")
|
|
133
|
+
|
|
134
|
+
psu = getattr(resolved_survey, "psu", None)
|
|
135
|
+
strata = getattr(resolved_survey, "strata", None)
|
|
136
|
+
if psu is None:
|
|
137
|
+
n_psu = len(resolved_survey.weights) # type: ignore[attr-defined]
|
|
138
|
+
psu_ids = np.arange(n_psu)
|
|
139
|
+
else:
|
|
140
|
+
psu_ids = np.unique(psu)
|
|
141
|
+
n_psu = len(psu_ids)
|
|
142
|
+
|
|
143
|
+
if strata is not None or n_psu < 2:
|
|
144
|
+
# Stratified or degenerate single-PSU: full generation (small here).
|
|
145
|
+
weights, psu_ids = generate_survey_multiplier_weights_batch(
|
|
146
|
+
n_bootstrap, resolved_survey, weight_type, rng
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
def _sliced() -> Iterator[Tuple[int, np.ndarray]]:
|
|
150
|
+
for chunk_start in range(0, n_bootstrap, block_size):
|
|
151
|
+
yield chunk_start, weights[chunk_start : chunk_start + block_size]
|
|
152
|
+
|
|
153
|
+
return psu_ids, _sliced()
|
|
154
|
+
|
|
155
|
+
# Unstratified, n_psu >= 2: tile the generation over draws. Mirror the
|
|
156
|
+
# unstratified FPC scaling from generate_survey_multiplier_weights_batch.
|
|
157
|
+
fpc = getattr(resolved_survey, "fpc", None)
|
|
158
|
+
fpc_scale = 1.0
|
|
159
|
+
fpc_zero = False
|
|
160
|
+
if fpc is not None:
|
|
161
|
+
# psu=None already sets n_psu = len(weights), so n_units_for_fpc == n_psu
|
|
162
|
+
# on both branches of the original generator.
|
|
163
|
+
n_units_for_fpc = n_psu
|
|
164
|
+
if fpc[0] < n_units_for_fpc:
|
|
165
|
+
raise ValueError(
|
|
166
|
+
f"FPC ({fpc[0]}) is less than the number of PSUs "
|
|
167
|
+
f"({n_units_for_fpc}). FPC must be >= number of PSUs."
|
|
168
|
+
)
|
|
169
|
+
f = n_units_for_fpc / fpc[0]
|
|
170
|
+
if f < 1.0:
|
|
171
|
+
fpc_scale = float(np.sqrt(1.0 - f))
|
|
172
|
+
else:
|
|
173
|
+
fpc_zero = True
|
|
174
|
+
|
|
175
|
+
def _generated() -> Iterator[Tuple[int, np.ndarray]]:
|
|
176
|
+
for chunk_start, block in iter_weight_blocks(
|
|
177
|
+
n_bootstrap, n_psu, weight_type, rng, block_size=block_size
|
|
178
|
+
):
|
|
179
|
+
if fpc_zero:
|
|
180
|
+
block = np.zeros_like(block)
|
|
181
|
+
elif fpc_scale != 1.0:
|
|
182
|
+
block = block * fpc_scale
|
|
183
|
+
yield chunk_start, block
|
|
184
|
+
|
|
185
|
+
return psu_ids, _generated()
|
|
@@ -1104,16 +1104,37 @@ def _compute_conley_vcov(
|
|
|
1104
1104
|
_conley_sparse=_conley_sparse,
|
|
1105
1105
|
)
|
|
1106
1106
|
|
|
1107
|
-
# Sandwich via
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1107
|
+
# Sandwich via the shared rank-guarded inverse of the design Gram.
|
|
1108
|
+
# np.linalg.solve only raises on an *exactly* singular bread, so a *near*-
|
|
1109
|
+
# singular X'WX would otherwise flow a garbage inverse (~1e13) straight into
|
|
1110
|
+
# the spatial-HAC variance. `_rank_guarded_inv` truncates redundant
|
|
1111
|
+
# directions on the equilibrated Gram -> a finite SE on the identified
|
|
1112
|
+
# subspace (NaN only at rank 0), matching the covariate IF rank-guard and the
|
|
1113
|
+
# other structural bread inversions (ContinuousDiD / TwoStageDiD /
|
|
1114
|
+
# SpilloverDiD). Lazy import: `linalg` imports this module, so a top-level
|
|
1115
|
+
# `from diff_diff.linalg import ...` would be circular; resolving at call time
|
|
1116
|
+
# is safe (linalg is already loaded by the time this runs).
|
|
1117
|
+
from diff_diff.linalg import _rank_guarded_inv
|
|
1118
|
+
|
|
1119
|
+
bread_inv, n_dropped, _, dropped = _rank_guarded_inv(bread_matrix, return_dropped=True)
|
|
1120
|
+
if n_dropped:
|
|
1121
|
+
warnings.warn(
|
|
1122
|
+
"Conley spatial HAC variance: the design Gram (X'WX) is "
|
|
1123
|
+
f"rank-deficient ({n_dropped} redundant direction(s) dropped); "
|
|
1124
|
+
"rank-reducing to a finite SE on the identified subspace "
|
|
1125
|
+
"(NaN if rank 0). This usually indicates collinear regressors.",
|
|
1126
|
+
UserWarning,
|
|
1127
|
+
stacklevel=2,
|
|
1128
|
+
)
|
|
1129
|
+
# vcov = bread^{-1} @ meat @ bread^{-1}; algebraically identical to the prior
|
|
1130
|
+
# two symmetric solves given `bread` symmetric (holds for any meat).
|
|
1131
|
+
vcov = bread_inv @ meat @ bread_inv
|
|
1132
|
+
# A dropped (unidentified) coefficient is zero-filled in bread_inv, which would
|
|
1133
|
+
# otherwise report se=0 for that named coefficient. NaN its row/col in the
|
|
1134
|
+
# FINAL vcov so per-coefficient SE extraction yields NaN (not 0) for the
|
|
1135
|
+
# unidentified directions, while the identified coefficients stay finite.
|
|
1136
|
+
if dropped.any():
|
|
1137
|
+
vcov[dropped, :] = np.nan
|
|
1138
|
+
vcov[:, dropped] = np.nan
|
|
1118
1139
|
|
|
1119
1140
|
return vcov
|
|
@@ -29,7 +29,7 @@ from diff_diff.continuous_did_results import (
|
|
|
29
29
|
ContinuousDiDResults,
|
|
30
30
|
DoseResponseCurve,
|
|
31
31
|
)
|
|
32
|
-
from diff_diff.linalg import solve_ols
|
|
32
|
+
from diff_diff.linalg import _rank_guarded_inv, solve_ols
|
|
33
33
|
from diff_diff.survey import (
|
|
34
34
|
ResolvedSurveyDesign,
|
|
35
35
|
_resolve_survey_for_fit,
|
|
@@ -1047,21 +1047,32 @@ class ContinuousDiD:
|
|
|
1047
1047
|
|
|
1048
1048
|
# Store bootstrap info for influence function computation
|
|
1049
1049
|
# bread = (Psi'WPsi / n_treated)^{-1} when survey, (Psi'Psi / n_treated)^{-1} otherwise
|
|
1050
|
+
# Bread = (Psi'WPsi / mass)^{-1} via the shared rank-guarded inverse:
|
|
1051
|
+
# np.linalg.inv only raises on an *exactly* singular Gram, so a *near*-
|
|
1052
|
+
# singular B-spline design (clustered doses / near-duplicate knots)
|
|
1053
|
+
# previously returned a garbage inverse (~1e13) -> garbage SE. The prior
|
|
1054
|
+
# `pinv` fallback was both minimum-norm (not the column-drop / near-
|
|
1055
|
+
# collinear limit) and *silent*. `_rank_guarded_inv` truncates redundant
|
|
1056
|
+
# directions on the equilibrated Gram -> finite SE on the identified
|
|
1057
|
+
# subspace (NaN only at rank 0), matching the covariate IF rank-guard.
|
|
1050
1058
|
if w_treated is not None:
|
|
1051
1059
|
w_treated_sum = float(np.sum(w_treated))
|
|
1052
1060
|
PtWP = Psi.T @ (Psi * w_treated[:, np.newaxis])
|
|
1053
1061
|
# Normalize bread by weighted mass (not raw count) for consistency
|
|
1054
1062
|
# with downstream IF score denominators that also use weighted mass
|
|
1055
|
-
try:
|
|
1056
|
-
bread = np.linalg.inv(PtWP / w_treated_sum)
|
|
1057
|
-
except np.linalg.LinAlgError:
|
|
1058
|
-
bread = np.linalg.pinv(PtWP / w_treated_sum)
|
|
1063
|
+
bread, n_dropped, _ = _rank_guarded_inv(PtWP / w_treated_sum)
|
|
1059
1064
|
else:
|
|
1060
1065
|
PtP = Psi.T @ Psi
|
|
1061
|
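-
try:
|
|
1062
|
-
bread = np.linalg.inv(PtP / n_treated)
|
|
1063
|
-
except np.linalg.LinAlgError:
|
|
1064
|
-
bread = np.linalg.pinv(PtP / n_treated)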
|
|
1066
|
+
bread, n_dropped, _ = _rank_guarded_inv(PtP / n_treated)
|
|
1067
|
+
if n_dropped:
|
|
1068
|
+
warnings.warn(
|
|
1069
|
+
"ContinuousDiD ACRT variance: the B-spline design Gram is "
|
|
1070
|
+
f"rank-deficient ({n_dropped} redundant direction(s) dropped); "
|
|
1071
|
+
"rank-reducing to a finite SE on the identified subspace. "
|
|
1072
|
+
"Analytical SEs reflect the reduced rank (NaN if rank 0).",
|
|
1073
|
+
UserWarning,
|
|
1074
|
+
stacklevel=2,
|
|
1075
|
+
)
|
|
1065
1076
|
|
|
1066
1077
|
# ee_treated: per-unit estimating equation vectors (K-vector per unit)
|
|
1067
1078
|
# For WLS (survey weights), the score is w_i * X_i * u_i to match the
|
|
@@ -19,7 +19,7 @@ import pandas as pd
|
|
|
19
19
|
|
|
20
20
|
from diff_diff.estimators import DifferenceInDifferences
|
|
21
21
|
from diff_diff.results import _get_significance_stars
|
|
22
|
-
from diff_diff.utils import safe_inference
|
|
22
|
+
from diff_diff.utils import safe_inference, validate_binary
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
@dataclass
|
|
@@ -228,7 +228,7 @@ def run_placebo_test(
|
|
|
228
228
|
test_type : str, default="fake_timing"
|
|
229
229
|
Type of placebo test:
|
|
230
230
|
- "fake_timing": Assign treatment at a fake (earlier) time period
|
|
231
|
-
- "fake_group":
|
|
231
|
+
- "fake_group": Designate control units as "fake treated" (real-treated units, per the ``treatment`` column, are dropped first)
|
|
232
232
|
- "permutation": Randomly reassign treatment and compute distribution
|
|
233
233
|
- "leave_one_out": Drop each treated unit and re-estimate
|
|
234
234
|
fake_treatment_period : any, optional
|
|
@@ -313,6 +313,7 @@ def run_placebo_test(
|
|
|
313
313
|
fake_treated_units=fake_treatment_group,
|
|
314
314
|
post_periods=post_periods,
|
|
315
315
|
alpha=alpha,
|
|
316
|
+
treatment=treatment,
|
|
316
317
|
**estimator_kwargs,
|
|
317
318
|
)
|
|
318
319
|
|
|
@@ -445,14 +446,20 @@ def placebo_group_test(
|
|
|
445
446
|
fake_treated_units: List[Any],
|
|
446
447
|
post_periods: Optional[List[Any]] = None,
|
|
447
448
|
alpha: float = 0.05,
|
|
449
|
+
treatment: Optional[str] = None,
|
|
448
450
|
**estimator_kwargs,
|
|
449
451
|
) -> PlaceboTestResults:
|
|
450
452
|
"""
|
|
451
|
-
Test for differential trends
|
|
453
|
+
Test for differential trends by designating control units as "fake treated".
|
|
454
|
+
|
|
455
|
+
Designates ``fake_treated_units`` as fake-treated and estimates a DiD on the
|
|
456
|
+
resulting panel. A significant effect suggests heterogeneous trends in the
|
|
457
|
+
control group (a parallel-trends red flag).
|
|
452
458
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
459
|
+
If ``treatment`` is provided, units that are *ever* really treated are dropped
|
|
460
|
+
first, so the placebo runs on never-treated units only (the recommended,
|
|
461
|
+
uncontaminated design). If ``treatment`` is ``None``, the test runs on whatever
|
|
462
|
+
data is supplied, so the caller must pass control-only data for a valid placebo.
|
|
456
463
|
|
|
457
464
|
Parameters
|
|
458
465
|
----------
|
|
@@ -470,6 +477,11 @@ def placebo_group_test(
|
|
|
470
477
|
List of post-treatment period values.
|
|
471
478
|
alpha : float, default=0.05
|
|
472
479
|
Significance level.
|
|
480
|
+
treatment : str, optional
|
|
481
|
+
Real treatment-indicator column. When given, units that are ever
|
|
482
|
+
real-treated (``data.groupby(unit)[treatment].max() == 1``) are dropped
|
|
483
|
+
before the placebo, so it runs on never-treated units only. When ``None``
|
|
484
|
+
(default), no filtering is done and the caller must pass control-only data.
|
|
473
485
|
**estimator_kwargs
|
|
474
486
|
Arguments passed to DifferenceInDifferences.
|
|
475
487
|
|
|
@@ -481,7 +493,35 @@ def placebo_group_test(
|
|
|
481
493
|
if fake_treated_units is None or len(fake_treated_units) == 0:
|
|
482
494
|
raise ValueError("fake_treated_units must be a non-empty list")
|
|
483
495
|
|
|
484
|
-
|
|
496
|
+
fake_data = data.copy()
|
|
497
|
+
|
|
498
|
+
# Optionally restrict to never-treated units so the placebo is not contaminated
|
|
499
|
+
# by the real treatment effect (the BDM 2004 placebo-law design on controls).
|
|
500
|
+
if treatment is not None:
|
|
501
|
+
# Fail closed: a missing column or non-0/1 values would otherwise silently
|
|
502
|
+
# skip the ever-treated filter (groupby().max() drops NaN), running the
|
|
503
|
+
# placebo on contaminated data.
|
|
504
|
+
if treatment not in fake_data.columns:
|
|
505
|
+
raise ValueError(f"treatment column '{treatment}' not found in data")
|
|
506
|
+
if fake_data[treatment].isna().any():
|
|
507
|
+
raise ValueError(f"treatment column '{treatment}' contains missing values")
|
|
508
|
+
validate_binary(fake_data[treatment].to_numpy(), "treatment")
|
|
509
|
+
ever_treated = fake_data.groupby(unit)[treatment].max()
|
|
510
|
+
ever_treated_units = set(ever_treated[ever_treated == 1].index)
|
|
511
|
+
misused = [u for u in fake_treated_units if u in ever_treated_units]
|
|
512
|
+
if misused:
|
|
513
|
+
import warnings
|
|
514
|
+
|
|
515
|
+
warnings.warn(
|
|
516
|
+
f"{len(misused)} of fake_treated_units are themselves ever real-treated "
|
|
517
|
+
f"and will be dropped with the other real-treated units: {misused}. "
|
|
518
|
+
f"Pass only never-treated units as fake_treated_units for a valid placebo.",
|
|
519
|
+
UserWarning,
|
|
520
|
+
stacklevel=2,
|
|
521
|
+
)
|
|
522
|
+
fake_data = fake_data[~fake_data[unit].isin(ever_treated_units)].copy()
|
|
523
|
+
|
|
524
|
+
all_periods = sorted(fake_data[time].unique())
|
|
485
525
|
|
|
486
526
|
# Infer post periods if not provided
|
|
487
527
|
if post_periods is None:
|
|
@@ -489,14 +529,31 @@ def placebo_group_test(
|
|
|
489
529
|
post_periods = all_periods[mid:]
|
|
490
530
|
|
|
491
531
|
# Create fake treatment indicator
|
|
492
|
-
fake_data = data.copy()
|
|
493
532
|
fake_data["_fake_treated"] = fake_data[unit].isin(fake_treated_units).astype(int)
|
|
494
533
|
fake_data["_post"] = fake_data[time].isin(post_periods).astype(int)
|
|
495
534
|
|
|
535
|
+
# Guard degenerate designs (e.g., all fake_treated_units were dropped as
|
|
536
|
+
# real-treated, or no controls remain) before they surface as a cryptic
|
|
537
|
+
# LinAlgError inside the estimator.
|
|
538
|
+
if fake_data["_fake_treated"].sum() == 0:
|
|
539
|
+
raise ValueError(
|
|
540
|
+
"No fake-treated observations remain (all fake_treated_units were "
|
|
541
|
+
"dropped as real-treated, or are absent from the data). Pass "
|
|
542
|
+
"never-treated units as fake_treated_units."
|
|
543
|
+
)
|
|
544
|
+
if (fake_data["_fake_treated"] == 0).sum() == 0:
|
|
545
|
+
raise ValueError("No control (non-fake-treated) units remain for the placebo comparison.")
|
|
546
|
+
|
|
496
547
|
# Fit DiD
|
|
497
548
|
did = DifferenceInDifferences(**estimator_kwargs)
|
|
498
549
|
results = did.fit(fake_data, outcome=outcome, treatment="_fake_treated", time="_post")
|
|
499
550
|
|
|
551
|
+
# Record the fake-treated units actually used (after any never-treated
|
|
552
|
+
# filtering), not just the originally requested list, to avoid metadata drift.
|
|
553
|
+
# Preserve the caller's order (sorting could raise TypeError on mixed-type IDs).
|
|
554
|
+
retained = set(fake_data.loc[fake_data["_fake_treated"] == 1, unit].unique())
|
|
555
|
+
used_fake_treated = [u for u in fake_treated_units if u in retained]
|
|
556
|
+
|
|
500
557
|
return PlaceboTestResults(
|
|
501
558
|
test_type="fake_group",
|
|
502
559
|
placebo_effect=results.att,
|
|
@@ -507,7 +564,7 @@ def placebo_group_test(
|
|
|
507
564
|
n_obs=results.n_obs,
|
|
508
565
|
is_significant=bool(results.p_value < alpha),
|
|
509
566
|
alpha=alpha,
|
|
510
|
-
fake_group=
|
|
567
|
+
fake_group=used_fake_treated,
|
|
511
568
|
)
|
|
512
569
|
|
|
513
570
|
|
|
@@ -526,8 +583,12 @@ def permutation_test(
|
|
|
526
583
|
Compute permutation-based p-value for DiD estimate.
|
|
527
584
|
|
|
528
585
|
Randomly reassigns treatment status at the unit level and computes the
|
|
529
|
-
DiD estimate for each permutation. The p-value is the
|
|
530
|
-
|
|
586
|
+
DiD estimate for each permutation. The p-value is the randomization-inference
|
|
587
|
+
value ``(1 + count) / (B + 1)`` (Phipson & Smyth 2010), where ``count`` is the
|
|
588
|
+
number of permuted estimates at least as extreme as the observed and ``B`` is
|
|
589
|
+
the number of valid permutations. With ``B`` sampled permutations this is a
|
|
590
|
+
Monte-Carlo approximation that converges to the exact full-enumeration value
|
|
591
|
+
``count / total`` as ``B`` grows.
|
|
531
592
|
|
|
532
593
|
Parameters
|
|
533
594
|
----------
|
|
@@ -557,8 +618,17 @@ def permutation_test(
|
|
|
557
618
|
|
|
558
619
|
Notes
|
|
559
620
|
-----
|
|
560
|
-
|
|
561
|
-
|
|
621
|
+
This is a randomization-inference (permutation) test of the sharp null of no
|
|
622
|
+
effect for any unit; it does not rely on asymptotic approximations. Treatment
|
|
623
|
+
assignments are drawn independently each iteration (Monte-Carlo sampling *with
|
|
624
|
+
replacement* from the assignment space), so the reported p-value
|
|
625
|
+
``(1 + count) / (B + 1)`` (Phipson & Smyth 2010) is a **valid but slightly
|
|
626
|
+
conservative** estimator -- the ``+1`` adds the observed assignment and
|
|
627
|
+
prevents a zero p-value. Here ``count`` is the number of permutations at least
|
|
628
|
+
as extreme as the observed estimate and ``B`` is the number of valid
|
|
629
|
+
permutations. As ``B`` grows it converges to the *exact* p-value obtained by
|
|
630
|
+
full enumeration of all assignments (the R-parity reference). "Exact" is
|
|
631
|
+
reserved for that full enumeration; the sampled value approximates it.
|
|
562
632
|
"""
|
|
563
633
|
rng = np.random.default_rng(seed)
|
|
564
634
|
|
|
@@ -620,11 +690,12 @@ def permutation_test(
|
|
|
620
690
|
stacklevel=2,
|
|
621
691
|
)
|
|
622
692
|
|
|
623
|
-
#
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
#
|
|
627
|
-
|
|
693
|
+
# Randomization-inference p-value (Phipson & Smyth 2010): include the observed
|
|
694
|
+
# statistic in both numerator and denominator. The 1/(B+1) floor is intrinsic
|
|
695
|
+
# (count == 0 -> 1/(B+1)), so no separate clamp is needed. With sampled
|
|
696
|
+
# permutations this converges to the exact full-enumeration value count/total.
|
|
697
|
+
count = int(np.sum(np.abs(valid_effects) >= np.abs(original_att)))
|
|
698
|
+
p_value = (1 + count) / (len(valid_effects) + 1)
|
|
628
699
|
|
|
629
700
|
# Compute SE and CI from permutation distribution
|
|
630
701
|
se = np.std(valid_effects, ddof=1)
|
|
@@ -789,6 +789,11 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
|
|
|
789
789
|
m_hat_cache: Dict[Tuple, np.ndarray] = {}
|
|
790
790
|
r_hat_cache: Dict[Tuple[float, float], np.ndarray] = {}
|
|
791
791
|
s_hat_cache: Dict[float, np.ndarray] = {} # inverse propensities per group
|
|
792
|
+
# Per-fit cache of the polynomial sieve basis, keyed (id(X), degree). The three
|
|
793
|
+
# sieve nuisance helpers all build the basis from the same fit-level
|
|
794
|
+
# `covariate_matrix`, so this shares each distinct degree's basis across them
|
|
795
|
+
# instead of rebuilding it per helper. Lives only for this fit() call.
|
|
796
|
+
sieve_basis_cache: Dict[Tuple[int, int], np.ndarray] = {}
|
|
792
797
|
|
|
793
798
|
if use_covariates:
|
|
794
799
|
assert covariates is not None # for type narrowing
|
|
@@ -934,6 +939,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
|
|
|
934
939
|
k_max=self.sieve_k_max,
|
|
935
940
|
criterion=self.sieve_criterion,
|
|
936
941
|
unit_weights=unit_level_weights,
|
|
942
|
+
basis_cache=sieve_basis_cache,
|
|
937
943
|
)
|
|
938
944
|
# m_{g', tpre, 1}(X)
|
|
939
945
|
key_gp_tpre = (gp, tpre_col_val, effective_p1_col)
|
|
@@ -950,6 +956,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
|
|
|
950
956
|
k_max=self.sieve_k_max,
|
|
951
957
|
criterion=self.sieve_criterion,
|
|
952
958
|
unit_weights=unit_level_weights,
|
|
959
|
+
basis_cache=sieve_basis_cache,
|
|
953
960
|
)
|
|
954
961
|
# r_{g, inf}(X) and r_{g, g'}(X) via sieve (Eq 4.1-4.2)
|
|
955
962
|
for comp in {np.inf, gp}:
|
|
@@ -966,6 +973,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
|
|
|
966
973
|
criterion=self.sieve_criterion,
|
|
967
974
|
ratio_clip=self.ratio_clip,
|
|
968
975
|
unit_weights=unit_level_weights,
|
|
976
|
+
basis_cache=sieve_basis_cache,
|
|
969
977
|
)
|
|
970
978
|
|
|
971
979
|
# Per-unit DR generated outcomes: shape (n_units, H)
|
|
@@ -998,6 +1006,7 @@ class EfficientDiD(EfficientDiDBootstrapMixin):
|
|
|
998
1006
|
k_max=self.sieve_k_max,
|
|
999
1007
|
criterion=self.sieve_criterion,
|
|
1000
1008
|
unit_weights=unit_level_weights,
|
|
1009
|
+
basis_cache=sieve_basis_cache,
|
|
1001
1010
|
)
|
|
1002
1011
|
|
|
1003
1012
|
# Conditional Omega*(X) with per-unit propensities (Eq 3.12)
|