diff-diff 2.6.0__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.6.0 → diff_diff-2.7.0}/PKG-INFO +24 -2
- {diff_diff-2.6.0 → diff_diff-2.7.0}/README.md +23 -1
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/__init__.py +39 -1
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/bootstrap_utils.py +124 -0
- diff_diff-2.7.0/diff_diff/efficient_did.py +843 -0
- diff_diff-2.7.0/diff_diff/efficient_did_bootstrap.py +315 -0
- diff_diff-2.7.0/diff_diff/efficient_did_results.py +289 -0
- diff_diff-2.7.0/diff_diff/efficient_did_weights.py +538 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/staggered.py +736 -99
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/staggered_aggregation.py +248 -141
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/staggered_bootstrap.py +161 -58
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/staggered_results.py +12 -1
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/trop.py +247 -123
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/utils.py +63 -5
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/visualization.py +61 -14
- {diff_diff-2.6.0 → diff_diff-2.7.0}/pyproject.toml +1 -1
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/Cargo.lock +21 -21
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/Cargo.toml +1 -1
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/src/trop.rs +181 -112
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/_backend.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/bacon.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/continuous_did.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/continuous_did_results.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/datasets.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/estimators.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/imputation.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/imputation_bootstrap.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/imputation_results.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/linalg.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/power.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/prep.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/results.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/stacked_did.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/stacked_did_results.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/sun_abraham.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/trop_results.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/twfe.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/two_stage.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/two_stage_bootstrap.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/diff_diff/two_stage_results.py +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/build.rs +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/src/lib.rs +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/src/linalg.rs +0 -0
- {diff_diff-2.6.0 → diff_diff-2.7.0}/rust/src/weights.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 2.6.0
|
|
3
|
+
Version: 2.7.0
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -58,7 +58,7 @@ pip install -e .
|
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
60
|
import pandas as pd
|
|
61
|
-
from diff_diff import DifferenceInDifferences
|
|
61
|
+
from diff_diff import DifferenceInDifferences # or: DiD
|
|
62
62
|
|
|
63
63
|
# Create sample data
|
|
64
64
|
data = pd.DataFrame({
|
|
@@ -122,6 +122,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
122
122
|
- **Data prep utilities**: Helper functions for common data preparation tasks
|
|
123
123
|
- **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
|
|
124
124
|
|
|
125
|
+
## Estimator Aliases
|
|
126
|
+
|
|
127
|
+
All estimators have short aliases for convenience:
|
|
128
|
+
|
|
129
|
+
| Alias | Full Name | Method |
|
|
130
|
+
|-------|-----------|--------|
|
|
131
|
+
| `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
|
|
132
|
+
| `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
|
|
133
|
+
| `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
|
|
134
|
+
| `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
|
|
135
|
+
| `SA` | `SunAbraham` | Sun & Abraham (2021) |
|
|
136
|
+
| `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
|
|
137
|
+
| `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
|
|
138
|
+
| `SDiD` | `SyntheticDiD` | Synthetic DiD |
|
|
139
|
+
| `DDD` | `TripleDifference` | Triple difference |
|
|
140
|
+
| `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
|
|
141
|
+
| `Stacked` | `StackedDiD` | Stacked DiD |
|
|
142
|
+
| `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
|
|
143
|
+
| `EDiD` | `EfficientDiD` | Efficient DiD |
|
|
144
|
+
|
|
145
|
+
`TROP` already uses its short canonical name and needs no alias.
|
|
146
|
+
|
|
125
147
|
## Tutorials
|
|
126
148
|
|
|
127
149
|
We provide Jupyter notebook tutorials in `docs/tutorials/`:
|
|
@@ -20,7 +20,7 @@ pip install -e .
|
|
|
20
20
|
|
|
21
21
|
```python
|
|
22
22
|
import pandas as pd
|
|
23
|
-
from diff_diff import DifferenceInDifferences
|
|
23
|
+
from diff_diff import DifferenceInDifferences # or: DiD
|
|
24
24
|
|
|
25
25
|
# Create sample data
|
|
26
26
|
data = pd.DataFrame({
|
|
@@ -84,6 +84,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
84
84
|
- **Data prep utilities**: Helper functions for common data preparation tasks
|
|
85
85
|
- **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
|
|
86
86
|
|
|
87
|
+
## Estimator Aliases
|
|
88
|
+
|
|
89
|
+
All estimators have short aliases for convenience:
|
|
90
|
+
|
|
91
|
+
| Alias | Full Name | Method |
|
|
92
|
+
|-------|-----------|--------|
|
|
93
|
+
| `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
|
|
94
|
+
| `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
|
|
95
|
+
| `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
|
|
96
|
+
| `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
|
|
97
|
+
| `SA` | `SunAbraham` | Sun & Abraham (2021) |
|
|
98
|
+
| `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
|
|
99
|
+
| `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
|
|
100
|
+
| `SDiD` | `SyntheticDiD` | Synthetic DiD |
|
|
101
|
+
| `DDD` | `TripleDifference` | Triple difference |
|
|
102
|
+
| `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
|
|
103
|
+
| `Stacked` | `StackedDiD` | Stacked DiD |
|
|
104
|
+
| `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
|
|
105
|
+
| `EDiD` | `EfficientDiD` | Efficient DiD |
|
|
106
|
+
|
|
107
|
+
`TROP` already uses its short canonical name and needs no alias.
|
|
108
|
+
|
|
87
109
|
## Tutorials
|
|
88
110
|
|
|
89
111
|
We provide Jupyter notebook tutorials in `docs/tutorials/`:
|
|
@@ -128,6 +128,11 @@ from diff_diff.continuous_did import (
|
|
|
128
128
|
ContinuousDiDResults,
|
|
129
129
|
DoseResponseCurve,
|
|
130
130
|
)
|
|
131
|
+
from diff_diff.efficient_did import (
|
|
132
|
+
EfficientDiD,
|
|
133
|
+
EfficientDiDResults,
|
|
134
|
+
EDiDBootstrapResults,
|
|
135
|
+
)
|
|
131
136
|
from diff_diff.trop import (
|
|
132
137
|
TROP,
|
|
133
138
|
TROPResults,
|
|
@@ -159,7 +164,22 @@ from diff_diff.datasets import (
|
|
|
159
164
|
load_mpdta,
|
|
160
165
|
)
|
|
161
166
|
|
|
162
|
-
|
|
167
|
+
# Estimator aliases — short names for convenience
|
|
168
|
+
DiD = DifferenceInDifferences
|
|
169
|
+
TWFE = TwoWayFixedEffects
|
|
170
|
+
EventStudy = MultiPeriodDiD
|
|
171
|
+
SDiD = SyntheticDiD
|
|
172
|
+
CS = CallawaySantAnna
|
|
173
|
+
CDiD = ContinuousDiD
|
|
174
|
+
SA = SunAbraham
|
|
175
|
+
BJS = ImputationDiD
|
|
176
|
+
Gardner = TwoStageDiD
|
|
177
|
+
DDD = TripleDifference
|
|
178
|
+
Stacked = StackedDiD
|
|
179
|
+
Bacon = BaconDecomposition
|
|
180
|
+
EDiD = EfficientDiD
|
|
181
|
+
|
|
182
|
+
__version__ = "2.7.0"
|
|
163
183
|
__all__ = [
|
|
164
184
|
# Estimators
|
|
165
185
|
"DifferenceInDifferences",
|
|
@@ -174,6 +194,19 @@ __all__ = [
|
|
|
174
194
|
"TripleDifference",
|
|
175
195
|
"TROP",
|
|
176
196
|
"StackedDiD",
|
|
197
|
+
# Estimator aliases (short names)
|
|
198
|
+
"DiD",
|
|
199
|
+
"TWFE",
|
|
200
|
+
"EventStudy",
|
|
201
|
+
"SDiD",
|
|
202
|
+
"CS",
|
|
203
|
+
"CDiD",
|
|
204
|
+
"SA",
|
|
205
|
+
"BJS",
|
|
206
|
+
"Gardner",
|
|
207
|
+
"DDD",
|
|
208
|
+
"Stacked",
|
|
209
|
+
"Bacon",
|
|
177
210
|
# Bacon Decomposition
|
|
178
211
|
"BaconDecomposition",
|
|
179
212
|
"BaconDecompositionResults",
|
|
@@ -204,6 +237,11 @@ __all__ = [
|
|
|
204
237
|
"trop",
|
|
205
238
|
"StackedDiDResults",
|
|
206
239
|
"stacked_did",
|
|
240
|
+
# EfficientDiD
|
|
241
|
+
"EfficientDiD",
|
|
242
|
+
"EfficientDiDResults",
|
|
243
|
+
"EDiDBootstrapResults",
|
|
244
|
+
"EDiD",
|
|
207
245
|
# Visualization
|
|
208
246
|
"plot_event_study",
|
|
209
247
|
"plot_group_effects",
|
|
@@ -19,6 +19,7 @@ __all__ = [
|
|
|
19
19
|
"compute_percentile_ci",
|
|
20
20
|
"compute_bootstrap_pvalue",
|
|
21
21
|
"compute_effect_bootstrap_stats",
|
|
22
|
+
"compute_effect_bootstrap_stats_batch",
|
|
22
23
|
]
|
|
23
24
|
|
|
24
25
|
|
|
@@ -277,3 +278,126 @@ def compute_effect_bootstrap_stats(
|
|
|
277
278
|
original_effect, valid_dist, n_valid=len(valid_dist)
|
|
278
279
|
)
|
|
279
280
|
return se, ci, p_value
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def compute_effect_bootstrap_stats_batch(
    original_effects: np.ndarray,
    bootstrap_matrix: np.ndarray,
    alpha: float = 0.05,
) -> tuple:
    """
    Batch-compute bootstrap statistics for multiple effects at once.

    Effects whose bootstrap column is entirely finite are handled with
    vectorized NumPy operations.  Effects with some (but fewer than half)
    non-finite draws are delegated one at a time to
    ``compute_effect_bootstrap_stats``.  Every other effect keeps NaN in all
    four outputs.

    Parameters
    ----------
    original_effects : np.ndarray
        Array of original point estimates, shape (n_effects,).
    bootstrap_matrix : np.ndarray
        Bootstrap distributions, shape (n_bootstrap, n_effects).
    alpha : float, default=0.05
        Significance level.

    Returns
    -------
    ses : np.ndarray
        Bootstrap SEs for each effect.
    ci_lowers : np.ndarray
        Lower CI bounds for each effect.
    ci_uppers : np.ndarray
        Upper CI bounds for each effect.
    p_values : np.ndarray
        Bootstrap p-values for each effect.

    Notes
    -----
    NaN is returned (for SE, CI bounds and p-value alike) when the original
    effect is non-finite, when fewer than 50% of the bootstrap draws for the
    effect are finite (including the case of zero draws), or when the
    bootstrap SE is zero or non-finite.  The latter two cases emit a
    ``RuntimeWarning``.
    """
    # Coerce once so list / Series inputs support the positional fancy
    # indexing used below.
    original_effects = np.asarray(original_effects, dtype=float)
    bootstrap_matrix = np.asarray(bootstrap_matrix, dtype=float)

    n_bootstrap, n_effects = bootstrap_matrix.shape
    ses = np.full(n_effects, np.nan)
    ci_lowers = np.full(n_effects, np.nan)
    ci_uppers = np.full(n_effects, np.nan)
    p_values = np.full(n_effects, np.nan)

    # Nothing to do when no point estimate is usable.
    valid_effects = np.isfinite(original_effects)
    if not np.any(valid_effects):
        return ses, ci_lowers, ci_uppers, p_values

    # Count valid bootstrap samples per effect
    finite_mask = np.isfinite(bootstrap_matrix)  # (n_bootstrap, n_effects)
    n_valid = finite_mask.sum(axis=0)  # (n_effects,)

    # An effect needs at least half of its draws finite.  The ``n_valid > 0``
    # term keeps an empty bootstrap matrix (where 0 >= 0 * 0.5 would pass)
    # away from np.std / np.percentile on empty data.
    enough_valid = (n_valid >= n_bootstrap * 0.5) & (n_valid > 0) & valid_effects

    # Warn once about effects that had a finite estimate but too few draws.
    n_insufficient = int(np.sum(valid_effects & ~enough_valid))
    if n_insufficient > 0:
        warnings.warn(
            f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
            "Returning NaN for SE/CI/p-value.",
            RuntimeWarning,
            stacklevel=2,
        )
    if not np.any(enough_valid):
        return ses, ci_lowers, ci_uppers, p_values

    # For effects with all-finite bootstraps (common case), use vectorized ops
    all_finite = (n_valid == n_bootstrap) & enough_valid
    if np.any(all_finite):
        idx = np.where(all_finite)[0]
        sub = bootstrap_matrix[:, idx]

        # SE: sample standard deviation across the bootstrap dimension
        batch_ses = np.std(sub, axis=0, ddof=1)

        # Percentile CI; row 0 is the lower bound, row 1 the upper bound
        lower_pct = alpha / 2 * 100
        upper_pct = (1 - alpha / 2) * 100
        batch_ci = np.percentile(sub, [lower_pct, upper_pct], axis=0)

        # Two-sided p-value: twice the share of draws on the opposite side of
        # zero from the point estimate, capped at 1 and floored at
        # 1 / (n_bootstrap + 1) so it is never exactly zero.
        tail_share = np.where(
            original_effects[idx] >= 0,
            np.mean(sub <= 0, axis=0),
            np.mean(sub >= 0, axis=0),
        )
        batch_p = np.minimum(2 * tail_share, 1.0)
        batch_p = np.maximum(batch_p, 1 / (n_bootstrap + 1))

        # Guard: SE must be positive and finite
        se_valid = np.isfinite(batch_ses) & (batch_ses > 0)
        n_bad_se = int(np.sum(~se_valid))
        if n_bad_se > 0:
            warnings.warn(
                f"{n_bad_se} effect(s) had non-finite or zero bootstrap SE. "
                "Returning NaN for SE/CI/p-value.",
                RuntimeWarning,
                stacklevel=2,
            )
        good = idx[se_valid]
        ses[good] = batch_ses[se_valid]
        ci_lowers[good] = batch_ci[0][se_valid]
        ci_uppers[good] = batch_ci[1][se_valid]
        p_values[good] = batch_p[se_valid]

    # Handle effects with some non-finite bootstraps (rare) via scalar fallback
    partial_valid = enough_valid & ~all_finite
    for j in np.where(partial_valid)[0]:
        se, ci, pv = compute_effect_bootstrap_stats(
            original_effects[j], bootstrap_matrix[:, j], alpha=alpha,
            context=f"effect {j}"
        )
        ses[j] = se
        ci_lowers[j] = ci[0]
        ci_uppers[j] = ci[1]
        p_values[j] = pv

    return ses, ci_lowers, ci_uppers, p_values
|