diff-diff 2.6.1__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-2.6.1 → diff_diff-2.7.0}/PKG-INFO +24 -2
- {diff_diff-2.6.1 → diff_diff-2.7.0}/README.md +23 -1
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/__init__.py +12 -1
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/bootstrap_utils.py +124 -0
- diff_diff-2.7.0/diff_diff/efficient_did.py +843 -0
- diff_diff-2.7.0/diff_diff/efficient_did_bootstrap.py +315 -0
- diff_diff-2.7.0/diff_diff/efficient_did_results.py +289 -0
- diff_diff-2.7.0/diff_diff/efficient_did_weights.py +538 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered.py +736 -99
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered_aggregation.py +248 -141
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered_bootstrap.py +161 -58
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered_results.py +12 -1
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/trop.py +247 -123
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/utils.py +63 -5
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/visualization.py +61 -14
- {diff_diff-2.6.1 → diff_diff-2.7.0}/pyproject.toml +1 -1
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/Cargo.lock +13 -13
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/Cargo.toml +1 -1
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/trop.rs +181 -112
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/_backend.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/bacon.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/continuous_did.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/continuous_did_results.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/datasets.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/diagnostics.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/estimators.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/honest_did.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/imputation.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/imputation_bootstrap.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/imputation_results.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/linalg.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/power.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/prep.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/pretrends.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/results.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/stacked_did.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/stacked_did_results.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/sun_abraham.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/synthetic_did.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/triple_diff.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/trop_results.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/twfe.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/two_stage.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/two_stage_bootstrap.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/two_stage_results.py +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/build.rs +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/bootstrap.rs +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/lib.rs +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/linalg.rs +0 -0
- {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/weights.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 2.6.1
|
|
3
|
+
Version: 2.7.0
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -58,7 +58,7 @@ pip install -e .
|
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
60
|
import pandas as pd
|
|
61
|
-
from diff_diff import DifferenceInDifferences
|
|
61
|
+
from diff_diff import DifferenceInDifferences # or: DiD
|
|
62
62
|
|
|
63
63
|
# Create sample data
|
|
64
64
|
data = pd.DataFrame({
|
|
@@ -122,6 +122,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
122
122
|
- **Data prep utilities**: Helper functions for common data preparation tasks
|
|
123
123
|
- **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
|
|
124
124
|
|
|
125
|
+
## Estimator Aliases
|
|
126
|
+
|
|
127
|
+
All estimators have short aliases for convenience:
|
|
128
|
+
|
|
129
|
+
| Alias | Full Name | Method |
|
|
130
|
+
|-------|-----------|--------|
|
|
131
|
+
| `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
|
|
132
|
+
| `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
|
|
133
|
+
| `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
|
|
134
|
+
| `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
|
|
135
|
+
| `SA` | `SunAbraham` | Sun & Abraham (2021) |
|
|
136
|
+
| `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
|
|
137
|
+
| `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
|
|
138
|
+
| `SDiD` | `SyntheticDiD` | Synthetic DiD |
|
|
139
|
+
| `DDD` | `TripleDifference` | Triple difference |
|
|
140
|
+
| `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
|
|
141
|
+
| `Stacked` | `StackedDiD` | Stacked DiD |
|
|
142
|
+
| `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
|
|
143
|
+
| `EDiD` | `EfficientDiD` | Efficient DiD |
|
|
144
|
+
|
|
145
|
+
`TROP` already uses its short canonical name and needs no alias.
|
|
146
|
+
|
|
125
147
|
## Tutorials
|
|
126
148
|
|
|
127
149
|
We provide Jupyter notebook tutorials in `docs/tutorials/`:
|
|
@@ -20,7 +20,7 @@ pip install -e .
|
|
|
20
20
|
|
|
21
21
|
```python
|
|
22
22
|
import pandas as pd
|
|
23
|
-
from diff_diff import DifferenceInDifferences
|
|
23
|
+
from diff_diff import DifferenceInDifferences # or: DiD
|
|
24
24
|
|
|
25
25
|
# Create sample data
|
|
26
26
|
data = pd.DataFrame({
|
|
@@ -84,6 +84,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
84
84
|
- **Data prep utilities**: Helper functions for common data preparation tasks
|
|
85
85
|
- **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
|
|
86
86
|
|
|
87
|
+
## Estimator Aliases
|
|
88
|
+
|
|
89
|
+
All estimators have short aliases for convenience:
|
|
90
|
+
|
|
91
|
+
| Alias | Full Name | Method |
|
|
92
|
+
|-------|-----------|--------|
|
|
93
|
+
| `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
|
|
94
|
+
| `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
|
|
95
|
+
| `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
|
|
96
|
+
| `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
|
|
97
|
+
| `SA` | `SunAbraham` | Sun & Abraham (2021) |
|
|
98
|
+
| `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
|
|
99
|
+
| `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
|
|
100
|
+
| `SDiD` | `SyntheticDiD` | Synthetic DiD |
|
|
101
|
+
| `DDD` | `TripleDifference` | Triple difference |
|
|
102
|
+
| `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
|
|
103
|
+
| `Stacked` | `StackedDiD` | Stacked DiD |
|
|
104
|
+
| `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
|
|
105
|
+
| `EDiD` | `EfficientDiD` | Efficient DiD |
|
|
106
|
+
|
|
107
|
+
`TROP` already uses its short canonical name and needs no alias.
|
|
108
|
+
|
|
87
109
|
## Tutorials
|
|
88
110
|
|
|
89
111
|
We provide Jupyter notebook tutorials in `docs/tutorials/`:
|
|
@@ -128,6 +128,11 @@ from diff_diff.continuous_did import (
|
|
|
128
128
|
ContinuousDiDResults,
|
|
129
129
|
DoseResponseCurve,
|
|
130
130
|
)
|
|
131
|
+
from diff_diff.efficient_did import (
|
|
132
|
+
EfficientDiD,
|
|
133
|
+
EfficientDiDResults,
|
|
134
|
+
EDiDBootstrapResults,
|
|
135
|
+
)
|
|
131
136
|
from diff_diff.trop import (
|
|
132
137
|
TROP,
|
|
133
138
|
TROPResults,
|
|
@@ -172,8 +177,9 @@ Gardner = TwoStageDiD
|
|
|
172
177
|
DDD = TripleDifference
|
|
173
178
|
Stacked = StackedDiD
|
|
174
179
|
Bacon = BaconDecomposition
|
|
180
|
+
EDiD = EfficientDiD
|
|
175
181
|
|
|
176
|
-
__version__ = "2.6.1"
|
|
182
|
+
__version__ = "2.7.0"
|
|
177
183
|
__all__ = [
|
|
178
184
|
# Estimators
|
|
179
185
|
"DifferenceInDifferences",
|
|
@@ -231,6 +237,11 @@ __all__ = [
|
|
|
231
237
|
"trop",
|
|
232
238
|
"StackedDiDResults",
|
|
233
239
|
"stacked_did",
|
|
240
|
+
# EfficientDiD
|
|
241
|
+
"EfficientDiD",
|
|
242
|
+
"EfficientDiDResults",
|
|
243
|
+
"EDiDBootstrapResults",
|
|
244
|
+
"EDiD",
|
|
234
245
|
# Visualization
|
|
235
246
|
"plot_event_study",
|
|
236
247
|
"plot_group_effects",
|
|
@@ -19,6 +19,7 @@ __all__ = [
|
|
|
19
19
|
"compute_percentile_ci",
|
|
20
20
|
"compute_bootstrap_pvalue",
|
|
21
21
|
"compute_effect_bootstrap_stats",
|
|
22
|
+
"compute_effect_bootstrap_stats_batch",
|
|
22
23
|
]
|
|
23
24
|
|
|
24
25
|
|
|
@@ -277,3 +278,126 @@ def compute_effect_bootstrap_stats(
|
|
|
277
278
|
original_effect, valid_dist, n_valid=len(valid_dist)
|
|
278
279
|
)
|
|
279
280
|
return se, ci, p_value
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def compute_effect_bootstrap_stats_batch(
    original_effects: np.ndarray,
    bootstrap_matrix: np.ndarray,
    alpha: float = 0.05,
) -> tuple:
    """
    Batch-compute bootstrap statistics for multiple effects at once.

    Effects whose bootstrap column is entirely finite are processed with
    array operations; effects with some (but fewer than half) non-finite
    draws are delegated one at a time to ``compute_effect_bootstrap_stats``.
    Every other effect keeps NaN in all four outputs.

    Parameters
    ----------
    original_effects : np.ndarray
        Array of original point estimates, shape (n_effects,).
    bootstrap_matrix : np.ndarray
        Bootstrap distributions, shape (n_bootstrap, n_effects).
    alpha : float, default=0.05
        Significance level.

    Returns
    -------
    ses : np.ndarray
        Bootstrap SEs for each effect.
    ci_lowers : np.ndarray
        Lower CI bounds for each effect.
    ci_uppers : np.ndarray
        Upper CI bounds for each effect.
    p_values : np.ndarray
        Bootstrap p-values for each effect.

    Warns
    -----
    RuntimeWarning
        If an effect with a finite point estimate has fewer than 50% finite
        bootstrap draws (or no draws at all), or if an all-finite bootstrap
        column yields a zero or non-finite standard error.
    """
    # Accept array-likes (lists, tuples) as well as ndarrays.
    original_effects = np.asarray(original_effects)
    bootstrap_matrix = np.asarray(bootstrap_matrix)

    n_bootstrap, n_effects = bootstrap_matrix.shape
    ses = np.full(n_effects, np.nan)
    ci_lowers = np.full(n_effects, np.nan)
    ci_uppers = np.full(n_effects, np.nan)
    p_values = np.full(n_effects, np.nan)

    # Effects with a non-finite point estimate are never computed.
    valid_effects = np.isfinite(original_effects)
    if not np.any(valid_effects):
        return ses, ci_lowers, ci_uppers, p_values

    # Count finite bootstrap draws per effect.
    finite_mask = np.isfinite(bootstrap_matrix)  # (n_bootstrap, n_effects)
    n_valid = finite_mask.sum(axis=0)  # (n_effects,)

    # An effect needs at least half of its draws to be finite. The
    # ``n_valid > 0`` term only matters when n_bootstrap == 0, where the
    # 50% threshold alone would be satisfied by an empty column.
    enough_valid = (n_valid >= n_bootstrap * 0.5) & (n_valid > 0) & valid_effects

    # Warn once about every finite effect that is dropped for lack of draws.
    n_insufficient = int(np.sum(valid_effects & ~enough_valid))
    if n_insufficient > 0:
        warnings.warn(
            f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
            "Returning NaN for SE/CI/p-value.",
            RuntimeWarning,
            stacklevel=2,
        )
    if not np.any(enough_valid):
        return ses, ci_lowers, ci_uppers, p_values

    # Common case: every draw for the effect is finite -> vectorized path.
    all_finite = (n_valid == n_bootstrap) & enough_valid
    if np.any(all_finite):
        idx = np.where(all_finite)[0]
        sub = bootstrap_matrix[:, idx]

        # SE: sample standard deviation across the bootstrap dimension.
        batch_ses = np.std(sub, axis=0, ddof=1)

        # Percentile CI; row 0 is the lower bound, row 1 the upper bound.
        lower_pct = alpha / 2 * 100
        upper_pct = (1 - alpha / 2) * 100
        batch_ci = np.percentile(sub, [lower_pct, upper_pct], axis=0)

        # Two-sided p-value: share of draws on the opposite side of zero
        # from the point estimate, doubled, capped at 1 and floored at
        # 1 / (n_bootstrap + 1) so it is never exactly zero.
        effects = original_effects[idx]
        share_nonpositive = np.mean(sub <= 0, axis=0)
        share_nonnegative = np.mean(sub >= 0, axis=0)
        batch_p = np.where(effects >= 0, share_nonpositive, share_nonnegative)
        batch_p = np.minimum(2 * batch_p, 1.0)
        batch_p = np.maximum(batch_p, 1 / (n_bootstrap + 1))

        # Guard: SE must be positive and finite; otherwise leave NaN.
        se_valid = np.isfinite(batch_ses) & (batch_ses > 0)
        n_bad_se = int(np.sum(~se_valid))
        if n_bad_se > 0:
            warnings.warn(
                f"{n_bad_se} effect(s) had non-finite or zero bootstrap SE. "
                "Returning NaN for SE/CI/p-value.",
                RuntimeWarning,
                stacklevel=2,
            )
        keep = idx[se_valid]
        ses[keep] = batch_ses[se_valid]
        ci_lowers[keep] = batch_ci[0][se_valid]
        ci_uppers[keep] = batch_ci[1][se_valid]
        p_values[keep] = batch_p[se_valid]

    # Rare case: some non-finite draws -> per-effect scalar fallback.
    partial_valid = enough_valid & ~all_finite
    for j in np.where(partial_valid)[0]:
        se, ci, pv = compute_effect_bootstrap_stats(
            original_effects[j],
            bootstrap_matrix[:, j],
            alpha=alpha,
            context=f"effect {j}",
        )
        ses[j] = se
        ci_lowers[j] = ci[0]
        ci_uppers[j] = ci[1]
        p_values[j] = pv

    return ses, ci_lowers, ci_uppers, p_values
|