diff-diff 2.6.1__tar.gz → 2.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {diff_diff-2.6.1 → diff_diff-2.7.0}/PKG-INFO +24 -2
  2. {diff_diff-2.6.1 → diff_diff-2.7.0}/README.md +23 -1
  3. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/__init__.py +12 -1
  4. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/bootstrap_utils.py +124 -0
  5. diff_diff-2.7.0/diff_diff/efficient_did.py +843 -0
  6. diff_diff-2.7.0/diff_diff/efficient_did_bootstrap.py +315 -0
  7. diff_diff-2.7.0/diff_diff/efficient_did_results.py +289 -0
  8. diff_diff-2.7.0/diff_diff/efficient_did_weights.py +538 -0
  9. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered.py +736 -99
  10. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered_aggregation.py +248 -141
  11. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered_bootstrap.py +161 -58
  12. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/staggered_results.py +12 -1
  13. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/trop.py +247 -123
  14. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/utils.py +63 -5
  15. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/visualization.py +61 -14
  16. {diff_diff-2.6.1 → diff_diff-2.7.0}/pyproject.toml +1 -1
  17. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/Cargo.lock +13 -13
  18. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/Cargo.toml +1 -1
  19. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/trop.rs +181 -112
  20. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/_backend.py +0 -0
  21. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/bacon.py +0 -0
  22. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/continuous_did.py +0 -0
  23. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/continuous_did_bspline.py +0 -0
  24. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/continuous_did_results.py +0 -0
  25. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/datasets.py +0 -0
  26. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/diagnostics.py +0 -0
  27. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/estimators.py +0 -0
  28. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/honest_did.py +0 -0
  29. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/imputation.py +0 -0
  30. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/imputation_bootstrap.py +0 -0
  31. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/imputation_results.py +0 -0
  32. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/linalg.py +0 -0
  33. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/power.py +0 -0
  34. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/prep.py +0 -0
  35. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/prep_dgp.py +0 -0
  36. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/pretrends.py +0 -0
  37. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/results.py +0 -0
  38. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/stacked_did.py +0 -0
  39. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/stacked_did_results.py +0 -0
  40. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/sun_abraham.py +0 -0
  41. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/synthetic_did.py +0 -0
  42. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/triple_diff.py +0 -0
  43. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/trop_results.py +0 -0
  44. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/twfe.py +0 -0
  45. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/two_stage.py +0 -0
  46. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/two_stage_bootstrap.py +0 -0
  47. {diff_diff-2.6.1 → diff_diff-2.7.0}/diff_diff/two_stage_results.py +0 -0
  48. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/build.rs +0 -0
  49. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/bootstrap.rs +0 -0
  50. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/lib.rs +0 -0
  51. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/linalg.rs +0 -0
  52. {diff_diff-2.6.1 → diff_diff-2.7.0}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.6.1
3
+ Version: 2.7.0
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -58,7 +58,7 @@ pip install -e .
58
58
 
59
59
  ```python
60
60
  import pandas as pd
61
- from diff_diff import DifferenceInDifferences
61
+ from diff_diff import DifferenceInDifferences # or: DiD
62
62
 
63
63
  # Create sample data
64
64
  data = pd.DataFrame({
@@ -122,6 +122,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
122
122
  - **Data prep utilities**: Helper functions for common data preparation tasks
123
123
  - **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
124
124
 
125
+ ## Estimator Aliases
126
+
127
+ All estimators have short aliases for convenience:
128
+
129
+ | Alias | Full Name | Method |
130
+ |-------|-----------|--------|
131
+ | `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
132
+ | `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
133
+ | `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
134
+ | `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
135
+ | `SA` | `SunAbraham` | Sun & Abraham (2021) |
136
+ | `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
137
+ | `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
138
+ | `SDiD` | `SyntheticDiD` | Synthetic DiD |
139
+ | `DDD` | `TripleDifference` | Triple difference |
140
+ | `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
141
+ | `Stacked` | `StackedDiD` | Stacked DiD |
142
+ | `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
143
+ | `EDiD` | `EfficientDiD` | Efficient DiD |
144
+
145
+ `TROP` already uses its short canonical name and needs no alias.
146
+
125
147
  ## Tutorials
126
148
 
127
149
  We provide Jupyter notebook tutorials in `docs/tutorials/`:
@@ -20,7 +20,7 @@ pip install -e .
20
20
 
21
21
  ```python
22
22
  import pandas as pd
23
- from diff_diff import DifferenceInDifferences
23
+ from diff_diff import DifferenceInDifferences # or: DiD
24
24
 
25
25
  # Create sample data
26
26
  data = pd.DataFrame({
@@ -84,6 +84,28 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
84
84
  - **Data prep utilities**: Helper functions for common data preparation tasks
85
85
  - **Validated against R**: Benchmarked against `did`, `synthdid`, and `fixest` packages (see [benchmarks](docs/benchmarks.rst))
86
86
 
87
+ ## Estimator Aliases
88
+
89
+ All estimators have short aliases for convenience:
90
+
91
+ | Alias | Full Name | Method |
92
+ |-------|-----------|--------|
93
+ | `DiD` | `DifferenceInDifferences` | Basic 2x2 DiD |
94
+ | `TWFE` | `TwoWayFixedEffects` | Two-way fixed effects |
95
+ | `EventStudy` | `MultiPeriodDiD` | Event study / multi-period |
96
+ | `CS` | `CallawaySantAnna` | Callaway & Sant'Anna (2021) |
97
+ | `SA` | `SunAbraham` | Sun & Abraham (2021) |
98
+ | `BJS` | `ImputationDiD` | Borusyak, Jaravel & Spiess (2024) |
99
+ | `Gardner` | `TwoStageDiD` | Gardner (2022) two-stage |
100
+ | `SDiD` | `SyntheticDiD` | Synthetic DiD |
101
+ | `DDD` | `TripleDifference` | Triple difference |
102
+ | `CDiD` | `ContinuousDiD` | Continuous treatment DiD |
103
+ | `Stacked` | `StackedDiD` | Stacked DiD |
104
+ | `Bacon` | `BaconDecomposition` | Goodman-Bacon decomposition |
105
+ | `EDiD` | `EfficientDiD` | Efficient DiD |
106
+
107
+ `TROP` already uses its short canonical name and needs no alias.
108
+
87
109
  ## Tutorials
88
110
 
89
111
  We provide Jupyter notebook tutorials in `docs/tutorials/`:
@@ -128,6 +128,11 @@ from diff_diff.continuous_did import (
128
128
  ContinuousDiDResults,
129
129
  DoseResponseCurve,
130
130
  )
131
+ from diff_diff.efficient_did import (
132
+ EfficientDiD,
133
+ EfficientDiDResults,
134
+ EDiDBootstrapResults,
135
+ )
131
136
  from diff_diff.trop import (
132
137
  TROP,
133
138
  TROPResults,
@@ -172,8 +177,9 @@ Gardner = TwoStageDiD
172
177
  DDD = TripleDifference
173
178
  Stacked = StackedDiD
174
179
  Bacon = BaconDecomposition
180
+ EDiD = EfficientDiD
175
181
 
176
- __version__ = "2.6.1"
182
+ __version__ = "2.7.0"
177
183
  __all__ = [
178
184
  # Estimators
179
185
  "DifferenceInDifferences",
@@ -231,6 +237,11 @@ __all__ = [
231
237
  "trop",
232
238
  "StackedDiDResults",
233
239
  "stacked_did",
240
+ # EfficientDiD
241
+ "EfficientDiD",
242
+ "EfficientDiDResults",
243
+ "EDiDBootstrapResults",
244
+ "EDiD",
234
245
  # Visualization
235
246
  "plot_event_study",
236
247
  "plot_group_effects",
@@ -19,6 +19,7 @@ __all__ = [
19
19
  "compute_percentile_ci",
20
20
  "compute_bootstrap_pvalue",
21
21
  "compute_effect_bootstrap_stats",
22
+ "compute_effect_bootstrap_stats_batch",
22
23
  ]
23
24
 
24
25
 
@@ -277,3 +278,126 @@ def compute_effect_bootstrap_stats(
277
278
  original_effect, valid_dist, n_valid=len(valid_dist)
278
279
  )
279
280
  return se, ci, p_value
281
+
282
+
283
+ def compute_effect_bootstrap_stats_batch(
284
+ original_effects: np.ndarray,
285
+ bootstrap_matrix: np.ndarray,
286
+ alpha: float = 0.05,
287
+ ) -> tuple:
288
+ """
289
+ Batch-compute bootstrap statistics for multiple effects at once.
290
+
291
+ Parameters
292
+ ----------
293
+ original_effects : np.ndarray
294
+ Array of original point estimates, shape (n_effects,).
295
+ bootstrap_matrix : np.ndarray
296
+ Bootstrap distributions, shape (n_bootstrap, n_effects).
297
+ alpha : float, default=0.05
298
+ Significance level.
299
+
300
+ Returns
301
+ -------
302
+ ses : np.ndarray
303
+ Bootstrap SEs for each effect.
304
+ ci_lowers : np.ndarray
305
+ Lower CI bounds for each effect.
306
+ ci_uppers : np.ndarray
307
+ Upper CI bounds for each effect.
308
+ p_values : np.ndarray
309
+ Bootstrap p-values for each effect.
310
+ """
311
+ n_bootstrap, n_effects = bootstrap_matrix.shape
312
+ ses = np.full(n_effects, np.nan)
313
+ ci_lowers = np.full(n_effects, np.nan)
314
+ ci_uppers = np.full(n_effects, np.nan)
315
+ p_values = np.full(n_effects, np.nan)
316
+
317
+ # Check for non-finite original effects
318
+ valid_effects = np.isfinite(original_effects)
319
+ if not np.any(valid_effects):
320
+ return ses, ci_lowers, ci_uppers, p_values
321
+
322
+ # Count valid bootstrap samples per effect
323
+ finite_mask = np.isfinite(bootstrap_matrix) # (n_bootstrap, n_effects)
324
+ n_valid = finite_mask.sum(axis=0) # (n_effects,)
325
+
326
+ # Determine which effects have enough valid samples
327
+ enough_valid = (n_valid >= n_bootstrap * 0.5) & valid_effects
328
+
329
+ if not np.any(enough_valid):
330
+ n_insufficient = int(np.sum(valid_effects))
331
+ if n_insufficient > 0:
332
+ warnings.warn(
333
+ f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
334
+ "Returning NaN for SE/CI/p-value.",
335
+ RuntimeWarning,
336
+ stacklevel=2,
337
+ )
338
+ return ses, ci_lowers, ci_uppers, p_values
339
+
340
+ # Warn about subset with insufficient samples
341
+ n_insufficient = int(np.sum(valid_effects & ~enough_valid))
342
+ if n_insufficient > 0:
343
+ warnings.warn(
344
+ f"{n_insufficient} effect(s) had too few valid bootstrap samples (<50%). "
345
+ "Returning NaN for SE/CI/p-value.",
346
+ RuntimeWarning,
347
+ stacklevel=2,
348
+ )
349
+
350
+ # For effects with all-finite bootstraps (common case), use vectorized ops
351
+ all_finite = (n_valid == n_bootstrap) & enough_valid
352
+ if np.any(all_finite):
353
+ idx = np.where(all_finite)[0]
354
+ sub = bootstrap_matrix[:, idx]
355
+
356
+ # Vectorized SE: std across bootstrap dimension
357
+ batch_ses = np.std(sub, axis=0, ddof=1)
358
+
359
+ # Vectorized percentile CI
360
+ lower_pct = alpha / 2 * 100
361
+ upper_pct = (1 - alpha / 2) * 100
362
+ batch_ci = np.percentile(sub, [lower_pct, upper_pct], axis=0)
363
+
364
+ # Vectorized p-values
365
+ batch_p = np.empty(len(idx))
366
+ for j, eff_idx in enumerate(idx):
367
+ eff = original_effects[eff_idx]
368
+ if eff >= 0:
369
+ batch_p[j] = np.mean(sub[:, j] <= 0)
370
+ else:
371
+ batch_p[j] = np.mean(sub[:, j] >= 0)
372
+ batch_p = np.minimum(2 * batch_p, 1.0)
373
+ batch_p = np.maximum(batch_p, 1 / (n_bootstrap + 1))
374
+
375
+ # Guard: SE must be positive and finite
376
+ se_valid = np.isfinite(batch_ses) & (batch_ses > 0)
377
+ n_bad_se = int(np.sum(~se_valid))
378
+ if n_bad_se > 0:
379
+ warnings.warn(
380
+ f"{n_bad_se} effect(s) had non-finite or zero bootstrap SE. "
381
+ "Returning NaN for SE/CI/p-value.",
382
+ RuntimeWarning,
383
+ stacklevel=2,
384
+ )
385
+ ses[idx[se_valid]] = batch_ses[se_valid]
386
+ ci_lowers[idx[se_valid]] = batch_ci[0][se_valid]
387
+ ci_uppers[idx[se_valid]] = batch_ci[1][se_valid]
388
+ p_values[idx[se_valid]] = batch_p[se_valid]
389
+
390
+ # Handle effects with some non-finite bootstraps (rare) via scalar fallback
391
+ partial_valid = enough_valid & ~all_finite
392
+ if np.any(partial_valid):
393
+ for j in np.where(partial_valid)[0]:
394
+ se, ci, pv = compute_effect_bootstrap_stats(
395
+ original_effects[j], bootstrap_matrix[:, j], alpha=alpha,
396
+ context=f"effect {j}"
397
+ )
398
+ ses[j] = se
399
+ ci_lowers[j] = ci[0]
400
+ ci_uppers[j] = ci[1]
401
+ p_values[j] = pv
402
+
403
+ return ses, ci_lowers, ci_uppers, p_values