diff-diff 2.5.0__tar.gz → 2.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {diff_diff-2.5.0 → diff_diff-2.6.1}/PKG-INFO +4 -1
  2. {diff_diff-2.5.0 → diff_diff-2.6.1}/README.md +3 -0
  3. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/__init__.py +38 -1
  4. diff_diff-2.6.1/diff_diff/bootstrap_utils.py +279 -0
  5. diff_diff-2.6.1/diff_diff/continuous_did.py +1155 -0
  6. diff_diff-2.6.1/diff_diff/continuous_did_bspline.py +188 -0
  7. diff_diff-2.6.1/diff_diff/continuous_did_results.py +353 -0
  8. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/prep.py +1 -0
  9. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/prep_dgp.py +156 -1
  10. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered.py +3 -1
  11. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered_bootstrap.py +23 -251
  12. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/sun_abraham.py +9 -41
  13. {diff_diff-2.5.0 → diff_diff-2.6.1}/pyproject.toml +1 -1
  14. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/Cargo.lock +24 -24
  15. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/Cargo.toml +1 -1
  16. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/_backend.py +0 -0
  17. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/bacon.py +0 -0
  18. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/datasets.py +0 -0
  19. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/diagnostics.py +0 -0
  20. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/estimators.py +0 -0
  21. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/honest_did.py +0 -0
  22. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/imputation.py +0 -0
  23. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/imputation_bootstrap.py +0 -0
  24. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/imputation_results.py +0 -0
  25. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/linalg.py +0 -0
  26. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/power.py +0 -0
  27. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/pretrends.py +0 -0
  28. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/results.py +0 -0
  29. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/stacked_did.py +0 -0
  30. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/stacked_did_results.py +0 -0
  31. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered_aggregation.py +0 -0
  32. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/staggered_results.py +0 -0
  33. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/synthetic_did.py +0 -0
  34. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/triple_diff.py +0 -0
  35. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/trop.py +0 -0
  36. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/trop_results.py +0 -0
  37. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/twfe.py +0 -0
  38. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/two_stage.py +0 -0
  39. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/two_stage_bootstrap.py +0 -0
  40. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/two_stage_results.py +0 -0
  41. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/utils.py +0 -0
  42. {diff_diff-2.5.0 → diff_diff-2.6.1}/diff_diff/visualization.py +0 -0
  43. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/build.rs +0 -0
  44. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/bootstrap.rs +0 -0
  45. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/lib.rs +0 -0
  46. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/linalg.rs +0 -0
  47. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/trop.rs +0 -0
  48. {diff_diff-2.5.0 → diff_diff-2.6.1}/rust/src/weights.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 2.5.0
3
+ Version: 2.6.1
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -138,6 +138,9 @@ We provide Jupyter notebook tutorials in `docs/tutorials/`:
138
138
  | `08_triple_diff.ipynb` | Triple Difference (DDD) estimation with proper covariate handling |
139
139
  | `09_real_world_examples.ipynb` | Real-world data examples (Card-Krueger, Castle Doctrine, Divorce Laws) |
140
140
  | `10_trop.ipynb` | Triply Robust Panel (TROP) estimation with factor model adjustment |
141
+ | `11_imputation_did.ipynb` | Imputation DiD (Borusyak et al. 2024), pre-trend test, efficiency comparison |
142
+ | `12_two_stage_did.ipynb` | Two-Stage DiD (Gardner 2022), GMM sandwich variance, per-observation effects |
143
+ | `13_stacked_did.ipynb` | Stacked DiD (Wing et al. 2024), Q-weights, sub-experiment inspection, trimming, clean control definitions |
141
144
 
142
145
  ## Data Preparation
143
146
 
@@ -100,6 +100,9 @@ We provide Jupyter notebook tutorials in `docs/tutorials/`:
100
100
  | `08_triple_diff.ipynb` | Triple Difference (DDD) estimation with proper covariate handling |
101
101
  | `09_real_world_examples.ipynb` | Real-world data examples (Card-Krueger, Castle Doctrine, Divorce Laws) |
102
102
  | `10_trop.ipynb` | Triply Robust Panel (TROP) estimation with factor model adjustment |
103
+ | `11_imputation_did.ipynb` | Imputation DiD (Borusyak et al. 2024), pre-trend test, efficiency comparison |
104
+ | `12_two_stage_did.ipynb` | Two-Stage DiD (Gardner 2022), GMM sandwich variance, per-observation effects |
105
+ | `13_stacked_did.ipynb` | Stacked DiD (Wing et al. 2024), Q-weights, sub-experiment inspection, trimming, clean control definitions |
103
106
 
104
107
  ## Data Preparation
105
108
 
@@ -70,6 +70,7 @@ from diff_diff.prep import (
70
70
  aggregate_to_cohorts,
71
71
  balance_panel,
72
72
  create_event_time,
73
+ generate_continuous_did_data,
73
74
  generate_did_data,
74
75
  generate_ddd_data,
75
76
  generate_event_study_data,
@@ -122,6 +123,11 @@ from diff_diff.triple_diff import (
122
123
  TripleDifferenceResults,
123
124
  triple_difference,
124
125
  )
126
+ from diff_diff.continuous_did import (
127
+ ContinuousDiD,
128
+ ContinuousDiDResults,
129
+ DoseResponseCurve,
130
+ )
125
131
  from diff_diff.trop import (
126
132
  TROP,
127
133
  TROPResults,
@@ -153,7 +159,21 @@ from diff_diff.datasets import (
153
159
  load_mpdta,
154
160
  )
155
161
 
156
- __version__ = "2.5.0"
162
+ # Estimator aliases — short names for convenience
163
+ DiD = DifferenceInDifferences
164
+ TWFE = TwoWayFixedEffects
165
+ EventStudy = MultiPeriodDiD
166
+ SDiD = SyntheticDiD
167
+ CS = CallawaySantAnna
168
+ CDiD = ContinuousDiD
169
+ SA = SunAbraham
170
+ BJS = ImputationDiD
171
+ Gardner = TwoStageDiD
172
+ DDD = TripleDifference
173
+ Stacked = StackedDiD
174
+ Bacon = BaconDecomposition
175
+
176
+ __version__ = "2.6.1"
157
177
  __all__ = [
158
178
  # Estimators
159
179
  "DifferenceInDifferences",
@@ -161,12 +181,26 @@ __all__ = [
161
181
  "MultiPeriodDiD",
162
182
  "SyntheticDiD",
163
183
  "CallawaySantAnna",
184
+ "ContinuousDiD",
164
185
  "SunAbraham",
165
186
  "ImputationDiD",
166
187
  "TwoStageDiD",
167
188
  "TripleDifference",
168
189
  "TROP",
169
190
  "StackedDiD",
191
+ # Estimator aliases (short names)
192
+ "DiD",
193
+ "TWFE",
194
+ "EventStudy",
195
+ "SDiD",
196
+ "CS",
197
+ "CDiD",
198
+ "SA",
199
+ "BJS",
200
+ "Gardner",
201
+ "DDD",
202
+ "Stacked",
203
+ "Bacon",
170
204
  # Bacon Decomposition
171
205
  "BaconDecomposition",
172
206
  "BaconDecompositionResults",
@@ -181,6 +215,8 @@ __all__ = [
181
215
  "CallawaySantAnnaResults",
182
216
  "CSBootstrapResults",
183
217
  "GroupTimeEffect",
218
+ "ContinuousDiDResults",
219
+ "DoseResponseCurve",
184
220
  "SunAbrahamResults",
185
221
  "SABootstrapResults",
186
222
  "ImputationDiDResults",
@@ -228,6 +264,7 @@ __all__ = [
228
264
  "generate_ddd_data",
229
265
  "generate_panel_data",
230
266
  "generate_event_study_data",
267
+ "generate_continuous_did_data",
231
268
  "create_event_time",
232
269
  "aggregate_to_cohorts",
233
270
  "rank_control_units",
@@ -0,0 +1,279 @@
1
+ """
2
+ Shared bootstrap utilities for multiplier bootstrap inference.
3
+
4
+ Provides weight generation, percentile CI, and p-value helpers used by
5
+ both CallawaySantAnna and ContinuousDiD estimators.
6
+ """
7
+
8
+ import warnings
9
+ from typing import Optional, Tuple
10
+
11
+ import numpy as np
12
+
13
+ from diff_diff._backend import HAS_RUST_BACKEND, _rust_bootstrap_weights
14
+
15
+ __all__ = [
16
+ "generate_bootstrap_weights",
17
+ "generate_bootstrap_weights_batch",
18
+ "generate_bootstrap_weights_batch_numpy",
19
+ "compute_percentile_ci",
20
+ "compute_bootstrap_pvalue",
21
+ "compute_effect_bootstrap_stats",
22
+ ]
23
+
24
+
25
def generate_bootstrap_weights(
    n_units: int,
    weight_type: str,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Draw one vector of multiplier-bootstrap weights.

    Parameters
    ----------
    n_units : int
        Number of units (clusters) to draw weights for.
    weight_type : str
        One of "rademacher", "mammen", or "webb".
    rng : np.random.Generator
        Source of randomness.

    Returns
    -------
    np.ndarray
        Weight vector of shape (n_units,).

    Raises
    ------
    ValueError
        If ``weight_type`` is not one of the supported names.
    """
    if weight_type == "rademacher":
        # Symmetric two-point weights: -1 or +1 with equal probability.
        return rng.choice([-1.0, 1.0], size=n_units)

    if weight_type == "mammen":
        # Mammen two-point distribution built from the golden ratio;
        # has mean 0 and variance 1.
        sqrt5 = np.sqrt(5)
        low = -(sqrt5 - 1) / 2
        high = (sqrt5 + 1) / 2
        p_low = (sqrt5 + 1) / (2 * sqrt5)
        return rng.choice([low, high], size=n_units, p=[p_low, 1 - p_low])

    if weight_type == "webb":
        # Webb six-point support, sampled uniformly.
        support = np.array([
            -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
            np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
        ])
        return rng.choice(support, size=n_units)

    raise ValueError(
        f"weight_type must be 'rademacher', 'mammen', or 'webb', "
        f"got '{weight_type}'"
    )
66
+
67
+
68
def generate_bootstrap_weights_batch(
    n_bootstrap: int,
    n_units: int,
    weight_type: str,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Generate the full matrix of multiplier-bootstrap weights at once.

    Dispatches to the Rust backend for parallel generation when it is
    available; otherwise uses the vectorized NumPy fallback.

    Parameters
    ----------
    n_bootstrap : int
        Number of bootstrap iterations.
    n_units : int
        Number of units (clusters) per iteration.
    weight_type : str
        One of "rademacher", "mammen", or "webb".
    rng : np.random.Generator
        Source of randomness; on the Rust path it is used only to draw
        the seed handed to the backend.

    Returns
    -------
    np.ndarray
        Weight matrix of shape (n_bootstrap, n_units).
    """
    rust_available = HAS_RUST_BACKEND and _rust_bootstrap_weights is not None
    if not rust_available:
        return generate_bootstrap_weights_batch_numpy(
            n_bootstrap, n_units, weight_type, rng
        )

    # Derive a fresh 63-bit seed from the caller's generator so the Rust
    # path remains reproducible under the same rng state.
    seed = rng.integers(0, 2**63 - 1)
    return _rust_bootstrap_weights(n_bootstrap, n_units, weight_type, seed)
99
+
100
+
101
def generate_bootstrap_weights_batch_numpy(
    n_bootstrap: int,
    n_units: int,
    weight_type: str,
    rng: np.random.Generator,
) -> np.ndarray:
    """
    Pure-NumPy batch generation of multiplier-bootstrap weights.

    Fallback used when the Rust backend is unavailable; draws the whole
    weight matrix in a single vectorized call.

    Parameters
    ----------
    n_bootstrap : int
        Number of bootstrap iterations (rows).
    n_units : int
        Number of units (clusters) per iteration (columns).
    weight_type : str
        One of "rademacher", "mammen", or "webb".
    rng : np.random.Generator
        Source of randomness.

    Returns
    -------
    np.ndarray
        Weight matrix of shape (n_bootstrap, n_units).

    Raises
    ------
    ValueError
        If ``weight_type`` is not a supported name.
    """
    shape = (n_bootstrap, n_units)

    if weight_type == "rademacher":
        # Symmetric +/-1 weights with equal probability.
        return rng.choice([-1.0, 1.0], size=shape)

    if weight_type == "mammen":
        # Mammen two-point distribution: mean 0, variance 1.
        sqrt5 = np.sqrt(5)
        low = -(sqrt5 - 1) / 2
        high = (sqrt5 + 1) / 2
        p_low = (sqrt5 + 1) / (2 * sqrt5)
        return rng.choice([low, high], size=shape, p=[p_low, 1 - p_low])

    if weight_type == "webb":
        # Webb six-point support, sampled uniformly.
        support = np.array([
            -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
            np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
        ])
        return rng.choice(support, size=shape)

    raise ValueError(
        f"weight_type must be 'rademacher', 'mammen', or 'webb', "
        f"got '{weight_type}'"
    )
145
+
146
+
147
def compute_percentile_ci(
    boot_dist: np.ndarray,
    alpha: float,
) -> Tuple[float, float]:
    """
    Percentile confidence interval from a bootstrap distribution.

    Parameters
    ----------
    boot_dist : np.ndarray
        1-D bootstrap distribution.
    alpha : float
        Significance level; 0.05 yields a 95% interval.

    Returns
    -------
    tuple of float
        ``(lower, upper)`` confidence interval bounds.
    """
    # Both tails in a single percentile call: alpha/2 and 1 - alpha/2.
    quantiles = [alpha / 2 * 100, (1 - alpha / 2) * 100]
    lower, upper = np.percentile(boot_dist, quantiles)
    return (float(lower), float(upper))
169
+
170
+
171
def compute_bootstrap_pvalue(
    original_effect: float,
    boot_dist: np.ndarray,
    n_valid: Optional[int] = None,
) -> float:
    """
    Two-sided bootstrap p-value using the percentile method.

    Parameters
    ----------
    original_effect : float
        Original point estimate.
    boot_dist : np.ndarray
        Bootstrap distribution of the effect.
    n_valid : int, optional
        Sample count used for the p-value floor; defaults to
        ``len(boot_dist)``.

    Returns
    -------
    float
        Two-sided p-value, floored at ``1 / (n_valid + 1)`` so an exact
        zero is never reported.
    """
    # Mass of the bootstrap distribution on the side of zero opposite
    # the sign of the point estimate.
    opposite_tail = (
        np.mean(boot_dist <= 0)
        if original_effect >= 0
        else np.mean(boot_dist >= 0)
    )

    # Double for two-sidedness, cap at 1, then apply the finite-sample
    # floor.
    two_sided = min(2 * opposite_tail, 1.0)
    floor_n = len(boot_dist) if n_valid is None else n_valid
    return float(max(two_sided, 1 / (floor_n + 1)))
203
+
204
+
205
def compute_effect_bootstrap_stats(
    original_effect: float,
    boot_dist: np.ndarray,
    alpha: float = 0.05,
    context: str = "bootstrap distribution",
) -> Tuple[float, Tuple[float, float], float]:
    """
    Summarize a bootstrap distribution for a single effect estimate.

    Non-finite draws are discarded before computing statistics. If fewer
    than half of the draws are finite, or the resulting standard error is
    zero or non-finite, every statistic is returned as NaN so the caller
    cannot mistake the output for valid inference.

    Parameters
    ----------
    original_effect : float
        Original point estimate.
    boot_dist : np.ndarray
        Bootstrap distribution of the effect.
    alpha : float, default=0.05
        Significance level.
    context : str, optional
        Description inserted into warning messages.

    Returns
    -------
    se : float
        Bootstrap standard error.
    ci : tuple of float
        Percentile confidence interval.
    p_value : float
        Bootstrap p-value.
    """
    nan_result = (np.nan, (np.nan, np.nan), np.nan)

    # A non-finite point estimate cannot support any inference.
    if not np.isfinite(original_effect):
        return nan_result

    finite_mask = np.isfinite(boot_dist)
    n_total = len(boot_dist)
    n_valid = np.sum(finite_mask)

    if n_valid < n_total:
        warnings.warn(
            f"Dropping {n_total - n_valid}/{n_total} non-finite bootstrap samples "
            f"in {context}. Bootstrap estimates based on remaining valid samples.",
            RuntimeWarning,
            stacklevel=3,
        )

    # With a majority of draws invalid, the distribution is unusable.
    if n_valid < n_total * 0.5:
        warnings.warn(
            f"Too few valid bootstrap samples ({n_valid}/{n_total}) in {context}. "
            "Returning NaN for SE/CI/p-value to signal invalid inference.",
            RuntimeWarning,
            stacklevel=3,
        )
        return nan_result

    valid_dist = boot_dist[finite_mask]
    se = float(np.std(valid_dist, ddof=1))

    # A degenerate SE would make the CI and p-value meaningless, so all
    # inference fields become NaN together.
    if not np.isfinite(se) or se <= 0:
        warnings.warn(
            f"Bootstrap SE is non-finite or zero (n_valid={n_valid}) in {context}. "
            "Returning NaN for SE/CI/p-value.",
            RuntimeWarning,
            stacklevel=3,
        )
        return nan_result

    return (
        se,
        compute_percentile_ci(valid_dist, alpha),
        compute_bootstrap_pvalue(original_effect, valid_dist, n_valid=len(valid_dist)),
    )