diff-diff 3.0.1__cp314-cp314-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. diff_diff/__init__.py +382 -0
  2. diff_diff/_backend.py +134 -0
  3. diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
  4. diff_diff/bacon.py +1140 -0
  5. diff_diff/bootstrap_utils.py +730 -0
  6. diff_diff/continuous_did.py +1626 -0
  7. diff_diff/continuous_did_bspline.py +190 -0
  8. diff_diff/continuous_did_results.py +374 -0
  9. diff_diff/datasets.py +815 -0
  10. diff_diff/diagnostics.py +882 -0
  11. diff_diff/efficient_did.py +1770 -0
  12. diff_diff/efficient_did_bootstrap.py +359 -0
  13. diff_diff/efficient_did_covariates.py +899 -0
  14. diff_diff/efficient_did_results.py +368 -0
  15. diff_diff/efficient_did_weights.py +617 -0
  16. diff_diff/estimators.py +1501 -0
  17. diff_diff/honest_did.py +2585 -0
  18. diff_diff/imputation.py +2458 -0
  19. diff_diff/imputation_bootstrap.py +418 -0
  20. diff_diff/imputation_results.py +448 -0
  21. diff_diff/linalg.py +2538 -0
  22. diff_diff/power.py +2588 -0
  23. diff_diff/practitioner.py +869 -0
  24. diff_diff/prep.py +1738 -0
  25. diff_diff/prep_dgp.py +1718 -0
  26. diff_diff/pretrends.py +1105 -0
  27. diff_diff/results.py +918 -0
  28. diff_diff/stacked_did.py +1049 -0
  29. diff_diff/stacked_did_results.py +339 -0
  30. diff_diff/staggered.py +3895 -0
  31. diff_diff/staggered_aggregation.py +864 -0
  32. diff_diff/staggered_bootstrap.py +752 -0
  33. diff_diff/staggered_results.py +416 -0
  34. diff_diff/staggered_triple_diff.py +1545 -0
  35. diff_diff/staggered_triple_diff_results.py +416 -0
  36. diff_diff/sun_abraham.py +1685 -0
  37. diff_diff/survey.py +1981 -0
  38. diff_diff/synthetic_did.py +1136 -0
  39. diff_diff/triple_diff.py +2047 -0
  40. diff_diff/trop.py +952 -0
  41. diff_diff/trop_global.py +1270 -0
  42. diff_diff/trop_local.py +1307 -0
  43. diff_diff/trop_results.py +356 -0
  44. diff_diff/twfe.py +542 -0
  45. diff_diff/two_stage.py +1952 -0
  46. diff_diff/two_stage_bootstrap.py +520 -0
  47. diff_diff/two_stage_results.py +400 -0
  48. diff_diff/utils.py +1902 -0
  49. diff_diff/visualization/__init__.py +61 -0
  50. diff_diff/visualization/_common.py +328 -0
  51. diff_diff/visualization/_continuous.py +274 -0
  52. diff_diff/visualization/_diagnostic.py +817 -0
  53. diff_diff/visualization/_event_study.py +1086 -0
  54. diff_diff/visualization/_power.py +661 -0
  55. diff_diff/visualization/_staggered.py +833 -0
  56. diff_diff/visualization/_synthetic.py +197 -0
  57. diff_diff/wooldridge.py +1285 -0
  58. diff_diff/wooldridge_results.py +349 -0
  59. diff_diff-3.0.1.dist-info/METADATA +2997 -0
  60. diff_diff-3.0.1.dist-info/RECORD +62 -0
  61. diff_diff-3.0.1.dist-info/WHEEL +4 -0
  62. diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
@@ -0,0 +1,356 @@
1
+ """
2
+ Result containers for the Triply Robust Panel (TROP) estimator.
3
+
4
+ This module contains the TROPResults dataclass, _PrecomputedStructures TypedDict,
5
+ and _LAMBDA_INF sentinel value. Extracted from trop.py for module size management.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ try:
15
+ from typing import TypedDict
16
+ except ImportError:
17
+ from typing_extensions import TypedDict
18
+
19
+ from diff_diff.results import _format_survey_block, _get_significance_stars
20
+
21
# Names exported by a star-import of this module. The two underscore-prefixed
# helpers are listed explicitly because a star-import would otherwise skip
# them (presumably so trop.py can keep re-exporting them -- confirm).
__all__ = ["_LAMBDA_INF", "_PrecomputedStructures", "TROPResults"]


# Marker meaning "λ_nn is disabled" during the LOOCV parameter search.
# Paper footnote 2: λ_nn = ∞ switches the factor model off (L = 0).
# λ_time and λ_unit use a different convention: 0.0 disables them, because
# Eq. 3 gives exp(-0 × dist) = 1 for every distance, i.e. uniform weights.
_LAMBDA_INF: float = float("inf")
33
+
34
+
35
class _PrecomputedStructures(TypedDict):
    """Schema of the structures shared across LOOCV iterations.

    `_precompute_structures()` builds every entry once; LOOCV and the final
    estimation then reuse them instead of recomputing.
    """

    # Pairwise unit distance matrix (n_units x n_units).
    unit_dist_matrix: np.ndarray
    # Time distance matrix where [t, s] = |t - s| (n_periods x n_periods).
    time_dist_matrix: np.ndarray
    # Boolean mask for control observations (D == 0).
    control_mask: np.ndarray
    # Boolean mask for treated observations (D == 1).
    treated_mask: np.ndarray
    # List of (t, i) tuples for treated observations.
    treated_observations: List[Tuple[int, int]]
    # List of (t, i) tuples for valid control observations.
    control_obs: List[Tuple[int, int]]
    # Array of never-treated unit indices (kept for backward compatibility).
    control_unit_idx: np.ndarray
    # Treatment indicator matrix (n_periods x n_units) for dynamic control sets.
    D: np.ndarray
    # Outcome matrix (n_periods x n_units).
    Y: np.ndarray
    # Number of units.
    n_units: int
    # Number of time periods.
    n_periods: int
64
+
65
+
66
@dataclass
class TROPResults:
    """
    Results from a Triply Robust Panel (TROP) estimation.

    TROP combines nuclear norm regularized factor estimation with
    exponential distance-based unit weights and time decay weights.

    Attributes
    ----------
    att : float
        Average Treatment effect on the Treated (ATT).
    se : float
        Standard error of the ATT estimate.
    t_stat : float
        T-statistic for the ATT estimate.
    p_value : float
        P-value for the null hypothesis that ATT = 0.
    conf_int : tuple[float, float]
        Confidence interval for the ATT.
    n_obs : int
        Number of observations used in estimation.
    n_treated : int
        Number of treated units.
    n_control : int
        Number of control units.
    n_treated_obs : int
        Number of treated unit-time observations.
    unit_effects : dict
        Estimated unit fixed effects (alpha_i).
    time_effects : dict
        Estimated time fixed effects (beta_t).
    treatment_effects : dict
        Individual treatment effects for each treated (unit, time) pair.
    lambda_time : float
        Selected time weight decay parameter from grid. 0.0 = uniform time
        weights (disabled) per Eq. 3.
    lambda_unit : float
        Selected unit weight decay parameter from grid. 0.0 = uniform unit
        weights (disabled) per Eq. 3.
    lambda_nn : float
        Selected nuclear norm regularization parameter from grid. inf = factor
        model disabled (L=0); converted to 1e10 internally for computation.
    factor_matrix : np.ndarray
        Estimated low-rank factor matrix L (n_periods x n_units).
    effective_rank : float
        Effective rank of the factor matrix (sum of singular values / max).
    loocv_score : float
        Leave-one-out cross-validation score for selected parameters.
    alpha : float
        Significance level for confidence interval.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods (periods with D=1 observations).
    n_bootstrap : int, optional
        Number of bootstrap replications (if bootstrap variance).
    bootstrap_distribution : np.ndarray, optional
        Bootstrap distribution of estimates.
    survey_metadata : Any, optional
        Survey design metadata (SurveyMetadata instance from
        diff_diff.survey), or None.
    """

    att: float
    se: float
    t_stat: float
    p_value: float
    conf_int: Tuple[float, float]
    n_obs: int
    n_treated: int
    n_control: int
    n_treated_obs: int
    unit_effects: Dict[Any, float]
    time_effects: Dict[Any, float]
    treatment_effects: Dict[Tuple[Any, Any], float]
    lambda_time: float
    lambda_unit: float
    lambda_nn: float
    factor_matrix: np.ndarray
    effective_rank: float
    loocv_score: float
    alpha: float = 0.05
    n_pre_periods: int = 0
    n_post_periods: int = 0
    n_bootstrap: Optional[int] = field(default=None)
    bootstrap_distribution: Optional[np.ndarray] = field(default=None, repr=False)
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None)

    def __repr__(self) -> str:
        """Concise string representation."""
        sig = _get_significance_stars(self.p_value)
        return (
            f"TROPResults(ATT={self.att:.4f}{sig}, "
            f"SE={self.se:.4f}, "
            f"eff_rank={self.effective_rank:.1f}, "
            f"p={self.p_value:.4f})"
        )

    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / |ATT|. NaN when ATT is 0 or SE non-finite."""
        if not (np.isfinite(self.se) and self.se >= 0):
            return np.nan
        if not np.isfinite(self.att) or self.att == 0:
            return np.nan
        return self.se / abs(self.att)

    @staticmethod
    def _format_conf_level(alpha: float) -> str:
        """Return the confidence level implied by *alpha* as a percent label.

        Truncating with ``int((1 - alpha) * 100)`` lets binary rounding error
        leak into the label (alpha=0.9 was printed as "9") and drops the
        fractional part of levels such as 97.5. The ``g`` presentation type
        rounds to six significant digits and strips trailing zeros instead.
        """
        return f"{(1 - alpha) * 100:g}"

    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Generate a formatted summary of the estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-interval label.
            Defaults to the alpha used during estimation. The interval
            itself is always ``self.conf_int``; it is not recomputed here.

        Returns
        -------
        str
            Formatted summary table.
        """
        # None (and any other falsy value, e.g. 0) falls back to the alpha
        # stored on the result object.
        alpha = alpha or self.alpha
        conf_level = self._format_conf_level(alpha)

        lines = [
            "=" * 75,
            "Triply Robust Panel (TROP) Estimation Results".center(75),
            "Athey, Imbens, Qu & Viviano (2025)".center(75),
            "=" * 75,
            "",
            f"{'Observations:':<25} {self.n_obs:>10}",
            f"{'Treated units:':<25} {self.n_treated:>10}",
            f"{'Control units:':<25} {self.n_control:>10}",
            f"{'Treated observations:':<25} {self.n_treated_obs:>10}",
            f"{'Pre-treatment periods:':<25} {self.n_pre_periods:>10}",
            f"{'Post-treatment periods:':<25} {self.n_post_periods:>10}",
            "",
            "-" * 75,
            "Tuning Parameters (selected via LOOCV)".center(75),
            "-" * 75,
            f"{'Lambda (time decay):':<25} {self.lambda_time:>10.4f}",
            f"{'Lambda (unit distance):':<25} {self.lambda_unit:>10.4f}",
            f"{'Lambda (nuclear norm):':<25} {self.lambda_nn:>10.4f}",
            f"{'Effective rank:':<25} {self.effective_rank:>10.2f}",
            f"{'LOOCV score:':<25} {self.loocv_score:>10.6f}",
        ]

        # Variance info
        if self.n_bootstrap is not None:
            lines.append(f"{'Bootstrap replications:':<25} {self.n_bootstrap:>10}")

        # Add survey design info
        if self.survey_metadata is not None:
            sm = self.survey_metadata
            lines.extend(_format_survey_block(sm, 75))

        # NOTE(review): the label below follows the `alpha` argument while the
        # bounds come from self.conf_int (built with self.alpha at estimation
        # time), so passing a different alpha relabels the same interval.
        lines.extend(
            [
                "",
                "-" * 75,
                f"{'Parameter':<15} {'Estimate':>12} {'Std. Err.':>12} "
                f"{'t-stat':>10} {'P>|t|':>10} {'':>5}",
                "-" * 75,
                f"{'ATT':<15} {self.att:>12.4f} {self.se:>12.4f} "
                f"{self.t_stat:>10.3f} {self.p_value:>10.4f} {self.significance_stars:>5}",
                "-" * 75,
                "",
                f"{conf_level}% Confidence Interval: [{self.conf_int[0]:.4f}, {self.conf_int[1]:.4f}]",
            ]
        )

        # coef_var is NaN when it is undefined; only print a finite value.
        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

        # Add significance codes
        lines.extend(
            [
                "",
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                "=" * 75,
            ]
        )

        return "\n".join(lines)

    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Print the summary to stdout."""
        print(self.summary(alpha))

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert results to a dictionary.

        Only scalar fields are included: the confidence interval is split
        into ``conf_int_lower`` / ``conf_int_upper``, and the effect
        dictionaries, factor matrix and bootstrap distribution are omitted.
        Survey fields are added only when ``survey_metadata`` is set.

        Returns
        -------
        Dict[str, Any]
            Dictionary containing the scalar estimation results.
        """
        result = {
            "att": self.att,
            "se": self.se,
            "t_stat": self.t_stat,
            "p_value": self.p_value,
            "conf_int_lower": self.conf_int[0],
            "conf_int_upper": self.conf_int[1],
            "n_obs": self.n_obs,
            "n_treated": self.n_treated,
            "n_control": self.n_control,
            "n_treated_obs": self.n_treated_obs,
            "n_pre_periods": self.n_pre_periods,
            "n_post_periods": self.n_post_periods,
            "lambda_time": self.lambda_time,
            "lambda_unit": self.lambda_unit,
            "lambda_nn": self.lambda_nn,
            "effective_rank": self.effective_rank,
            "loocv_score": self.loocv_score,
        }
        if self.survey_metadata is not None:
            sm = self.survey_metadata
            result["weight_type"] = sm.weight_type
            result["effective_n"] = sm.effective_n
            result["design_effect"] = sm.design_effect
            result["sum_weights"] = sm.sum_weights
            result["n_strata"] = sm.n_strata
            result["n_psu"] = sm.n_psu
            result["df_survey"] = sm.df_survey
        return result

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert results to a pandas DataFrame.

        Returns
        -------
        pd.DataFrame
            Single-row DataFrame built from :meth:`to_dict`.
        """
        return pd.DataFrame([self.to_dict()])

    def get_treatment_effects_df(self) -> pd.DataFrame:
        """
        Get individual treatment effects as a DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame with unit, time, and treatment effect columns. The
            columns are present even when there are no treatment effects.
        """
        # Explicit columns keep the schema stable for an empty dict; without
        # them pandas returns a frame with no columns at all.
        return pd.DataFrame(
            [
                {"unit": unit, "time": time, "effect": effect}
                for (unit, time), effect in self.treatment_effects.items()
            ],
            columns=["unit", "time", "effect"],
        )

    def get_unit_effects_df(self) -> pd.DataFrame:
        """
        Get unit fixed effects as a DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame with unit and effect columns. The columns are present
            even when there are no unit effects.
        """
        return pd.DataFrame(
            [{"unit": unit, "effect": effect} for unit, effect in self.unit_effects.items()],
            columns=["unit", "effect"],
        )

    def get_time_effects_df(self) -> pd.DataFrame:
        """
        Get time fixed effects as a DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame with time and effect columns. The columns are present
            even when there are no time effects.
        """
        return pd.DataFrame(
            [{"time": time, "effect": effect} for time, effect in self.time_effects.items()],
            columns=["time", "effect"],
        )

    @property
    def is_significant(self) -> bool:
        """Check if the ATT is statistically significant at the alpha level."""
        return bool(self.p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Return significance stars based on p-value."""
        return _get_significance_stars(self.p_value)