diff-diff 3.0.1__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diff_diff/__init__.py +382 -0
- diff_diff/_backend.py +134 -0
- diff_diff/_rust_backend.cp314-win_amd64.pyd +0 -0
- diff_diff/bacon.py +1140 -0
- diff_diff/bootstrap_utils.py +730 -0
- diff_diff/continuous_did.py +1626 -0
- diff_diff/continuous_did_bspline.py +190 -0
- diff_diff/continuous_did_results.py +374 -0
- diff_diff/datasets.py +815 -0
- diff_diff/diagnostics.py +882 -0
- diff_diff/efficient_did.py +1770 -0
- diff_diff/efficient_did_bootstrap.py +359 -0
- diff_diff/efficient_did_covariates.py +899 -0
- diff_diff/efficient_did_results.py +368 -0
- diff_diff/efficient_did_weights.py +617 -0
- diff_diff/estimators.py +1501 -0
- diff_diff/honest_did.py +2585 -0
- diff_diff/imputation.py +2458 -0
- diff_diff/imputation_bootstrap.py +418 -0
- diff_diff/imputation_results.py +448 -0
- diff_diff/linalg.py +2538 -0
- diff_diff/power.py +2588 -0
- diff_diff/practitioner.py +869 -0
- diff_diff/prep.py +1738 -0
- diff_diff/prep_dgp.py +1718 -0
- diff_diff/pretrends.py +1105 -0
- diff_diff/results.py +918 -0
- diff_diff/stacked_did.py +1049 -0
- diff_diff/stacked_did_results.py +339 -0
- diff_diff/staggered.py +3895 -0
- diff_diff/staggered_aggregation.py +864 -0
- diff_diff/staggered_bootstrap.py +752 -0
- diff_diff/staggered_results.py +416 -0
- diff_diff/staggered_triple_diff.py +1545 -0
- diff_diff/staggered_triple_diff_results.py +416 -0
- diff_diff/sun_abraham.py +1685 -0
- diff_diff/survey.py +1981 -0
- diff_diff/synthetic_did.py +1136 -0
- diff_diff/triple_diff.py +2047 -0
- diff_diff/trop.py +952 -0
- diff_diff/trop_global.py +1270 -0
- diff_diff/trop_local.py +1307 -0
- diff_diff/trop_results.py +356 -0
- diff_diff/twfe.py +542 -0
- diff_diff/two_stage.py +1952 -0
- diff_diff/two_stage_bootstrap.py +520 -0
- diff_diff/two_stage_results.py +400 -0
- diff_diff/utils.py +1902 -0
- diff_diff/visualization/__init__.py +61 -0
- diff_diff/visualization/_common.py +328 -0
- diff_diff/visualization/_continuous.py +274 -0
- diff_diff/visualization/_diagnostic.py +817 -0
- diff_diff/visualization/_event_study.py +1086 -0
- diff_diff/visualization/_power.py +661 -0
- diff_diff/visualization/_staggered.py +833 -0
- diff_diff/visualization/_synthetic.py +197 -0
- diff_diff/wooldridge.py +1285 -0
- diff_diff/wooldridge_results.py +349 -0
- diff_diff-3.0.1.dist-info/METADATA +2997 -0
- diff_diff-3.0.1.dist-info/RECORD +62 -0
- diff_diff-3.0.1.dist-info/WHEEL +4 -0
- diff_diff-3.0.1.dist-info/sboms/diff_diff_rust.cyclonedx.json +5843 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Result containers for the Triply Robust Panel (TROP) estimator.
|
|
3
|
+
|
|
4
|
+
This module contains the TROPResults dataclass, _PrecomputedStructures TypedDict,
|
|
5
|
+
and _LAMBDA_INF sentinel value. Extracted from trop.py for module size management.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
from typing import TypedDict
|
|
16
|
+
except ImportError:
|
|
17
|
+
from typing_extensions import TypedDict
|
|
18
|
+
|
|
19
|
+
from diff_diff.results import _format_survey_block, _get_significance_stars
|
|
20
|
+
|
|
21
|
+
# Public export list for this module. The underscore-prefixed names are
# listed explicitly because a star-import skips leading-underscore names
# unless ``__all__`` names them.
__all__ = [
    "_LAMBDA_INF",
    "_PrecomputedStructures",
    "TROPResults",
]


# Sentinel value for "disabled" λ_nn in LOOCV parameter search.
# Per paper's footnote 2: λ_nn=∞ disables the factor model (L=0).
# For λ_time and λ_unit, 0.0 means disabled (uniform weights) per Eq. 3:
# exp(-0 × dist) = 1 for all distances.
_LAMBDA_INF: float = float("inf")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class _PrecomputedStructures(TypedDict):
    """Type definition for pre-computed structures used across LOOCV iterations.

    These structures are computed once in `_precompute_structures()` and reused
    to avoid redundant computation during LOOCV and final estimation.

    This is a plain ``TypedDict`` declared without ``total=False``, so type
    checkers treat every key below as required. The string literal following
    each key describes that key; these strings are no-ops at runtime.
    """

    unit_dist_matrix: np.ndarray
    """Pairwise unit distance matrix (n_units x n_units)."""
    time_dist_matrix: np.ndarray
    """Time distance matrix where [t, s] = |t - s| (n_periods x n_periods)."""
    control_mask: np.ndarray
    """Boolean mask for control observations (D == 0)."""
    treated_mask: np.ndarray
    """Boolean mask for treated observations (D == 1)."""
    treated_observations: List[Tuple[int, int]]
    """List of (t, i) tuples for treated observations."""
    control_obs: List[Tuple[int, int]]
    """List of (t, i) tuples for valid control observations."""
    control_unit_idx: np.ndarray
    """Array of never-treated unit indices (for backward compatibility)."""
    D: np.ndarray
    """Treatment indicator matrix (n_periods x n_units) for dynamic control sets."""
    Y: np.ndarray
    """Outcome matrix (n_periods x n_units)."""
    n_units: int
    """Number of units."""
    n_periods: int
    """Number of time periods."""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class TROPResults:
    """
    Results from a Triply Robust Panel (TROP) estimation.

    TROP combines nuclear norm regularized factor estimation with
    exponential distance-based unit weights and time decay weights.

    Attributes
    ----------
    att : float
        Average Treatment effect on the Treated (ATT).
    se : float
        Standard error of the ATT estimate.
    t_stat : float
        T-statistic for the ATT estimate.
    p_value : float
        P-value for the null hypothesis that ATT = 0.
    conf_int : tuple[float, float]
        Confidence interval for the ATT.
    n_obs : int
        Number of observations used in estimation.
    n_treated : int
        Number of treated units.
    n_control : int
        Number of control units.
    n_treated_obs : int
        Number of treated unit-time observations.
    unit_effects : dict
        Estimated unit fixed effects (alpha_i).
    time_effects : dict
        Estimated time fixed effects (beta_t).
    treatment_effects : dict
        Individual treatment effects for each treated (unit, time) pair.
    lambda_time : float
        Selected time weight decay parameter from grid. 0.0 = uniform time
        weights (disabled) per Eq. 3.
    lambda_unit : float
        Selected unit weight decay parameter from grid. 0.0 = uniform unit
        weights (disabled) per Eq. 3.
    lambda_nn : float
        Selected nuclear norm regularization parameter from grid. inf = factor
        model disabled (L=0); converted to 1e10 internally for computation.
    factor_matrix : np.ndarray
        Estimated low-rank factor matrix L (n_periods x n_units).
    effective_rank : float
        Effective rank of the factor matrix (sum of singular values / max).
    loocv_score : float
        Leave-one-out cross-validation score for selected parameters.
    alpha : float
        Significance level for confidence interval.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods (periods with D=1 observations).
    n_bootstrap : int, optional
        Number of bootstrap replications (if bootstrap variance).
    bootstrap_distribution : np.ndarray, optional
        Bootstrap distribution of estimates.
    survey_metadata : Any, optional
        Survey design metadata (SurveyMetadata instance from diff_diff.survey).
        When set, its fields are appended to ``summary()`` and ``to_dict()``.
    """

    att: float
    se: float
    t_stat: float
    p_value: float
    conf_int: Tuple[float, float]
    n_obs: int
    n_treated: int
    n_control: int
    n_treated_obs: int
    unit_effects: Dict[Any, float]
    time_effects: Dict[Any, float]
    treatment_effects: Dict[Tuple[Any, Any], float]
    lambda_time: float
    lambda_unit: float
    lambda_nn: float
    factor_matrix: np.ndarray
    effective_rank: float
    loocv_score: float
    alpha: float = 0.05
    n_pre_periods: int = 0
    n_post_periods: int = 0
    n_bootstrap: Optional[int] = field(default=None)
    bootstrap_distribution: Optional[np.ndarray] = field(default=None, repr=False)
    # Survey design metadata (SurveyMetadata instance from diff_diff.survey)
    survey_metadata: Optional[Any] = field(default=None)

    def __repr__(self) -> str:
        """Concise string representation."""
        return (
            f"TROPResults(ATT={self.att:.4f}{self.significance_stars}, "
            f"SE={self.se:.4f}, "
            f"eff_rank={self.effective_rank:.1f}, "
            f"p={self.p_value:.4f})"
        )

    @property
    def coef_var(self) -> float:
        """Coefficient of variation: SE / |ATT|. NaN when ATT is 0 or SE non-finite."""
        if not (np.isfinite(self.se) and self.se >= 0):
            return np.nan
        if not np.isfinite(self.att) or self.att == 0:
            return np.nan
        return self.se / abs(self.att)

    @staticmethod
    def _conf_level_label(alpha: float) -> str:
        """Format the confidence level ``(1 - alpha) * 100`` for display.

        Uses ``%g``-style formatting so integer levels print without a
        decimal part ("95") while fractional levels are kept ("97.5")
        instead of being truncated.
        """
        return f"{(1 - alpha) * 100:g}"

    def summary(self, alpha: Optional[float] = None) -> str:
        """
        Generate a formatted summary of the estimation results.

        Parameters
        ----------
        alpha : float, optional
            Significance level used for the confidence-interval label.
            Defaults to the alpha used during estimation. Only the label
            changes: the interval bounds printed are the stored
            ``conf_int`` values and are not recomputed.

        Returns
        -------
        str
            Formatted summary table.
        """
        # Explicit None check so that a caller-supplied 0.0 is not silently
        # replaced by the stored alpha.
        if alpha is None:
            alpha = self.alpha
        conf_level = self._conf_level_label(alpha)

        lines = [
            "=" * 75,
            "Triply Robust Panel (TROP) Estimation Results".center(75),
            "Athey, Imbens, Qu & Viviano (2025)".center(75),
            "=" * 75,
            "",
            f"{'Observations:':<25} {self.n_obs:>10}",
            f"{'Treated units:':<25} {self.n_treated:>10}",
            f"{'Control units:':<25} {self.n_control:>10}",
            f"{'Treated observations:':<25} {self.n_treated_obs:>10}",
            f"{'Pre-treatment periods:':<25} {self.n_pre_periods:>10}",
            f"{'Post-treatment periods:':<25} {self.n_post_periods:>10}",
            "",
            "-" * 75,
            "Tuning Parameters (selected via LOOCV)".center(75),
            "-" * 75,
            f"{'Lambda (time decay):':<25} {self.lambda_time:>10.4f}",
            f"{'Lambda (unit distance):':<25} {self.lambda_unit:>10.4f}",
            f"{'Lambda (nuclear norm):':<25} {self.lambda_nn:>10.4f}",
            f"{'Effective rank:':<25} {self.effective_rank:>10.2f}",
            f"{'LOOCV score:':<25} {self.loocv_score:>10.6f}",
        ]

        # Variance info
        if self.n_bootstrap is not None:
            lines.append(f"{'Bootstrap replications:':<25} {self.n_bootstrap:>10}")

        # Add survey design info
        if self.survey_metadata is not None:
            lines.extend(_format_survey_block(self.survey_metadata, 75))

        lines.extend(
            [
                "",
                "-" * 75,
                f"{'Parameter':<15} {'Estimate':>12} {'Std. Err.':>12} "
                f"{'t-stat':>10} {'P>|t|':>10} {'':>5}",
                "-" * 75,
                f"{'ATT':<15} {self.att:>12.4f} {self.se:>12.4f} "
                f"{self.t_stat:>10.3f} {self.p_value:>10.4f} {self.significance_stars:>5}",
                "-" * 75,
                "",
                f"{conf_level}% Confidence Interval: [{self.conf_int[0]:.4f}, {self.conf_int[1]:.4f}]",
            ]
        )

        # The CV line is omitted when it is undefined (NaN).
        cv = self.coef_var
        if np.isfinite(cv):
            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")

        # Add significance codes
        lines.extend(
            [
                "",
                "Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1",
                "=" * 75,
            ]
        )

        return "\n".join(lines)

    def print_summary(self, alpha: Optional[float] = None) -> None:
        """Print the summary to stdout."""
        print(self.summary(alpha))

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert results to a dictionary.

        Returns
        -------
        Dict[str, Any]
            Dictionary of the scalar estimation results. The confidence
            interval is split into ``conf_int_lower`` / ``conf_int_upper``.
            Survey design fields are added only when ``survey_metadata``
            is set.
        """
        result = {
            "att": self.att,
            "se": self.se,
            "t_stat": self.t_stat,
            "p_value": self.p_value,
            "conf_int_lower": self.conf_int[0],
            "conf_int_upper": self.conf_int[1],
            "n_obs": self.n_obs,
            "n_treated": self.n_treated,
            "n_control": self.n_control,
            "n_treated_obs": self.n_treated_obs,
            "n_pre_periods": self.n_pre_periods,
            "n_post_periods": self.n_post_periods,
            "lambda_time": self.lambda_time,
            "lambda_unit": self.lambda_unit,
            "lambda_nn": self.lambda_nn,
            "effective_rank": self.effective_rank,
            "loocv_score": self.loocv_score,
        }
        if self.survey_metadata is not None:
            sm = self.survey_metadata
            result["weight_type"] = sm.weight_type
            result["effective_n"] = sm.effective_n
            result["design_effect"] = sm.design_effect
            result["sum_weights"] = sm.sum_weights
            result["n_strata"] = sm.n_strata
            result["n_psu"] = sm.n_psu
            result["df_survey"] = sm.df_survey
        return result

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert results to a pandas DataFrame.

        Returns
        -------
        pd.DataFrame
            Single-row DataFrame built from ``to_dict()``.
        """
        return pd.DataFrame([self.to_dict()])

    def get_treatment_effects_df(self) -> pd.DataFrame:
        """
        Get individual treatment effects as a DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame with unit, time, and effect columns. The columns are
            present even when there are no treatment effects.
        """
        rows = [
            {"unit": unit, "time": time, "effect": effect}
            for (unit, time), effect in self.treatment_effects.items()
        ]
        # Explicit columns keep the schema stable for an empty result.
        return pd.DataFrame(rows, columns=["unit", "time", "effect"])

    def get_unit_effects_df(self) -> pd.DataFrame:
        """
        Get unit fixed effects as a DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame with unit and effect columns. The columns are present
            even when there are no unit effects.
        """
        rows = [{"unit": unit, "effect": effect} for unit, effect in self.unit_effects.items()]
        return pd.DataFrame(rows, columns=["unit", "effect"])

    def get_time_effects_df(self) -> pd.DataFrame:
        """
        Get time fixed effects as a DataFrame.

        Returns
        -------
        pd.DataFrame
            DataFrame with time and effect columns. The columns are present
            even when there are no time effects.
        """
        rows = [{"time": time, "effect": effect} for time, effect in self.time_effects.items()]
        return pd.DataFrame(rows, columns=["time", "effect"])

    @property
    def is_significant(self) -> bool:
        """Check if the ATT is statistically significant at the alpha level."""
        return bool(self.p_value < self.alpha)

    @property
    def significance_stars(self) -> str:
        """Return significance stars based on p-value."""
        return _get_significance_stars(self.p_value)
|