diff-diff 3.1.0__tar.gz → 3.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diff_diff-3.1.0 → diff_diff-3.1.2}/PKG-INFO +28 -15
- {diff_diff-3.1.0 → diff_diff-3.1.2}/README.md +26 -13
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/__init__.py +11 -6
- diff_diff-3.1.2/diff_diff/_guides_api.py +48 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/chaisemartin_dhaultfoeuille.py +658 -104
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/chaisemartin_dhaultfoeuille_results.py +7 -2
- diff_diff-3.1.2/diff_diff/guides/__init__.py +1 -0
- diff_diff-3.1.2/diff_diff/guides/llms-full.txt +1743 -0
- diff_diff-3.1.2/diff_diff/guides/llms-practitioner.txt +562 -0
- diff_diff-3.1.2/diff_diff/guides/llms.txt +116 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/honest_did.py +11 -1
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/imputation.py +7 -1
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/practitioner.py +51 -12
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/results.py +546 -2
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/synthetic_did.py +410 -64
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/two_stage.py +7 -1
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/utils.py +56 -4
- {diff_diff-3.1.0 → diff_diff-3.1.2}/pyproject.toml +5 -2
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/Cargo.lock +11 -11
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/Cargo.toml +1 -1
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/_backend.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/bacon.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/bootstrap_utils.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/continuous_did.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/continuous_did_bspline.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/continuous_did_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/datasets.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/diagnostics.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/efficient_did.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/efficient_did_bootstrap.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/efficient_did_covariates.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/efficient_did_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/efficient_did_weights.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/estimators.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/imputation_bootstrap.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/imputation_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/linalg.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/power.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/prep.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/prep_dgp.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/pretrends.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/stacked_did.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/stacked_did_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/staggered.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/staggered_aggregation.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/staggered_bootstrap.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/staggered_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/staggered_triple_diff.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/staggered_triple_diff_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/sun_abraham.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/survey.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/triple_diff.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/trop.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/trop_global.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/trop_local.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/trop_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/twfe.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/two_stage_bootstrap.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/two_stage_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/__init__.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_common.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_continuous.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_diagnostic.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_event_study.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_power.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_staggered.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/visualization/_synthetic.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/wooldridge.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/diff_diff/wooldridge_results.py +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/build.rs +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/src/bootstrap.rs +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/src/lib.rs +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/src/linalg.rs +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/src/trop.rs +0 -0
- {diff_diff-3.1.0 → diff_diff-3.1.2}/rust/src/weights.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diff-diff
|
|
3
|
-
Version: 3.1.0
|
|
3
|
+
Version: 3.1.2
|
|
4
4
|
Classifier: Development Status :: 5 - Production/Stable
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
46
46
|
Project-URL: Documentation, https://diff-diff.readthedocs.io
|
|
47
47
|
Project-URL: Homepage, https://github.com/igerber/diff-diff
|
|
48
48
|
Project-URL: Issues, https://github.com/igerber/diff-diff/issues
|
|
49
|
-
Project-URL: Practitioner Guide, https://
|
|
49
|
+
Project-URL: Practitioner Guide, https://diff-diff.readthedocs.io/en/stable/llms-practitioner.txt
|
|
50
50
|
Project-URL: Repository, https://github.com/igerber/diff-diff
|
|
51
51
|
|
|
52
52
|
# diff-diff
|
|
@@ -120,11 +120,19 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
120
120
|
|
|
121
121
|
## For AI Agents
|
|
122
122
|
|
|
123
|
-
If you are an AI agent or LLM using this library,
|
|
123
|
+
If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis — not just calling `fit()`, but testing assumptions, running sensitivity analysis, and checking robustness.
|
|
124
124
|
|
|
125
|
-
|
|
125
|
+
```python
|
|
126
|
+
from diff_diff import get_llm_guide
|
|
127
|
+
|
|
128
|
+
get_llm_guide() # concise API reference
|
|
129
|
+
get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
|
|
130
|
+
get_llm_guide("full") # comprehensive documentation
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
The guides are bundled in the wheel, so they are accessible from a `pip install` with no network access required.
|
|
126
134
|
|
|
127
|
-
|
|
135
|
+
After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
|
|
128
136
|
|
|
129
137
|
## For Data Scientists
|
|
130
138
|
|
|
@@ -1156,7 +1164,7 @@ results = stacked_did(
|
|
|
1156
1164
|
|
|
1157
1165
|
### Efficient DiD (Chen, Sant'Anna & Xie 2025)
|
|
1158
1166
|
|
|
1159
|
-
Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega
|
|
1167
|
+
Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs along the **no-covariate path**, producing tighter confidence intervals than standard estimators when the stronger PT-All assumption holds. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega*. A doubly-robust covariate path is also available: it is consistent if either the outcome regression or the sieve propensity ratio is correctly specified, but the linear OLS outcome regression does not generically attain the efficiency bound unless the conditional mean is linear in the covariates.
|
|
1160
1168
|
|
|
1161
1169
|
```python
|
|
1162
1170
|
from diff_diff import EfficientDiD, generate_staggered_data
|
|
@@ -1191,8 +1199,13 @@ EfficientDiD(
|
|
|
1191
1199
|
)
|
|
1192
1200
|
```
|
|
1193
1201
|
|
|
1194
|
-
> **Note:**
|
|
1195
|
-
>
|
|
1202
|
+
> **Note:** EfficientDiD supports covariate adjustment via a doubly-robust path
|
|
1203
|
+
> (sieve-based propensity score ratios and a linear OLS outcome regression).
|
|
1204
|
+
> The DR property gives consistency if either the OR or the PS is correctly
|
|
1205
|
+
> specified, but the OLS working model for the outcome regression does not
|
|
1206
|
+
> generically attain the semiparametric efficiency bound. The unqualified
|
|
1207
|
+
> efficiency-bound claim applies to the no-covariate path only. See the
|
|
1208
|
+
> `covariates` parameter on `fit()` and `docs/methodology/REGISTRY.md`.
|
|
1196
1209
|
|
|
1197
1210
|
**When to use Efficient DiD vs Callaway-Sant'Anna:**
|
|
1198
1211
|
|
|
@@ -1200,15 +1213,15 @@ EfficientDiD(
|
|
|
1200
1213
|
|--------|--------------|-------------------|
|
|
1201
1214
|
| Approach | Optimal EIF-based weighting | Separate 2x2 DiD aggregation |
|
|
1202
1215
|
| PT assumption | PT-All (stronger) or PT-Post | Conditional PT |
|
|
1203
|
-
| Efficiency | Achieves semiparametric bound | Not efficient |
|
|
1204
|
-
| Covariates |
|
|
1216
|
+
| Efficiency | Achieves semiparametric bound on the no-covariate path; DR covariate path is consistent but does not generically attain the bound under a linear OLS outcome regression | Not efficient |
|
|
1217
|
+
| Covariates | Supported (doubly robust, sieve-based PS + linear OLS OR) | Supported (OR, IPW, DR) |
|
|
1205
1218
|
| When to choose | Maximum efficiency, PT-All credible | Covariates needed, weaker PT |
|
|
1206
1219
|
|
|
1207
1220
|
### de Chaisemartin-D'Haultfœuille (dCDH) for Reversible Treatments
|
|
1208
1221
|
|
|
1209
1222
|
`ChaisemartinDHaultfoeuille` (alias `DCDH`) is the only library estimator that handles **non-absorbing (reversible) treatments** — treatment can switch on AND off over time. This is the natural fit for marketing campaigns, seasonal promotions, on/off policy cycles.
|
|
1210
1223
|
|
|
1211
|
-
Ships `DID_M` (= `DID_1` at horizon `l = 1`)
|
|
1224
|
+
Ships `DID_M` (= `DID_1` at horizon `l = 1`), the full multi-horizon event study `DID_l` for `l = 1..L_max` via the `L_max` parameter, residualization-style covariate adjustment (`controls`), group-specific linear trends (`trends_linear`), state-set-specific trends (`trends_nonparam`), heterogeneity testing, non-binary treatment, HonestDiD sensitivity integration on placebos, and survey support via Taylor-series linearization.
|
|
1212
1225
|
|
|
1213
1226
|
```python
|
|
1214
1227
|
from diff_diff import ChaisemartinDHaultfoeuille
|
|
@@ -1264,7 +1277,7 @@ ChaisemartinDHaultfoeuille(
|
|
|
1264
1277
|
| `n_groups_dropped_crossers`, `n_groups_dropped_singleton_baseline` | Filter counts (multi-switch groups dropped before estimation; singleton-baseline groups excluded from variance) |
|
|
1265
1278
|
| `n_groups_dropped_never_switching` | Backwards-compatibility metadata. Never-switching groups participate in the variance via stable-control roles; this field is no longer a filter count. |
|
|
1266
1279
|
|
|
1267
|
-
**Multi-horizon event study** (
|
|
1280
|
+
**Multi-horizon event study** (pass `L_max` to `fit()`):
|
|
1268
1281
|
|
|
1269
1282
|
```python
|
|
1270
1283
|
results = est.fit(data, outcome="outcome", group="group",
|
|
@@ -1303,13 +1316,13 @@ print(f"Fraction of negative weights: {diagnostic.fraction_negative:.3f}")
|
|
|
1303
1316
|
print(f"sigma_fe (sign-flipping threshold): {diagnostic.sigma_fe:.3f}")
|
|
1304
1317
|
```
|
|
1305
1318
|
|
|
1306
|
-
> **Note:** Placebo SE is `NaN` for
|
|
1319
|
+
> **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`) because the per-period aggregation path has no influence-function derivation; the point estimate is meaningful for visual pre-trends inspection. Multi-horizon dynamic placebos `DID^{pl}_l` (`L_max >= 1`) have valid analytical SE via the same cohort-recentered plug-in variance as the positive horizons, with bootstrap SE available when `n_bootstrap > 0`. See `docs/methodology/REGISTRY.md` for the full contract.
|
|
1307
1320
|
|
|
1308
1321
|
> **Note:** By default (`drop_larger_lower=True`), the estimator drops groups whose treatment switches more than once before estimation. This matches R `DIDmultiplegtDYN`'s default and is required for the analytical variance formula to be consistent with the point estimate. Each drop emits an explicit warning.
|
|
1309
1322
|
|
|
1310
|
-
> **Note:**
|
|
1323
|
+
> **Note:** The estimator requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. Late-entry groups (missing the baseline) raise `ValueError`; interior-gap groups are dropped with a warning; terminally-missing groups (early exit / right-censoring) are retained and contribute from their observed periods only. This is a documented deviation from R `DIDmultiplegtDYN`, which supports unbalanced panels - see [`docs/methodology/REGISTRY.md`](docs/methodology/REGISTRY.md) for the rationale, the defensive guards that make terminal missingness safe, and workarounds for unbalanced inputs.
|
|
1311
1324
|
|
|
1312
|
-
> **Note:** Survey design
|
|
1325
|
+
> **Note:** Survey design is supported via Taylor-series linearization on `pweight` with strata / PSU / FPC. Replicate-weight variance and PSU-level bootstrap for dCDH are a planned extension. The `aggregate` parameter still raises `NotImplementedError`.
|
|
1313
1326
|
|
|
1314
1327
|
### Triple Difference (DDD)
|
|
1315
1328
|
|
|
@@ -69,11 +69,19 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
|
|
|
69
69
|
|
|
70
70
|
## For AI Agents
|
|
71
71
|
|
|
72
|
-
If you are an AI agent or LLM using this library,
|
|
72
|
+
If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis — not just calling `fit()`, but testing assumptions, running sensitivity analysis, and checking robustness.
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
```python
|
|
75
|
+
from diff_diff import get_llm_guide
|
|
76
|
+
|
|
77
|
+
get_llm_guide() # concise API reference
|
|
78
|
+
get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
|
|
79
|
+
get_llm_guide("full") # comprehensive documentation
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
The guides are bundled in the wheel, so they are accessible from a `pip install` with no network access required.
|
|
75
83
|
|
|
76
|
-
|
|
84
|
+
After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
|
|
77
85
|
|
|
78
86
|
## For Data Scientists
|
|
79
87
|
|
|
@@ -1105,7 +1113,7 @@ results = stacked_did(
|
|
|
1105
1113
|
|
|
1106
1114
|
### Efficient DiD (Chen, Sant'Anna & Xie 2025)
|
|
1107
1115
|
|
|
1108
|
-
Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega
|
|
1116
|
+
Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs along the **no-covariate path**, producing tighter confidence intervals than standard estimators when the stronger PT-All assumption holds. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega*. A doubly-robust covariate path is also available: it is consistent if either the outcome regression or the sieve propensity ratio is correctly specified, but the linear OLS outcome regression does not generically attain the efficiency bound unless the conditional mean is linear in the covariates.
|
|
1109
1117
|
|
|
1110
1118
|
```python
|
|
1111
1119
|
from diff_diff import EfficientDiD, generate_staggered_data
|
|
@@ -1140,8 +1148,13 @@ EfficientDiD(
|
|
|
1140
1148
|
)
|
|
1141
1149
|
```
|
|
1142
1150
|
|
|
1143
|
-
> **Note:**
|
|
1144
|
-
>
|
|
1151
|
+
> **Note:** EfficientDiD supports covariate adjustment via a doubly-robust path
|
|
1152
|
+
> (sieve-based propensity score ratios and a linear OLS outcome regression).
|
|
1153
|
+
> The DR property gives consistency if either the OR or the PS is correctly
|
|
1154
|
+
> specified, but the OLS working model for the outcome regression does not
|
|
1155
|
+
> generically attain the semiparametric efficiency bound. The unqualified
|
|
1156
|
+
> efficiency-bound claim applies to the no-covariate path only. See the
|
|
1157
|
+
> `covariates` parameter on `fit()` and `docs/methodology/REGISTRY.md`.
|
|
1145
1158
|
|
|
1146
1159
|
**When to use Efficient DiD vs Callaway-Sant'Anna:**
|
|
1147
1160
|
|
|
@@ -1149,15 +1162,15 @@ EfficientDiD(
|
|
|
1149
1162
|
|--------|--------------|-------------------|
|
|
1150
1163
|
| Approach | Optimal EIF-based weighting | Separate 2x2 DiD aggregation |
|
|
1151
1164
|
| PT assumption | PT-All (stronger) or PT-Post | Conditional PT |
|
|
1152
|
-
| Efficiency | Achieves semiparametric bound | Not efficient |
|
|
1153
|
-
| Covariates |
|
|
1165
|
+
| Efficiency | Achieves semiparametric bound on the no-covariate path; DR covariate path is consistent but does not generically attain the bound under a linear OLS outcome regression | Not efficient |
|
|
1166
|
+
| Covariates | Supported (doubly robust, sieve-based PS + linear OLS OR) | Supported (OR, IPW, DR) |
|
|
1154
1167
|
| When to choose | Maximum efficiency, PT-All credible | Covariates needed, weaker PT |
|
|
1155
1168
|
|
|
1156
1169
|
### de Chaisemartin-D'Haultfœuille (dCDH) for Reversible Treatments
|
|
1157
1170
|
|
|
1158
1171
|
`ChaisemartinDHaultfoeuille` (alias `DCDH`) is the only library estimator that handles **non-absorbing (reversible) treatments** — treatment can switch on AND off over time. This is the natural fit for marketing campaigns, seasonal promotions, on/off policy cycles.
|
|
1159
1172
|
|
|
1160
|
-
Ships `DID_M` (= `DID_1` at horizon `l = 1`)
|
|
1173
|
+
Ships `DID_M` (= `DID_1` at horizon `l = 1`), the full multi-horizon event study `DID_l` for `l = 1..L_max` via the `L_max` parameter, residualization-style covariate adjustment (`controls`), group-specific linear trends (`trends_linear`), state-set-specific trends (`trends_nonparam`), heterogeneity testing, non-binary treatment, HonestDiD sensitivity integration on placebos, and survey support via Taylor-series linearization.
|
|
1161
1174
|
|
|
1162
1175
|
```python
|
|
1163
1176
|
from diff_diff import ChaisemartinDHaultfoeuille
|
|
@@ -1213,7 +1226,7 @@ ChaisemartinDHaultfoeuille(
|
|
|
1213
1226
|
| `n_groups_dropped_crossers`, `n_groups_dropped_singleton_baseline` | Filter counts (multi-switch groups dropped before estimation; singleton-baseline groups excluded from variance) |
|
|
1214
1227
|
| `n_groups_dropped_never_switching` | Backwards-compatibility metadata. Never-switching groups participate in the variance via stable-control roles; this field is no longer a filter count. |
|
|
1215
1228
|
|
|
1216
|
-
**Multi-horizon event study** (
|
|
1229
|
+
**Multi-horizon event study** (pass `L_max` to `fit()`):
|
|
1217
1230
|
|
|
1218
1231
|
```python
|
|
1219
1232
|
results = est.fit(data, outcome="outcome", group="group",
|
|
@@ -1252,13 +1265,13 @@ print(f"Fraction of negative weights: {diagnostic.fraction_negative:.3f}")
|
|
|
1252
1265
|
print(f"sigma_fe (sign-flipping threshold): {diagnostic.sigma_fe:.3f}")
|
|
1253
1266
|
```
|
|
1254
1267
|
|
|
1255
|
-
> **Note:** Placebo SE is `NaN` for
|
|
1268
|
+
> **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`) because the per-period aggregation path has no influence-function derivation; the point estimate is meaningful for visual pre-trends inspection. Multi-horizon dynamic placebos `DID^{pl}_l` (`L_max >= 1`) have valid analytical SE via the same cohort-recentered plug-in variance as the positive horizons, with bootstrap SE available when `n_bootstrap > 0`. See `docs/methodology/REGISTRY.md` for the full contract.
|
|
1256
1269
|
|
|
1257
1270
|
> **Note:** By default (`drop_larger_lower=True`), the estimator drops groups whose treatment switches more than once before estimation. This matches R `DIDmultiplegtDYN`'s default and is required for the analytical variance formula to be consistent with the point estimate. Each drop emits an explicit warning.
|
|
1258
1271
|
|
|
1259
|
-
> **Note:**
|
|
1272
|
+
> **Note:** The estimator requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. Late-entry groups (missing the baseline) raise `ValueError`; interior-gap groups are dropped with a warning; terminally-missing groups (early exit / right-censoring) are retained and contribute from their observed periods only. This is a documented deviation from R `DIDmultiplegtDYN`, which supports unbalanced panels - see [`docs/methodology/REGISTRY.md`](docs/methodology/REGISTRY.md) for the rationale, the defensive guards that make terminal missingness safe, and workarounds for unbalanced inputs.
|
|
1260
1273
|
|
|
1261
|
-
> **Note:** Survey design
|
|
1274
|
+
> **Note:** Survey design is supported via Taylor-series linearization on `pweight` with strata / PSU / FPC. Replicate-weight variance and PSU-level bootstrap for dCDH are a planned extension. The `aggregate` parameter still raises `NotImplementedError`.
|
|
1262
1275
|
|
|
1263
1276
|
### Triple Difference (DDD)
|
|
1264
1277
|
|
|
@@ -4,12 +4,14 @@ diff-diff: A library for Difference-in-Differences analysis.
|
|
|
4
4
|
This library provides sklearn-like estimators for causal inference
|
|
5
5
|
using the difference-in-differences methodology.
|
|
6
6
|
|
|
7
|
-
For rigorous analysis, follow the 8-step practitioner workflow
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
For rigorous analysis, follow the 8-step practitioner workflow based
|
|
8
|
+
on Baker et al. (2025). After estimation, call
|
|
9
|
+
``practitioner_next_steps(results)`` for context-aware guidance on
|
|
10
|
+
remaining diagnostic steps.
|
|
11
11
|
|
|
12
|
-
AI
|
|
12
|
+
AI agents: call ``diff_diff.get_llm_guide()`` for a complete API reference.
|
|
13
|
+
Use ``get_llm_guide("practitioner")`` for the 8-step workflow or
|
|
14
|
+
``get_llm_guide("full")`` for comprehensive documentation.
|
|
13
15
|
"""
|
|
14
16
|
|
|
15
17
|
# Import backend detection from dedicated module (avoids circular imports)
|
|
@@ -200,6 +202,7 @@ from diff_diff.visualization import (
|
|
|
200
202
|
plot_synth_weights,
|
|
201
203
|
)
|
|
202
204
|
from diff_diff.practitioner import practitioner_next_steps
|
|
205
|
+
from diff_diff._guides_api import get_llm_guide
|
|
203
206
|
from diff_diff.datasets import (
|
|
204
207
|
clear_cache,
|
|
205
208
|
list_datasets,
|
|
@@ -228,7 +231,7 @@ EDiD = EfficientDiD
|
|
|
228
231
|
ETWFE = WooldridgeDiD
|
|
229
232
|
DCDH = ChaisemartinDHaultfoeuille
|
|
230
233
|
|
|
231
|
-
__version__ = "3.1.0"
|
|
234
|
+
__version__ = "3.1.2"
|
|
232
235
|
__all__ = [
|
|
233
236
|
# Estimators
|
|
234
237
|
"DifferenceInDifferences",
|
|
@@ -402,4 +405,6 @@ __all__ = [
|
|
|
402
405
|
"clear_cache",
|
|
403
406
|
# Practitioner guidance
|
|
404
407
|
"practitioner_next_steps",
|
|
408
|
+
# LLM guide accessor
|
|
409
|
+
"get_llm_guide",
|
|
405
410
|
]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Runtime accessor for bundled LLM guide files."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from importlib.resources import files
|
|
5
|
+
|
|
6
|
+
_VARIANT_TO_FILE = {
|
|
7
|
+
"concise": "llms.txt",
|
|
8
|
+
"full": "llms-full.txt",
|
|
9
|
+
"practitioner": "llms-practitioner.txt",
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_llm_guide(variant: str = "concise") -> str:
|
|
14
|
+
"""Return the contents of a bundled LLM guide.
|
|
15
|
+
|
|
16
|
+
Parameters
|
|
17
|
+
----------
|
|
18
|
+
variant : str, default "concise"
|
|
19
|
+
Which guide to load. Names are case-sensitive. One of:
|
|
20
|
+
|
|
21
|
+
- ``"concise"`` -- compact API reference (llms.txt)
|
|
22
|
+
- ``"full"`` -- complete API documentation (llms-full.txt)
|
|
23
|
+
- ``"practitioner"`` -- 8-step practitioner workflow (llms-practitioner.txt)
|
|
24
|
+
|
|
25
|
+
Returns
|
|
26
|
+
-------
|
|
27
|
+
str
|
|
28
|
+
The full text of the requested guide.
|
|
29
|
+
|
|
30
|
+
Raises
|
|
31
|
+
------
|
|
32
|
+
ValueError
|
|
33
|
+
If ``variant`` is not one of the known guide names.
|
|
34
|
+
|
|
35
|
+
Examples
|
|
36
|
+
--------
|
|
37
|
+
>>> from diff_diff import get_llm_guide
|
|
38
|
+
>>> concise = get_llm_guide()
|
|
39
|
+
>>> workflow = get_llm_guide("practitioner")
|
|
40
|
+
"""
|
|
41
|
+
try:
|
|
42
|
+
filename = _VARIANT_TO_FILE[variant]
|
|
43
|
+
except (KeyError, TypeError):
|
|
44
|
+
valid = ", ".join(repr(k) for k in _VARIANT_TO_FILE)
|
|
45
|
+
raise ValueError(
|
|
46
|
+
f"Unknown guide variant {variant!r}. Valid options: {valid}."
|
|
47
|
+
) from None
|
|
48
|
+
return files("diff_diff.guides").joinpath(filename).read_text(encoding="utf-8")
|