diff-diff 3.1.1__tar.gz → 3.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. diff_diff-3.1.3/LICENSE +21 -0
  2. {diff_diff-3.1.1 → diff_diff-3.1.3}/PKG-INFO +29 -15
  3. {diff_diff-3.1.1 → diff_diff-3.1.3}/README.md +26 -13
  4. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/__init__.py +11 -6
  5. diff_diff-3.1.3/diff_diff/_guides_api.py +48 -0
  6. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/chaisemartin_dhaultfoeuille.py +1107 -116
  7. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +204 -13
  8. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/chaisemartin_dhaultfoeuille_results.py +7 -2
  9. diff_diff-3.1.3/diff_diff/guides/__init__.py +1 -0
  10. diff_diff-3.1.3/diff_diff/guides/llms-full.txt +1743 -0
  11. diff_diff-3.1.3/diff_diff/guides/llms-practitioner.txt +562 -0
  12. diff_diff-3.1.3/diff_diff/guides/llms.txt +116 -0
  13. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/honest_did.py +11 -1
  14. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/imputation.py +19 -3
  15. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/practitioner.py +51 -12
  16. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/results.py +539 -0
  17. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/synthetic_did.py +183 -61
  18. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/trop.py +16 -2
  19. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/trop_global.py +70 -6
  20. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/trop_local.py +50 -2
  21. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/two_stage.py +19 -3
  22. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/two_stage_bootstrap.py +11 -1
  23. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/utils.py +56 -4
  24. {diff_diff-3.1.1 → diff_diff-3.1.3}/pyproject.toml +5 -2
  25. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/Cargo.lock +11 -11
  26. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/Cargo.toml +1 -1
  27. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/_backend.py +0 -0
  28. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/bacon.py +0 -0
  29. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/bootstrap_utils.py +0 -0
  30. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/continuous_did.py +0 -0
  31. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/continuous_did_bspline.py +0 -0
  32. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/continuous_did_results.py +0 -0
  33. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/datasets.py +0 -0
  34. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/diagnostics.py +0 -0
  35. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/efficient_did.py +0 -0
  36. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/efficient_did_bootstrap.py +0 -0
  37. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/efficient_did_covariates.py +0 -0
  38. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/efficient_did_results.py +0 -0
  39. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/efficient_did_weights.py +0 -0
  40. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/estimators.py +0 -0
  41. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/imputation_bootstrap.py +0 -0
  42. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/imputation_results.py +0 -0
  43. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/linalg.py +0 -0
  44. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/power.py +0 -0
  45. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/prep.py +0 -0
  46. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/prep_dgp.py +0 -0
  47. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/pretrends.py +0 -0
  48. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/stacked_did.py +0 -0
  49. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/stacked_did_results.py +0 -0
  50. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/staggered.py +0 -0
  51. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/staggered_aggregation.py +0 -0
  52. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/staggered_bootstrap.py +0 -0
  53. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/staggered_results.py +0 -0
  54. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/staggered_triple_diff.py +0 -0
  55. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/staggered_triple_diff_results.py +0 -0
  56. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/sun_abraham.py +0 -0
  57. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/survey.py +0 -0
  58. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/triple_diff.py +0 -0
  59. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/trop_results.py +0 -0
  60. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/twfe.py +0 -0
  61. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/two_stage_results.py +0 -0
  62. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/__init__.py +0 -0
  63. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_common.py +0 -0
  64. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_continuous.py +0 -0
  65. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_diagnostic.py +0 -0
  66. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_event_study.py +0 -0
  67. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_power.py +0 -0
  68. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_staggered.py +0 -0
  69. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/visualization/_synthetic.py +0 -0
  70. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/wooldridge.py +0 -0
  71. {diff_diff-3.1.1 → diff_diff-3.1.3}/diff_diff/wooldridge_results.py +0 -0
  72. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/build.rs +0 -0
  73. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/src/bootstrap.rs +0 -0
  74. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/src/lib.rs +0 -0
  75. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/src/linalg.rs +0 -0
  76. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/src/trop.rs +0 -0
  77. {diff_diff-3.1.1 → diff_diff-3.1.3}/rust/src/weights.rs +0 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Isaac Gerber
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diff-diff
3
- Version: 3.1.1
3
+ Version: 3.1.3
4
4
  Classifier: Development Status :: 5 - Production/Stable
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Operating System :: OS Independent
@@ -37,6 +37,7 @@ Requires-Dist: plotly>=5.0 ; extra == 'plotly'
37
37
  Provides-Extra: dev
38
38
  Provides-Extra: docs
39
39
  Provides-Extra: plotly
40
+ License-File: LICENSE
40
41
  Summary: Difference-in-Differences causal inference with sklearn-like API. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends.
41
42
  Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects,event-study,staggered-adoption,parallel-trends,synthetic-control,panel-data,did,twfe,callaway-santanna,honest-did,sensitivity-analysis
42
43
  Author: diff-diff contributors
@@ -46,7 +47,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
46
47
  Project-URL: Documentation, https://diff-diff.readthedocs.io
47
48
  Project-URL: Homepage, https://github.com/igerber/diff-diff
48
49
  Project-URL: Issues, https://github.com/igerber/diff-diff/issues
49
- Project-URL: Practitioner Guide, https://github.com/igerber/diff-diff/blob/main/docs/llms-practitioner.txt
50
+ Project-URL: Practitioner Guide, https://diff-diff.readthedocs.io/en/stable/llms-practitioner.txt
50
51
  Project-URL: Repository, https://github.com/igerber/diff-diff
51
52
 
52
53
  # diff-diff
@@ -120,11 +121,19 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
120
121
 
121
122
  ## For AI Agents
122
123
 
123
- If you are an AI agent or LLM using this library, read [`docs/llms.txt`](docs/llms.txt) for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis — not just calling `fit()`, but testing assumptions, running sensitivity analysis, and checking robustness.
124
+ If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis — not just calling `fit()`, but testing assumptions, running sensitivity analysis, and checking robustness.
124
125
 
125
- After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
126
+ ```python
127
+ from diff_diff import get_llm_guide
128
+
129
+ get_llm_guide() # concise API reference
130
+ get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
131
+ get_llm_guide("full") # comprehensive documentation
132
+ ```
133
+
134
+ The guides are bundled in the wheel, so they are accessible from a `pip install` with no network access required.
126
135
 
127
- Detailed guide: [`docs/llms-practitioner.txt`](docs/llms-practitioner.txt)
136
+ After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
128
137
 
129
138
  ## For Data Scientists
130
139
 
@@ -1156,7 +1165,7 @@ results = stacked_did(
1156
1165
 
1157
1166
  ### Efficient DiD (Chen, Sant'Anna & Xie 2025)
1158
1167
 
1159
- Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega*, producing tighter confidence intervals than standard estimators like Callaway-Sant'Anna when the stronger PT-All assumption holds.
1168
+ Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs along the **no-covariate path**, producing tighter confidence intervals than standard estimators when the stronger PT-All assumption holds. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega*. A doubly-robust covariate path is also available: it is consistent if either the outcome regression or the sieve propensity ratio is correctly specified, but the linear OLS outcome regression does not generically attain the efficiency bound unless the conditional mean is linear in the covariates.
1160
1169
 
1161
1170
  ```python
1162
1171
  from diff_diff import EfficientDiD, generate_staggered_data
@@ -1191,8 +1200,13 @@ EfficientDiD(
1191
1200
  )
1192
1201
  ```
1193
1202
 
1194
- > **Note:** Phase 1 supports the no-covariates path only. Use CallawaySantAnna with
1195
- > `estimation_method='dr'` if you need covariate adjustment.
1203
+ > **Note:** EfficientDiD supports covariate adjustment via a doubly-robust path
1204
+ > (sieve-based propensity score ratios and a linear OLS outcome regression).
1205
+ > The DR property gives consistency if either the OR or the PS is correctly
1206
+ > specified, but the OLS working model for the outcome regression does not
1207
+ > generically attain the semiparametric efficiency bound. The unqualified
1208
+ > efficiency-bound claim applies to the no-covariate path only. See the
1209
+ > `covariates` parameter on `fit()` and `docs/methodology/REGISTRY.md`.
1196
1210
 
1197
1211
  **When to use Efficient DiD vs Callaway-Sant'Anna:**
1198
1212
 
@@ -1200,15 +1214,15 @@ EfficientDiD(
1200
1214
  |--------|--------------|-------------------|
1201
1215
  | Approach | Optimal EIF-based weighting | Separate 2x2 DiD aggregation |
1202
1216
  | PT assumption | PT-All (stronger) or PT-Post | Conditional PT |
1203
- | Efficiency | Achieves semiparametric bound | Not efficient |
1204
- | Covariates | Not yet (Phase 2) | Supported (OR, IPW, DR) |
1217
+ | Efficiency | Achieves semiparametric bound on the no-covariate path; DR covariate path is consistent but does not generically attain the bound under a linear OLS outcome regression | Not efficient |
1218
+ | Covariates | Supported (doubly robust, sieve-based PS + linear OLS OR) | Supported (OR, IPW, DR) |
1205
1219
  | When to choose | Maximum efficiency, PT-All credible | Covariates needed, weaker PT |
1206
1220
 
1207
1221
  ### de Chaisemartin-D'Haultfœuille (dCDH) for Reversible Treatments
1208
1222
 
1209
1223
  `ChaisemartinDHaultfoeuille` (alias `DCDH`) is the only library estimator that handles **non-absorbing (reversible) treatments** — treatment can switch on AND off over time. This is the natural fit for marketing campaigns, seasonal promotions, on/off policy cycles.
1210
1224
 
1211
- Ships `DID_M` (= `DID_1` at horizon `l = 1`) plus the full multi-horizon event study `DID_l` for `l = 1..L_max` via the `L_max` parameter. Phase 3 will add covariate adjustment.
1225
+ Ships `DID_M` (= `DID_1` at horizon `l = 1`), the full multi-horizon event study `DID_l` for `l = 1..L_max` via the `L_max` parameter, residualization-style covariate adjustment (`controls`), group-specific linear trends (`trends_linear`), state-set-specific trends (`trends_nonparam`), heterogeneity testing, non-binary treatment, HonestDiD sensitivity integration on placebos, and survey support via Taylor-series linearization.
1212
1226
 
1213
1227
  ```python
1214
1228
  from diff_diff import ChaisemartinDHaultfoeuille
@@ -1264,7 +1278,7 @@ ChaisemartinDHaultfoeuille(
1264
1278
  | `n_groups_dropped_crossers`, `n_groups_dropped_singleton_baseline` | Filter counts (multi-switch groups dropped before estimation; singleton-baseline groups excluded from variance) |
1265
1279
  | `n_groups_dropped_never_switching` | Backwards-compatibility metadata. Never-switching groups participate in the variance via stable-control roles; this field is no longer a filter count. |
1266
1280
 
1267
- **Multi-horizon event study** (Phase 2 - pass `L_max` to `fit()`):
1281
+ **Multi-horizon event study** (pass `L_max` to `fit()`):
1268
1282
 
1269
1283
  ```python
1270
1284
  results = est.fit(data, outcome="outcome", group="group",
@@ -1303,13 +1317,13 @@ print(f"Fraction of negative weights: {diagnostic.fraction_negative:.3f}")
1303
1317
  print(f"sigma_fe (sign-flipping threshold): {diagnostic.sigma_fe:.3f}")
1304
1318
  ```
1305
1319
 
1306
- > **Note:** Placebo SE is `NaN` for both the single-lag `DID_M^pl` and the dynamic placebos `DID^{pl}_l`. The point estimates are meaningful for visual pre-trends inspection; formal placebo inference (influence-function derivation) is deferred to a follow-up. See `REGISTRY.md` for the full contract.
1320
+ > **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`) because the per-period aggregation path has no influence-function derivation; the point estimate is meaningful for visual pre-trends inspection. Multi-horizon dynamic placebos `DID^{pl}_l` (`L_max >= 1`) have valid analytical SE via the same cohort-recentered plug-in variance as the positive horizons, with bootstrap SE available when `n_bootstrap > 0`. See `docs/methodology/REGISTRY.md` for the full contract.
1307
1321
 
1308
1322
  > **Note:** By default (`drop_larger_lower=True`), the estimator drops groups whose treatment switches more than once before estimation. This matches R `DIDmultiplegtDYN`'s default and is required for the analytical variance formula to be consistent with the point estimate. Each drop emits an explicit warning.
1309
1323
 
1310
- > **Note:** Phase 1 requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. Late-entry groups (missing the baseline) raise `ValueError`; interior-gap groups are dropped with a warning; terminally-missing groups (early exit / right-censoring) are retained and contribute from their observed periods only. This is a documented deviation from R `DIDmultiplegtDYN`, which supports unbalanced panels see [`docs/methodology/REGISTRY.md`](docs/methodology/REGISTRY.md) for the rationale, the defensive guards that make terminal missingness safe, and workarounds for unbalanced inputs.
1324
+ > **Note:** The estimator requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. Late-entry groups (missing the baseline) raise `ValueError`; interior-gap groups are dropped with a warning; terminally-missing groups (early exit / right-censoring) are retained and contribute from their observed periods only. This is a documented deviation from R `DIDmultiplegtDYN`, which supports unbalanced panels - see [`docs/methodology/REGISTRY.md`](docs/methodology/REGISTRY.md) for the rationale, the defensive guards that make terminal missingness safe, and workarounds for unbalanced inputs.
1311
1325
 
1312
- > **Note:** Survey design (`survey_design`), covariate adjustment (`controls`), group-specific linear trends (`trends_linear`), and HonestDiD integration (`honest_did`) are not yet supported. They raise `NotImplementedError` with phase pointers - see [`ROADMAP.md`](ROADMAP.md) for the Phase 3 rollout.
1326
+ > **Note:** Survey design is supported via Taylor-series linearization on `pweight` with strata / PSU / FPC. Replicate-weight variance and PSU-level bootstrap for dCDH are a planned extension. The `aggregate` parameter still raises `NotImplementedError`.
1313
1327
 
1314
1328
  ### Triple Difference (DDD)
1315
1329
 
@@ -69,11 +69,19 @@ Signif. codes: '***' 0.001, '**' 0.01, '*' 0.05, '.' 0.1
69
69
 
70
70
  ## For AI Agents
71
71
 
72
- If you are an AI agent or LLM using this library, read [`docs/llms.txt`](docs/llms.txt) for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis — not just calling `fit()`, but testing assumptions, running sensitivity analysis, and checking robustness.
72
+ If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis — not just calling `fit()`, but testing assumptions, running sensitivity analysis, and checking robustness.
73
73
 
74
- After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
74
+ ```python
75
+ from diff_diff import get_llm_guide
76
+
77
+ get_llm_guide() # concise API reference
78
+ get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
79
+ get_llm_guide("full") # comprehensive documentation
80
+ ```
81
+
82
+ The guides are bundled in the wheel, so they are accessible from a `pip install` with no network access required.
75
83
 
76
- Detailed guide: [`docs/llms-practitioner.txt`](docs/llms-practitioner.txt)
84
+ After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
77
85
 
78
86
  ## For Data Scientists
79
87
 
@@ -1105,7 +1113,7 @@ results = stacked_did(
1105
1113
 
1106
1114
  ### Efficient DiD (Chen, Sant'Anna & Xie 2025)
1107
1115
 
1108
- Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega*, producing tighter confidence intervals than standard estimators like Callaway-Sant'Anna when the stronger PT-All assumption holds.
1116
+ Efficient DiD achieves the semiparametric efficiency bound for ATT estimation in staggered adoption designs along the **no-covariate path**, producing tighter confidence intervals than standard estimators when the stronger PT-All assumption holds. It optimally weights across all valid comparison groups and baselines via the inverse covariance matrix Omega*. A doubly-robust covariate path is also available: it is consistent if either the outcome regression or the sieve propensity ratio is correctly specified, but the linear OLS outcome regression does not generically attain the efficiency bound unless the conditional mean is linear in the covariates.
1109
1117
 
1110
1118
  ```python
1111
1119
  from diff_diff import EfficientDiD, generate_staggered_data
@@ -1140,8 +1148,13 @@ EfficientDiD(
1140
1148
  )
1141
1149
  ```
1142
1150
 
1143
- > **Note:** Phase 1 supports the no-covariates path only. Use CallawaySantAnna with
1144
- > `estimation_method='dr'` if you need covariate adjustment.
1151
+ > **Note:** EfficientDiD supports covariate adjustment via a doubly-robust path
1152
+ > (sieve-based propensity score ratios and a linear OLS outcome regression).
1153
+ > The DR property gives consistency if either the OR or the PS is correctly
1154
+ > specified, but the OLS working model for the outcome regression does not
1155
+ > generically attain the semiparametric efficiency bound. The unqualified
1156
+ > efficiency-bound claim applies to the no-covariate path only. See the
1157
+ > `covariates` parameter on `fit()` and `docs/methodology/REGISTRY.md`.
1145
1158
 
1146
1159
  **When to use Efficient DiD vs Callaway-Sant'Anna:**
1147
1160
 
@@ -1149,15 +1162,15 @@ EfficientDiD(
1149
1162
  |--------|--------------|-------------------|
1150
1163
  | Approach | Optimal EIF-based weighting | Separate 2x2 DiD aggregation |
1151
1164
  | PT assumption | PT-All (stronger) or PT-Post | Conditional PT |
1152
- | Efficiency | Achieves semiparametric bound | Not efficient |
1153
- | Covariates | Not yet (Phase 2) | Supported (OR, IPW, DR) |
1165
+ | Efficiency | Achieves semiparametric bound on the no-covariate path; DR covariate path is consistent but does not generically attain the bound under a linear OLS outcome regression | Not efficient |
1166
+ | Covariates | Supported (doubly robust, sieve-based PS + linear OLS OR) | Supported (OR, IPW, DR) |
1154
1167
  | When to choose | Maximum efficiency, PT-All credible | Covariates needed, weaker PT |
1155
1168
 
1156
1169
  ### de Chaisemartin-D'Haultfœuille (dCDH) for Reversible Treatments
1157
1170
 
1158
1171
  `ChaisemartinDHaultfoeuille` (alias `DCDH`) is the only library estimator that handles **non-absorbing (reversible) treatments** — treatment can switch on AND off over time. This is the natural fit for marketing campaigns, seasonal promotions, on/off policy cycles.
1159
1172
 
1160
- Ships `DID_M` (= `DID_1` at horizon `l = 1`) plus the full multi-horizon event study `DID_l` for `l = 1..L_max` via the `L_max` parameter. Phase 3 will add covariate adjustment.
1173
+ Ships `DID_M` (= `DID_1` at horizon `l = 1`), the full multi-horizon event study `DID_l` for `l = 1..L_max` via the `L_max` parameter, residualization-style covariate adjustment (`controls`), group-specific linear trends (`trends_linear`), state-set-specific trends (`trends_nonparam`), heterogeneity testing, non-binary treatment, HonestDiD sensitivity integration on placebos, and survey support via Taylor-series linearization.
1161
1174
 
1162
1175
  ```python
1163
1176
  from diff_diff import ChaisemartinDHaultfoeuille
@@ -1213,7 +1226,7 @@ ChaisemartinDHaultfoeuille(
1213
1226
  | `n_groups_dropped_crossers`, `n_groups_dropped_singleton_baseline` | Filter counts (multi-switch groups dropped before estimation; singleton-baseline groups excluded from variance) |
1214
1227
  | `n_groups_dropped_never_switching` | Backwards-compatibility metadata. Never-switching groups participate in the variance via stable-control roles; this field is no longer a filter count. |
1215
1228
 
1216
- **Multi-horizon event study** (Phase 2 - pass `L_max` to `fit()`):
1229
+ **Multi-horizon event study** (pass `L_max` to `fit()`):
1217
1230
 
1218
1231
  ```python
1219
1232
  results = est.fit(data, outcome="outcome", group="group",
@@ -1252,13 +1265,13 @@ print(f"Fraction of negative weights: {diagnostic.fraction_negative:.3f}")
1252
1265
  print(f"sigma_fe (sign-flipping threshold): {diagnostic.sigma_fe:.3f}")
1253
1266
  ```
1254
1267
 
1255
- > **Note:** Placebo SE is `NaN` for both the single-lag `DID_M^pl` and the dynamic placebos `DID^{pl}_l`. The point estimates are meaningful for visual pre-trends inspection; formal placebo inference (influence-function derivation) is deferred to a follow-up. See `REGISTRY.md` for the full contract.
1268
+ > **Note:** Placebo SE is `NaN` for the single-period `DID_M^pl` (`L_max=None`) because the per-period aggregation path has no influence-function derivation; the point estimate is meaningful for visual pre-trends inspection. Multi-horizon dynamic placebos `DID^{pl}_l` (`L_max >= 1`) have valid analytical SE via the same cohort-recentered plug-in variance as the positive horizons, with bootstrap SE available when `n_bootstrap > 0`. See `docs/methodology/REGISTRY.md` for the full contract.
1256
1269
 
1257
1270
  > **Note:** By default (`drop_larger_lower=True`), the estimator drops groups whose treatment switches more than once before estimation. This matches R `DIDmultiplegtDYN`'s default and is required for the analytical variance formula to be consistent with the point estimate. Each drop emits an explicit warning.
1258
1271
 
1259
- > **Note:** Phase 1 requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. Late-entry groups (missing the baseline) raise `ValueError`; interior-gap groups are dropped with a warning; terminally-missing groups (early exit / right-censoring) are retained and contribute from their observed periods only. This is a documented deviation from R `DIDmultiplegtDYN`, which supports unbalanced panels see [`docs/methodology/REGISTRY.md`](docs/methodology/REGISTRY.md) for the rationale, the defensive guards that make terminal missingness safe, and workarounds for unbalanced inputs.
1272
+ > **Note:** The estimator requires panels with a **balanced baseline** (every group observed at the first global period) and **no interior period gaps**. Late-entry groups (missing the baseline) raise `ValueError`; interior-gap groups are dropped with a warning; terminally-missing groups (early exit / right-censoring) are retained and contribute from their observed periods only. This is a documented deviation from R `DIDmultiplegtDYN`, which supports unbalanced panels - see [`docs/methodology/REGISTRY.md`](docs/methodology/REGISTRY.md) for the rationale, the defensive guards that make terminal missingness safe, and workarounds for unbalanced inputs.
1260
1273
 
1261
- > **Note:** Survey design (`survey_design`), covariate adjustment (`controls`), group-specific linear trends (`trends_linear`), and HonestDiD integration (`honest_did`) are not yet supported. They raise `NotImplementedError` with phase pointers - see [`ROADMAP.md`](ROADMAP.md) for the Phase 3 rollout.
1274
+ > **Note:** Survey design is supported via Taylor-series linearization on `pweight` with strata / PSU / FPC. Replicate-weight variance and PSU-level bootstrap for dCDH are a planned extension. The `aggregate` parameter still raises `NotImplementedError`.
1262
1275
 
1263
1276
  ### Triple Difference (DDD)
1264
1277
 
@@ -4,12 +4,14 @@ diff-diff: A library for Difference-in-Differences analysis.
4
4
  This library provides sklearn-like estimators for causal inference
5
5
  using the difference-in-differences methodology.
6
6
 
7
- For rigorous analysis, follow the 8-step practitioner workflow in
8
- docs/llms-practitioner.txt (based on Baker et al. 2025). After
9
- estimation, call ``practitioner_next_steps(results)`` for context-aware
10
- guidance on remaining diagnostic steps.
7
+ For rigorous analysis, follow the 8-step practitioner workflow based
8
+ on Baker et al. (2025). After estimation, call
9
+ ``practitioner_next_steps(results)`` for context-aware guidance on
10
+ remaining diagnostic steps.
11
11
 
12
- AI agent reference: docs/llms.txt
12
+ AI agents: call ``diff_diff.get_llm_guide()`` for a complete API reference.
13
+ Use ``get_llm_guide("practitioner")`` for the 8-step workflow or
14
+ ``get_llm_guide("full")`` for comprehensive documentation.
13
15
  """
14
16
 
15
17
  # Import backend detection from dedicated module (avoids circular imports)
@@ -200,6 +202,7 @@ from diff_diff.visualization import (
200
202
  plot_synth_weights,
201
203
  )
202
204
  from diff_diff.practitioner import practitioner_next_steps
205
+ from diff_diff._guides_api import get_llm_guide
203
206
  from diff_diff.datasets import (
204
207
  clear_cache,
205
208
  list_datasets,
@@ -228,7 +231,7 @@ EDiD = EfficientDiD
228
231
  ETWFE = WooldridgeDiD
229
232
  DCDH = ChaisemartinDHaultfoeuille
230
233
 
231
- __version__ = "3.1.1"
234
+ __version__ = "3.1.3"
232
235
  __all__ = [
233
236
  # Estimators
234
237
  "DifferenceInDifferences",
@@ -402,4 +405,6 @@ __all__ = [
402
405
  "clear_cache",
403
406
  # Practitioner guidance
404
407
  "practitioner_next_steps",
408
+ # LLM guide accessor
409
+ "get_llm_guide",
405
410
  ]
@@ -0,0 +1,48 @@
1
+ """Runtime accessor for bundled LLM guide files."""
2
+ from __future__ import annotations
3
+
4
+ from importlib.resources import files
5
+
6
+ _VARIANT_TO_FILE = {
7
+ "concise": "llms.txt",
8
+ "full": "llms-full.txt",
9
+ "practitioner": "llms-practitioner.txt",
10
+ }
11
+
12
+
13
+ def get_llm_guide(variant: str = "concise") -> str:
14
+ """Return the contents of a bundled LLM guide.
15
+
16
+ Parameters
17
+ ----------
18
+ variant : str, default "concise"
19
+ Which guide to load. Names are case-sensitive. One of:
20
+
21
+ - ``"concise"`` -- compact API reference (llms.txt)
22
+ - ``"full"`` -- complete API documentation (llms-full.txt)
23
+ - ``"practitioner"`` -- 8-step practitioner workflow (llms-practitioner.txt)
24
+
25
+ Returns
26
+ -------
27
+ str
28
+ The full text of the requested guide.
29
+
30
+ Raises
31
+ ------
32
+ ValueError
33
+ If ``variant`` is not one of the known guide names.
34
+
35
+ Examples
36
+ --------
37
+ >>> from diff_diff import get_llm_guide
38
+ >>> concise = get_llm_guide()
39
+ >>> workflow = get_llm_guide("practitioner")
40
+ """
41
+ try:
42
+ filename = _VARIANT_TO_FILE[variant]
43
+ except (KeyError, TypeError):
44
+ valid = ", ".join(repr(k) for k in _VARIANT_TO_FILE)
45
+ raise ValueError(
46
+ f"Unknown guide variant {variant!r}. Valid options: {valid}."
47
+ ) from None
48
+ return files("diff_diff.guides").joinpath(filename).read_text(encoding="utf-8")