diff-diff 3.3.0__tar.gz → 3.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. diff_diff-3.3.1/PKG-INFO +241 -0
  2. diff_diff-3.3.1/README.md +188 -0
  3. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/__init__.py +12 -2
  4. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/chaisemartin_dhaultfoeuille.py +530 -12
  5. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +185 -25
  6. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/chaisemartin_dhaultfoeuille_results.py +221 -26
  7. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/guides/llms-autonomous.txt +464 -23
  8. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/guides/llms-full.txt +1 -1
  9. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/guides/llms-practitioner.txt +18 -4
  10. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/guides/llms.txt +2 -1
  11. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/had.py +122 -50
  12. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/had_pretests.py +1683 -212
  13. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/profile.py +287 -3
  14. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/survey.py +146 -13
  15. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/synthetic_did.py +79 -11
  16. {diff_diff-3.3.0 → diff_diff-3.3.1}/pyproject.toml +1 -1
  17. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/Cargo.lock +1 -1
  18. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/Cargo.toml +1 -1
  19. diff_diff-3.3.0/PKG-INFO +0 -3172
  20. diff_diff-3.3.0/README.md +0 -3119
  21. {diff_diff-3.3.0 → diff_diff-3.3.1}/LICENSE +0 -0
  22. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/_backend.py +0 -0
  23. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/_guides_api.py +0 -0
  24. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/_nprobust_port.py +0 -0
  25. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/_reporting_helpers.py +0 -0
  26. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/bacon.py +0 -0
  27. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/bootstrap_utils.py +0 -0
  28. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/business_report.py +0 -0
  29. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/continuous_did.py +0 -0
  30. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/continuous_did_bspline.py +0 -0
  31. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/continuous_did_results.py +0 -0
  32. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/datasets.py +0 -0
  33. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/diagnostic_report.py +0 -0
  34. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/diagnostics.py +0 -0
  35. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/efficient_did.py +0 -0
  36. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/efficient_did_bootstrap.py +0 -0
  37. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/efficient_did_covariates.py +0 -0
  38. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/efficient_did_results.py +0 -0
  39. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/efficient_did_weights.py +0 -0
  40. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/estimators.py +0 -0
  41. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/guides/__init__.py +0 -0
  42. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/honest_did.py +0 -0
  43. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/imputation.py +0 -0
  44. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/imputation_bootstrap.py +0 -0
  45. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/imputation_results.py +0 -0
  46. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/linalg.py +0 -0
  47. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/local_linear.py +0 -0
  48. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/power.py +0 -0
  49. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/practitioner.py +0 -0
  50. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/prep.py +0 -0
  51. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/prep_dgp.py +0 -0
  52. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/pretrends.py +0 -0
  53. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/results.py +0 -0
  54. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/stacked_did.py +0 -0
  55. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/stacked_did_results.py +0 -0
  56. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/staggered.py +0 -0
  57. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/staggered_aggregation.py +0 -0
  58. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/staggered_bootstrap.py +0 -0
  59. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/staggered_results.py +0 -0
  60. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/staggered_triple_diff.py +0 -0
  61. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/staggered_triple_diff_results.py +0 -0
  62. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/sun_abraham.py +0 -0
  63. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/triple_diff.py +0 -0
  64. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/trop.py +0 -0
  65. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/trop_global.py +0 -0
  66. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/trop_local.py +0 -0
  67. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/trop_results.py +0 -0
  68. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/twfe.py +0 -0
  69. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/two_stage.py +0 -0
  70. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/two_stage_bootstrap.py +0 -0
  71. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/two_stage_results.py +0 -0
  72. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/utils.py +0 -0
  73. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/__init__.py +0 -0
  74. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_common.py +0 -0
  75. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_continuous.py +0 -0
  76. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_diagnostic.py +0 -0
  77. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_event_study.py +0 -0
  78. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_power.py +0 -0
  79. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_staggered.py +0 -0
  80. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/visualization/_synthetic.py +0 -0
  81. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/wooldridge.py +0 -0
  82. {diff_diff-3.3.0 → diff_diff-3.3.1}/diff_diff/wooldridge_results.py +0 -0
  83. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/build.rs +0 -0
  84. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/src/bootstrap.rs +0 -0
  85. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/src/lib.rs +0 -0
  86. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/src/linalg.rs +0 -0
  87. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/src/trop.rs +0 -0
  88. {diff_diff-3.3.0 → diff_diff-3.3.1}/rust/src/weights.rs +0 -0
@@ -0,0 +1,241 @@
1
+ Metadata-Version: 2.4
2
+ Name: diff-diff
3
+ Version: 3.3.1
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: Intended Audience :: Science/Research
6
+ Classifier: Operating System :: OS Independent
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.9
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
15
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
16
+ Classifier: Topic :: Scientific/Engineering
17
+ Requires-Dist: numpy>=1.20.0
18
+ Requires-Dist: pandas>=1.3.0
19
+ Requires-Dist: scipy>=1.7.0
20
+ Requires-Dist: pytest>=7.0 ; extra == 'dev'
21
+ Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
22
+ Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
23
+ Requires-Dist: black>=23.0 ; extra == 'dev'
24
+ Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
25
+ Requires-Dist: mypy>=1.0 ; extra == 'dev'
26
+ Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
27
+ Requires-Dist: matplotlib>=3.5 ; extra == 'dev'
28
+ Requires-Dist: nbmake>=1.5 ; extra == 'dev'
29
+ Requires-Dist: plotly>=5.0 ; extra == 'dev'
30
+ Requires-Dist: sphinx>=6.0 ; extra == 'docs'
31
+ Requires-Dist: pydata-sphinx-theme>=0.15 ; extra == 'docs'
32
+ Requires-Dist: sphinxext-opengraph>=0.9 ; extra == 'docs'
33
+ Requires-Dist: sphinx-sitemap>=2.5 ; extra == 'docs'
34
+ Requires-Dist: nbsphinx>=0.9 ; extra == 'docs'
35
+ Requires-Dist: matplotlib>=3.5 ; extra == 'docs'
36
+ Requires-Dist: plotly>=5.0 ; extra == 'plotly'
37
+ Provides-Extra: dev
38
+ Provides-Extra: docs
39
+ Provides-Extra: plotly
40
+ License-File: LICENSE
41
+ Summary: Difference-in-Differences causal inference with sklearn-like API. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends.
42
+ Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects,event-study,staggered-adoption,parallel-trends,synthetic-control,panel-data,did,twfe,callaway-santanna,honest-did,sensitivity-analysis
43
+ Author: diff-diff contributors
44
+ License-Expression: MIT
45
+ Requires-Python: >=3.9, <3.15
46
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
47
+ Project-URL: Documentation, https://diff-diff.readthedocs.io
48
+ Project-URL: Homepage, https://github.com/igerber/diff-diff
49
+ Project-URL: Issues, https://github.com/igerber/diff-diff/issues
50
+ Project-URL: Practitioner Guide, https://diff-diff.readthedocs.io/en/stable/llms-practitioner.txt
51
+ Project-URL: Repository, https://github.com/igerber/diff-diff
52
+
53
+ # diff-diff
54
+
55
+ <p align="center">
56
+ <img src="https://raw.githubusercontent.com/igerber/diff-diff/main/diff-diff.png"
57
+ alt="diff-diff: Difference-in-Differences causal inference in Python - sklearn-like API with Callaway-Sant'Anna, Synthetic DiD, Honest DiD, and Event Studies"
58
+ width="800">
59
+ </p>
60
+
61
+ [![PyPI version](https://img.shields.io/pypi/v/diff-diff.svg)](https://pypi.org/project/diff-diff/)
62
+ [![Python versions](https://img.shields.io/pypi/pyversions/diff-diff.svg)](https://pypi.org/project/diff-diff/)
63
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
64
+ [![Downloads](https://img.shields.io/pypi/dm/diff-diff.svg)](https://pypi.org/project/diff-diff/)
65
+ [![Documentation](https://readthedocs.org/projects/diff-diff/badge/?version=stable)](https://diff-diff.readthedocs.io/en/stable/)
66
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.19646175.svg)](https://doi.org/10.5281/zenodo.19646175)
67
+
68
+ A Python library for Difference-in-Differences (DiD) causal inference - sklearn-like estimators with statsmodels-style outputs, built for econometricians, marketing analysts, and data scientists running campaign-lift, policy, and staggered-rollout analyses.
69
+
70
+ ## Installation
71
+
72
+ ```bash
73
+ pip install diff-diff
74
+ ```
75
+
76
+ For development:
77
+
78
+ ```bash
79
+ git clone https://github.com/igerber/diff-diff.git
80
+ cd diff-diff
81
+ pip install -e ".[dev]"
82
+ ```
83
+
84
+ ## Quick Start
85
+
86
+ ```python
87
+ import pandas as pd
88
+ from diff_diff import DifferenceInDifferences # or: DiD
89
+
90
+ data = pd.DataFrame({
91
+ 'outcome': [10, 11, 15, 18, 9, 10, 12, 13],
92
+ 'treated': [1, 1, 1, 1, 0, 0, 0, 0],
93
+ 'post': [0, 0, 1, 1, 0, 0, 1, 1],
94
+ })
95
+
96
+ did = DifferenceInDifferences()
97
+ results = did.fit(data, outcome='outcome', treatment='treated', time='post')
98
+ print(results) # DiDResults(ATT=3.0000, SE=1.7321, p=0.1583)
99
+ results.print_summary() # full statsmodels-style table
100
+ ```
101
+
102
+ ## Documentation
103
+
104
+ - [Quickstart](https://diff-diff.readthedocs.io/en/stable/quickstart.html) - basic 2x2 DiD with column-name and formula interfaces, covariates, fixed effects, cluster-robust SEs
105
+ - [Choosing an Estimator](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html) - decision flowchart for picking the right estimator
106
+ - [Tutorials](https://diff-diff.readthedocs.io/en/stable/tutorials/01_basic_did.html) - hands-on Jupyter notebooks covering every estimator and design pattern
107
+ - [Troubleshooting](https://diff-diff.readthedocs.io/en/stable/troubleshooting.html) - common issues and solutions
108
+ - [R Comparison](https://diff-diff.readthedocs.io/en/stable/r_comparison.html) | [Python Comparison](https://diff-diff.readthedocs.io/en/stable/python_comparison.html) | [Benchmarks](https://diff-diff.readthedocs.io/en/stable/benchmarks.html) - validation results vs `did`, `synthdid`, `fixest`
109
+ - [API Reference](https://diff-diff.readthedocs.io/en/stable/api/index.html) - full API for all estimators, results classes, diagnostics, utilities
110
+
111
+ ## For AI Agents
112
+
113
+ If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis - testing assumptions, running sensitivity analysis, and checking robustness, not just calling `fit()`.
114
+
115
+ ```python
116
+ from diff_diff import get_llm_guide
117
+
118
+ get_llm_guide() # concise API reference
119
+ get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
120
+ get_llm_guide("full") # comprehensive documentation
121
+ get_llm_guide("autonomous") # autonomous-agent variant
122
+ ```
123
+
124
+ The guides are bundled in the wheel, so they remain available after `pip install` even with no network access. After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
125
+
126
+ ## For Data Scientists
127
+
128
+ Measuring campaign lift? Evaluating a product launch? Rolling out a policy in waves? diff-diff handles the causal inference so you can focus on the business question.
129
+
130
+ - [Which method fits my problem?](https://diff-diff.readthedocs.io/en/stable/practitioner_decision_tree.html) - start from your business scenario (campaign in some markets, staggered rollout, survey data) and find the right estimator
131
+ - [Getting started for practitioners](https://diff-diff.readthedocs.io/en/stable/practitioner_getting_started.html) - end-to-end walkthrough from marketing campaign to causal estimate to stakeholder-ready result
132
+ - [Brand awareness survey tutorial](https://diff-diff.readthedocs.io/en/stable/tutorials/17_brand_awareness_survey.html) - full example with complex survey design, brand funnel analysis, and staggered rollouts
133
+ - Have BRFSS/ACS/CPS individual records? Use [`aggregate_survey()`](https://diff-diff.readthedocs.io/en/stable/api/prep.html) to roll respondent-level microdata into a geographic-period panel with inverse-variance precision weights for second-stage DiD
134
+
135
+ `BusinessReport` and `DiagnosticReport` are experimental preview classes that produce plain-English output and a structured `to_dict()` schema from any fitted result - wording and schema will evolve. See [docs/methodology/REPORTING.md](https://github.com/igerber/diff-diff/blob/main/docs/methodology/REPORTING.md) for usage and stability notes.
136
+
137
+ ## Practitioner Workflow (Baker et al. 2025)
138
+
139
+ For rigorous DiD analysis, follow these 8 steps. Skipping diagnostic steps produces unreliable results.
140
+
141
+ 1. **Define target parameter** - ATT, group-time ATT(g,t), or event-study ATT_es(e). State whether weighted or unweighted.
142
+ 2. **State identification assumptions** - which parallel trends variant (unconditional, conditional, PT-GT-Nev, PT-GT-NYT), no-anticipation, overlap.
143
+ 3. **Test parallel trends** - simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect Callaway-Sant'Anna (CS) event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds.
144
+ 4. **Choose estimator** - staggered adoption -> Callaway-Sant'Anna (CS), Sun-Abraham (SA), or Borusyak-Jaravel-Spiess imputation (BJS), NOT plain TWFE; few treated units -> Synthetic DiD (SDiD); factor confounding -> TROP; simple 2x2 -> DiD. Run `BaconDecomposition` to diagnose TWFE bias.
145
+ 5. **Estimate** - `estimator.fit(data, ...)`. Always print the cluster count first and choose inference method based on the result (cluster-robust if >= 50 clusters, wild bootstrap if fewer).
146
+ 6. **Sensitivity analysis** - `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
147
+ 7. **Heterogeneity** - CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects` / `to_dataframe(level='cohort')`; subgroup re-estimation.
148
+ 8. **Robustness** - compare 2-3 estimators (CS vs SA vs BJS), report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds.
149
+
150
+ Full guide: `diff_diff.get_llm_guide("practitioner")`.
151
+
152
+ ## Estimators
153
+
154
+ - [DifferenceInDifferences](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - basic 2x2 DiD with robust/cluster-robust SEs, wild bootstrap, formula interface, and fixed effects
155
+ - [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - panel data DiD with unit and time fixed effects via within-transformation or dummies
156
+ - [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - event study design with period-specific treatment effects for dynamic analysis
157
+ - [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Callaway & Sant'Anna (2021) group-time ATT estimator for staggered adoption
158
+ - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The only estimator in this library for treatments that switch on AND off. Alias `DCDH`.
159
+ - [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies
160
+ - [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html) - Borusyak, Jaravel & Spiess (2024) imputation estimator, most efficient under homogeneous effects
161
+ - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance
162
+ - [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - Synthetic DiD combining standard DiD and synthetic control for few treated units
163
+ - [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html) - triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility
164
+ - [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html) - Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves
165
+ - [HeterogeneousAdoptionDiD](https://diff-diff.readthedocs.io/en/stable/api/had.html) - de Chaisemartin, Ciccia, D'Haultfœuille & Knau (2026) for designs where **no unit remains untreated**; local-linear estimator at the dose support boundary returning Weighted Average Slope (WAS) on Design 1' (`d̲ = 0`, i.e. a quasi-untreated group, QUG) or `WAS_{d̲}` on Design 1 (`d̲ > 0`, continuous-near-d̲ or mass-point), with a multi-period event-study extension (last-treatment cohort, pointwise CIs). **Panel-only** in this release - repeated cross-sections rejected by the validator. Alias `HAD`.
166
+ - [StackedDiD](https://diff-diff.readthedocs.io/en/stable/api/stacked_did.html) - Wing, Freedman & Hollingsworth (2024) stacked DiD with Q-weights and sub-experiments
167
+ - [EfficientDiD](https://diff-diff.readthedocs.io/en/stable/api/efficient_did.html) - Chen, Sant'Anna & Xie (2025) efficient DiD with optimal weighting for tighter SEs
168
+ - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
169
+ - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
170
+ - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
171
+ - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
172
+
173
+ ## Diagnostics & Sensitivity
174
+
175
+ - [Parallel Trends Testing](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - simple and Wasserstein-robust parallel trends tests, equivalence testing (TOST)
176
+ - [Placebo Tests](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - placebo timing, group, permutation, leave-one-out
177
+ - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html) - Rambachan & Roth (2023) sensitivity analysis: robust CI under PT violations, breakdown values
178
+ - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html) - Roth (2022) minimum detectable violation and power curves
179
+ - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html) - analytical and simulation-based MDE, sample size, power curves for study design
180
+
181
+ ## Survey Support
182
+
183
+ Most estimators accept an optional `survey_design` parameter (or `survey=` / `weights=` for `HeterogeneousAdoptionDiD`) for design-based variance estimation. Coverage and supported weight types vary by estimator - see the [Survey Design Support compatibility matrix](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html#survey-design-support) for the per-estimator support table.
184
+
185
+ - **Design elements available across the supported set**: strata, PSU, FPC, lonely PSU handling, nest. Weight types vary by estimator: some surfaces (e.g. CallawaySantAnna, StackedDiD, the HAD continuous path) accept `pweight` only; others accept `pweight` / `fweight` / `aweight`.
186
+ - **Variance methods**: Taylor Series Linearization (TSL via Binder 1983), replicate weights (BRR / Fay / JK1 / JKn / SDR), survey-aware bootstrap
187
+ - **Diagnostics**: DEFF per coefficient, effective n, subpopulation analysis, weight trimming, CV on estimates
188
+ - **Repeated cross-sections**: `CallawaySantAnna(panel=False)` for BRFSS, ACS, CPS
189
+
190
+ No other Python or R DiD package offers design-based variance estimation for modern heterogeneity-robust estimators.
191
+
192
+ ## Requirements
193
+
194
+ - Python 3.9 - 3.14
195
+ - numpy >= 1.20
196
+ - pandas >= 1.3
197
+ - scipy >= 1.7
198
+
199
+ ## Development
200
+
201
+ ```bash
202
+ # Install with dev dependencies
203
+ pip install -e ".[dev]"
204
+
205
+ # Run tests
206
+ pytest
207
+
208
+ # Format code
209
+ black diff_diff tests
210
+ ruff check diff_diff tests
211
+ ```
212
+
213
+ ## References
214
+
215
+ This library implements methods from a wide body of econometric and causal-inference research. See the full bibliography on [Read the Docs](https://diff-diff.readthedocs.io/en/stable/references.html) for citations spanning DiD foundations, modern staggered estimators, sensitivity analysis, and synthetic controls.
216
+
217
+ ## Citing diff-diff
218
+
219
+ If you use diff-diff in your research, please cite it:
220
+
221
+ ```bibtex
222
+ @software{diff_diff,
223
+ title = {diff-diff: Difference-in-Differences Causal Inference for Python},
224
+ author = {Gerber, Isaac},
225
+ year = {2026},
226
+ url = {https://github.com/igerber/diff-diff},
227
+ doi = {10.5281/zenodo.19646175},
228
+ license = {MIT},
229
+ }
230
+ ```
231
+
232
+ The DOI above is the Zenodo concept DOI - it always resolves to the latest release. To cite a specific version, look up its versioned DOI on [the Zenodo project page](https://doi.org/10.5281/zenodo.19646175).
233
+
234
+ See [`CITATION.cff`](https://github.com/igerber/diff-diff/blob/main/CITATION.cff) for the full citation metadata.
235
+
236
+ **Note on authorship**: academic citation (`CITATION.cff`, the BibTeX above) lists individual authors with ORCIDs per scholarly convention. Package metadata surfaces (`pyproject.toml`, Sphinx docs) list "diff-diff contributors" to acknowledge the collective - see [`CONTRIBUTORS.md`](https://github.com/igerber/diff-diff/blob/main/CONTRIBUTORS.md) for the full list.
237
+
238
+ ## License
239
+
240
+ MIT License
241
+
@@ -0,0 +1,188 @@
1
+ # diff-diff
2
+
3
+ <p align="center">
4
+ <img src="https://raw.githubusercontent.com/igerber/diff-diff/main/diff-diff.png"
5
+ alt="diff-diff: Difference-in-Differences causal inference in Python - sklearn-like API with Callaway-Sant'Anna, Synthetic DiD, Honest DiD, and Event Studies"
6
+ width="800">
7
+ </p>
8
+
9
+ [![PyPI version](https://img.shields.io/pypi/v/diff-diff.svg)](https://pypi.org/project/diff-diff/)
10
+ [![Python versions](https://img.shields.io/pypi/pyversions/diff-diff.svg)](https://pypi.org/project/diff-diff/)
11
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
12
+ [![Downloads](https://img.shields.io/pypi/dm/diff-diff.svg)](https://pypi.org/project/diff-diff/)
13
+ [![Documentation](https://readthedocs.org/projects/diff-diff/badge/?version=stable)](https://diff-diff.readthedocs.io/en/stable/)
14
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.19646175.svg)](https://doi.org/10.5281/zenodo.19646175)
15
+
16
+ A Python library for Difference-in-Differences (DiD) causal inference - sklearn-like estimators with statsmodels-style outputs, built for econometricians, marketing analysts, and data scientists running campaign-lift, policy, and staggered-rollout analyses.
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install diff-diff
22
+ ```
23
+
24
+ For development:
25
+
26
+ ```bash
27
+ git clone https://github.com/igerber/diff-diff.git
28
+ cd diff-diff
29
+ pip install -e ".[dev]"
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ```python
35
+ import pandas as pd
36
+ from diff_diff import DifferenceInDifferences # or: DiD
37
+
38
+ data = pd.DataFrame({
39
+ 'outcome': [10, 11, 15, 18, 9, 10, 12, 13],
40
+ 'treated': [1, 1, 1, 1, 0, 0, 0, 0],
41
+ 'post': [0, 0, 1, 1, 0, 0, 1, 1],
42
+ })
43
+
44
+ did = DifferenceInDifferences()
45
+ results = did.fit(data, outcome='outcome', treatment='treated', time='post')
46
+ print(results) # DiDResults(ATT=3.0000, SE=1.7321, p=0.1583)
47
+ results.print_summary() # full statsmodels-style table
48
+ ```
49
+
50
+ ## Documentation
51
+
52
+ - [Quickstart](https://diff-diff.readthedocs.io/en/stable/quickstart.html) - basic 2x2 DiD with column-name and formula interfaces, covariates, fixed effects, cluster-robust SEs
53
+ - [Choosing an Estimator](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html) - decision flowchart for picking the right estimator
54
+ - [Tutorials](https://diff-diff.readthedocs.io/en/stable/tutorials/01_basic_did.html) - hands-on Jupyter notebooks covering every estimator and design pattern
55
+ - [Troubleshooting](https://diff-diff.readthedocs.io/en/stable/troubleshooting.html) - common issues and solutions
56
+ - [R Comparison](https://diff-diff.readthedocs.io/en/stable/r_comparison.html) | [Python Comparison](https://diff-diff.readthedocs.io/en/stable/python_comparison.html) | [Benchmarks](https://diff-diff.readthedocs.io/en/stable/benchmarks.html) - validation results vs `did`, `synthdid`, `fixest`
57
+ - [API Reference](https://diff-diff.readthedocs.io/en/stable/api/index.html) - full API for all estimators, results classes, diagnostics, utilities
58
+
59
+ ## For AI Agents
60
+
61
+ If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis - testing assumptions, running sensitivity analysis, and checking robustness, not just calling `fit()`.
62
+
63
+ ```python
64
+ from diff_diff import get_llm_guide
65
+
66
+ get_llm_guide() # concise API reference
67
+ get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
68
+ get_llm_guide("full") # comprehensive documentation
69
+ get_llm_guide("autonomous") # autonomous-agent variant
70
+ ```
71
+
72
+ The guides are bundled in the wheel, so they remain available after `pip install` even with no network access. After estimation, call `practitioner_next_steps(results)` for context-aware guidance on remaining diagnostic steps.
73
+
74
+ ## For Data Scientists
75
+
76
+ Measuring campaign lift? Evaluating a product launch? Rolling out a policy in waves? diff-diff handles the causal inference so you can focus on the business question.
77
+
78
+ - [Which method fits my problem?](https://diff-diff.readthedocs.io/en/stable/practitioner_decision_tree.html) - start from your business scenario (campaign in some markets, staggered rollout, survey data) and find the right estimator
79
+ - [Getting started for practitioners](https://diff-diff.readthedocs.io/en/stable/practitioner_getting_started.html) - end-to-end walkthrough from marketing campaign to causal estimate to stakeholder-ready result
80
+ - [Brand awareness survey tutorial](https://diff-diff.readthedocs.io/en/stable/tutorials/17_brand_awareness_survey.html) - full example with complex survey design, brand funnel analysis, and staggered rollouts
81
+ - Have BRFSS/ACS/CPS individual records? Use [`aggregate_survey()`](https://diff-diff.readthedocs.io/en/stable/api/prep.html) to roll respondent-level microdata into a geographic-period panel with inverse-variance precision weights for second-stage DiD
82
+
83
+ `BusinessReport` and `DiagnosticReport` are experimental preview classes that produce plain-English output and a structured `to_dict()` schema from any fitted result - wording and schema will evolve. See [docs/methodology/REPORTING.md](https://github.com/igerber/diff-diff/blob/main/docs/methodology/REPORTING.md) for usage and stability notes.
84
+
85
+ ## Practitioner Workflow (Baker et al. 2025)
86
+
87
+ For rigorous DiD analysis, follow these 8 steps. Skipping diagnostic steps produces unreliable results.
88
+
89
+ 1. **Define target parameter** - ATT, group-time ATT(g,t), or event-study ATT_es(e). State whether weighted or unweighted.
90
+ 2. **State identification assumptions** - which parallel trends variant (unconditional, conditional, PT-GT-Nev, PT-GT-NYT), no-anticipation, overlap.
91
+ 3. **Test parallel trends** - simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect Callaway-Sant'Anna (CS) event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds.
92
+ 4. **Choose estimator** - staggered adoption -> Callaway-Sant'Anna (CS), Sun-Abraham (SA), or Borusyak-Jaravel-Spiess imputation (BJS), NOT plain TWFE; few treated units -> Synthetic DiD (SDiD); factor confounding -> TROP; simple 2x2 -> DiD. Run `BaconDecomposition` to diagnose TWFE bias.
93
+ 5. **Estimate** - `estimator.fit(data, ...)`. Always print the cluster count first and choose inference method based on the result (cluster-robust if >= 50 clusters, wild bootstrap if fewer).
94
+ 6. **Sensitivity analysis** - `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
95
+ 7. **Heterogeneity** - CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects` / `to_dataframe(level='cohort')`; subgroup re-estimation.
96
+ 8. **Robustness** - compare 2-3 estimators (CS vs SA vs BJS), report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds.
97
+
98
+ Full guide: `diff_diff.get_llm_guide("practitioner")`.
99
+
100
+ ## Estimators
101
+
102
+ - [DifferenceInDifferences](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - basic 2x2 DiD with robust/cluster-robust SEs, wild bootstrap, formula interface, and fixed effects
103
+ - [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - panel data DiD with unit and time fixed effects via within-transformation or dummies
104
+ - [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - event study design with period-specific treatment effects for dynamic analysis
105
+ - [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Callaway & Sant'Anna (2021) group-time ATT estimator for staggered adoption
106
+ - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The only library option for treatments that switch on AND off. Alias `DCDH`.
107
+ - [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies
108
+ - [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html) - Borusyak, Jaravel & Spiess (2024) imputation estimator, most efficient under homogeneous effects
109
+ - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance
110
+ - [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - Synthetic DiD combining standard DiD and synthetic control for few treated units
111
+ - [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html) - triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility
112
+ - [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html) - Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves
113
+ - [HeterogeneousAdoptionDiD](https://diff-diff.readthedocs.io/en/stable/api/had.html) - de Chaisemartin, Ciccia, D'Haultfœuille & Knau (2026) for designs where **no unit remains untreated**; local-linear estimator at the dose support boundary returning Weighted Average Slope (WAS) on Design 1' (`d̲ = 0` / QUG) or `WAS_{d̲}` on Design 1 (`d̲ > 0`, continuous-near-d̲ or mass-point), with a multi-period event-study extension (last-treatment cohort, pointwise CIs). **Panel-only** in this release - repeated cross-sections rejected by the validator. Alias `HAD`.
114
+ - [StackedDiD](https://diff-diff.readthedocs.io/en/stable/api/stacked_did.html) - Wing, Freedman & Hollingsworth (2024) stacked DiD with Q-weights and sub-experiments
115
+ - [EfficientDiD](https://diff-diff.readthedocs.io/en/stable/api/efficient_did.html) - Chen, Sant'Anna & Xie (2025) efficient DiD with optimal weighting for tighter SEs
116
+ - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
117
+ - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
118
+ - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
119
+ - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
120
+
121
+ ## Diagnostics & Sensitivity
122
+
123
+ - [Parallel Trends Testing](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - simple and Wasserstein-robust parallel trends tests, equivalence testing (TOST)
124
+ - [Placebo Tests](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - placebo timing, group, permutation, leave-one-out
125
+ - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html) - Rambachan & Roth (2023) sensitivity analysis: robust CI under PT violations, breakdown values
126
+ - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html) - Roth (2022) minimum detectable violation and power curves
127
+ - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html) - analytical and simulation-based MDE, sample size, power curves for study design
128
+
129
+ ## Survey Support
130
+
131
+ Most estimators accept an optional `survey_design` parameter (or `survey=` / `weights=` for `HeterogeneousAdoptionDiD`) for design-based variance estimation. Coverage and supported weight types vary by estimator - see the [Survey Design Support compatibility matrix](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html#survey-design-support) for the per-estimator support table.
132
+
133
+ - **Design elements available across the supported set**: strata, PSU, FPC, lonely PSU handling, nest. Weight types vary by estimator: some estimators and code paths (e.g. CallawaySantAnna, StackedDiD, the HAD continuous path) accept `pweight` only; others accept `pweight` / `fweight` / `aweight`.
134
+ - **Variance methods**: Taylor Series Linearization (TSL via Binder 1983), replicate weights (BRR / Fay / JK1 / JKn / SDR), survey-aware bootstrap
135
+ - **Diagnostics**: DEFF per coefficient, effective n, subpopulation analysis, weight trimming, CV on estimates
136
+ - **Repeated cross-sections**: `CallawaySantAnna(panel=False)` for BRFSS, ACS, CPS
137
+
138
+ No other Python or R DiD package offers design-based variance estimation for modern heterogeneity-robust estimators.
139
+
140
+ ## Requirements
141
+
142
+ - Python 3.9 - 3.14
143
+ - numpy >= 1.20
144
+ - pandas >= 1.3
145
+ - scipy >= 1.7
146
+
147
+ ## Development
148
+
149
+ ```bash
150
+ # Install with dev dependencies
151
+ pip install -e ".[dev]"
152
+
153
+ # Run tests
154
+ pytest
155
+
156
+ # Format code
157
+ black diff_diff tests
158
+ ruff check diff_diff tests
159
+ ```
160
+
161
+ ## References
162
+
163
+ This library implements methods from a wide body of econometric and causal-inference research. See the full bibliography on [Read the Docs](https://diff-diff.readthedocs.io/en/stable/references.html) for citations spanning DiD foundations, modern staggered estimators, sensitivity analysis, and synthetic controls.
164
+
165
+ ## Citing diff-diff
166
+
167
+ If you use diff-diff in your research, please cite it:
168
+
169
+ ```bibtex
170
+ @software{diff_diff,
171
+ title = {diff-diff: Difference-in-Differences Causal Inference for Python},
172
+ author = {Gerber, Isaac},
173
+ year = {2026},
174
+ url = {https://github.com/igerber/diff-diff},
175
+ doi = {10.5281/zenodo.19646175},
176
+ license = {MIT},
177
+ }
178
+ ```
179
+
180
+ The DOI above is the Zenodo concept DOI - it always resolves to the latest release. To cite a specific version, look up its versioned DOI on [the Zenodo project page](https://doi.org/10.5281/zenodo.19646175).
181
+
182
+ See [`CITATION.cff`](https://github.com/igerber/diff-diff/blob/main/CITATION.cff) for the full citation metadata.
183
+
184
+ **Note on authorship**: academic citation (`CITATION.cff`, the BibTeX above) lists individual authors with ORCIDs per scholarly convention. Package metadata surfaces (`pyproject.toml`, Sphinx docs) list "diff-diff contributors" to acknowledge the collective effort - see [`CONTRIBUTORS.md`](https://github.com/igerber/diff-diff/blob/main/CONTRIBUTORS.md) for the full list.
185
+
186
+ ## License
187
+
188
+ MIT License
@@ -151,6 +151,7 @@ from diff_diff.survey import (
151
151
  SurveyDesign,
152
152
  SurveyMetadata,
153
153
  compute_deff_diagnostics,
154
+ make_pweight_design,
154
155
  )
155
156
  from diff_diff.staggered import (
156
157
  CallawaySantAnna,
@@ -250,7 +251,13 @@ from diff_diff.diagnostic_report import (
250
251
  DiagnosticReportResults,
251
252
  )
252
253
  from diff_diff._guides_api import get_llm_guide
253
- from diff_diff.profile import Alert, PanelProfile, profile_panel
254
+ from diff_diff.profile import (
255
+ Alert,
256
+ OutcomeShape,
257
+ PanelProfile,
258
+ TreatmentDoseShape,
259
+ profile_panel,
260
+ )
254
261
  from diff_diff.datasets import (
255
262
  clear_cache,
256
263
  list_datasets,
@@ -280,7 +287,7 @@ ETWFE = WooldridgeDiD
280
287
  DCDH = ChaisemartinDHaultfoeuille
281
288
  HAD = HeterogeneousAdoptionDiD
282
289
 
283
- __version__ = "3.3.0"
290
+ __version__ = "3.3.1"
284
291
  __all__ = [
285
292
  # Estimators
286
293
  "DifferenceInDifferences",
@@ -439,6 +446,7 @@ __all__ = [
439
446
  "SurveyMetadata",
440
447
  "DEFFDiagnostics",
441
448
  "compute_deff_diagnostics",
449
+ "make_pweight_design",
442
450
  # Rust backend
443
451
  "HAS_RUST_BACKEND",
444
452
  # Linear algebra helpers
@@ -498,6 +506,8 @@ __all__ = [
498
506
  "profile_panel",
499
507
  "PanelProfile",
500
508
  "Alert",
509
+ "OutcomeShape",
510
+ "TreatmentDoseShape",
501
511
  # LLM guide accessor
502
512
  "get_llm_guide",
503
513
  ]