diff-diff 3.3.0__tar.gz → 3.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. diff_diff-3.3.2/PKG-INFO +240 -0
  2. diff_diff-3.3.2/README.md +187 -0
  3. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/__init__.py +12 -2
  4. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/chaisemartin_dhaultfoeuille.py +928 -33
  5. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/chaisemartin_dhaultfoeuille_bootstrap.py +185 -25
  6. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/chaisemartin_dhaultfoeuille_results.py +306 -26
  7. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/guides/llms-autonomous.txt +464 -23
  8. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/guides/llms-full.txt +1 -1
  9. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/guides/llms-practitioner.txt +18 -4
  10. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/guides/llms.txt +2 -1
  11. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/had.py +226 -50
  12. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/had_pretests.py +2174 -242
  13. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/profile.py +287 -3
  14. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/survey.py +146 -13
  15. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/synthetic_did.py +79 -11
  16. {diff_diff-3.3.0 → diff_diff-3.3.2}/pyproject.toml +1 -1
  17. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/Cargo.lock +321 -32
  18. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/Cargo.toml +4 -4
  19. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/src/bootstrap.rs +3 -3
  20. diff_diff-3.3.0/PKG-INFO +0 -3172
  21. diff_diff-3.3.0/README.md +0 -3119
  22. {diff_diff-3.3.0 → diff_diff-3.3.2}/LICENSE +0 -0
  23. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/_backend.py +0 -0
  24. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/_guides_api.py +0 -0
  25. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/_nprobust_port.py +0 -0
  26. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/_reporting_helpers.py +0 -0
  27. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/bacon.py +0 -0
  28. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/bootstrap_utils.py +0 -0
  29. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/business_report.py +0 -0
  30. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/continuous_did.py +0 -0
  31. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/continuous_did_bspline.py +0 -0
  32. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/continuous_did_results.py +0 -0
  33. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/datasets.py +0 -0
  34. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/diagnostic_report.py +0 -0
  35. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/diagnostics.py +0 -0
  36. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/efficient_did.py +0 -0
  37. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/efficient_did_bootstrap.py +0 -0
  38. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/efficient_did_covariates.py +0 -0
  39. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/efficient_did_results.py +0 -0
  40. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/efficient_did_weights.py +0 -0
  41. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/estimators.py +0 -0
  42. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/guides/__init__.py +0 -0
  43. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/honest_did.py +0 -0
  44. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/imputation.py +0 -0
  45. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/imputation_bootstrap.py +0 -0
  46. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/imputation_results.py +0 -0
  47. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/linalg.py +0 -0
  48. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/local_linear.py +0 -0
  49. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/power.py +0 -0
  50. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/practitioner.py +0 -0
  51. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/prep.py +0 -0
  52. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/prep_dgp.py +0 -0
  53. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/pretrends.py +0 -0
  54. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/results.py +0 -0
  55. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/stacked_did.py +0 -0
  56. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/stacked_did_results.py +0 -0
  57. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/staggered.py +0 -0
  58. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/staggered_aggregation.py +0 -0
  59. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/staggered_bootstrap.py +0 -0
  60. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/staggered_results.py +0 -0
  61. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/staggered_triple_diff.py +0 -0
  62. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/staggered_triple_diff_results.py +0 -0
  63. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/sun_abraham.py +0 -0
  64. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/triple_diff.py +0 -0
  65. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/trop.py +0 -0
  66. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/trop_global.py +0 -0
  67. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/trop_local.py +0 -0
  68. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/trop_results.py +0 -0
  69. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/twfe.py +0 -0
  70. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/two_stage.py +0 -0
  71. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/two_stage_bootstrap.py +0 -0
  72. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/two_stage_results.py +0 -0
  73. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/utils.py +0 -0
  74. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/__init__.py +0 -0
  75. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_common.py +0 -0
  76. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_continuous.py +0 -0
  77. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_diagnostic.py +0 -0
  78. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_event_study.py +0 -0
  79. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_power.py +0 -0
  80. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_staggered.py +0 -0
  81. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/visualization/_synthetic.py +0 -0
  82. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/wooldridge.py +0 -0
  83. {diff_diff-3.3.0 → diff_diff-3.3.2}/diff_diff/wooldridge_results.py +0 -0
  84. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/build.rs +0 -0
  85. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/src/lib.rs +0 -0
  86. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/src/linalg.rs +0 -0
  87. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/src/trop.rs +0 -0
  88. {diff_diff-3.3.0 → diff_diff-3.3.2}/rust/src/weights.rs +0 -0
@@ -0,0 +1,240 @@
1
+ Metadata-Version: 2.4
2
+ Name: diff-diff
3
+ Version: 3.3.2
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: Intended Audience :: Science/Research
6
+ Classifier: Operating System :: OS Independent
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.9
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
15
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
16
+ Classifier: Topic :: Scientific/Engineering
17
+ Requires-Dist: numpy>=1.20.0
18
+ Requires-Dist: pandas>=1.3.0
19
+ Requires-Dist: scipy>=1.7.0
20
+ Requires-Dist: pytest>=7.0 ; extra == 'dev'
21
+ Requires-Dist: pytest-xdist>=3.0 ; extra == 'dev'
22
+ Requires-Dist: pytest-cov>=4.0 ; extra == 'dev'
23
+ Requires-Dist: black>=23.0 ; extra == 'dev'
24
+ Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
25
+ Requires-Dist: mypy>=1.0 ; extra == 'dev'
26
+ Requires-Dist: maturin>=1.4,<2.0 ; extra == 'dev'
27
+ Requires-Dist: matplotlib>=3.5 ; extra == 'dev'
28
+ Requires-Dist: nbmake>=1.5 ; extra == 'dev'
29
+ Requires-Dist: plotly>=5.0 ; extra == 'dev'
30
+ Requires-Dist: sphinx>=6.0 ; extra == 'docs'
31
+ Requires-Dist: pydata-sphinx-theme>=0.15 ; extra == 'docs'
32
+ Requires-Dist: sphinxext-opengraph>=0.9 ; extra == 'docs'
33
+ Requires-Dist: sphinx-sitemap>=2.5 ; extra == 'docs'
34
+ Requires-Dist: nbsphinx>=0.9 ; extra == 'docs'
35
+ Requires-Dist: matplotlib>=3.5 ; extra == 'docs'
36
+ Requires-Dist: plotly>=5.0 ; extra == 'plotly'
37
+ Provides-Extra: dev
38
+ Provides-Extra: docs
39
+ Provides-Extra: plotly
40
+ License-File: LICENSE
41
+ Summary: Difference-in-Differences causal inference with sklearn-like API. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends.
42
+ Keywords: causal-inference,difference-in-differences,econometrics,statistics,treatment-effects,event-study,staggered-adoption,parallel-trends,synthetic-control,panel-data,did,twfe,callaway-santanna,honest-did,sensitivity-analysis
43
+ Author: diff-diff contributors
44
+ License-Expression: MIT
45
+ Requires-Python: >=3.9, <3.15
46
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
47
+ Project-URL: Documentation, https://diff-diff.readthedocs.io
48
+ Project-URL: Homepage, https://github.com/igerber/diff-diff
49
+ Project-URL: Issues, https://github.com/igerber/diff-diff/issues
50
+ Project-URL: Practitioner Guide, https://diff-diff.readthedocs.io/en/stable/llms-practitioner.txt
51
+ Project-URL: Repository, https://github.com/igerber/diff-diff
52
+
53
+ # diff-diff
54
+
55
+ <p align="center">
56
+ <img src="https://raw.githubusercontent.com/igerber/diff-diff/main/diff-diff.png"
57
+ alt="diff-diff: Difference-in-Differences causal inference in Python - sklearn-like API with Callaway-Sant'Anna, Synthetic DiD, Honest DiD, and Event Studies"
58
+ width="800">
59
+ </p>
60
+
61
+ [![PyPI version](https://img.shields.io/pypi/v/diff-diff.svg)](https://pypi.org/project/diff-diff/)
62
+ [![Python versions](https://img.shields.io/pypi/pyversions/diff-diff.svg)](https://pypi.org/project/diff-diff/)
63
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
64
+ [![Downloads](https://static.pepy.tech/badge/diff-diff)](https://pepy.tech/projects/diff-diff)
65
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.19646175.svg)](https://doi.org/10.5281/zenodo.19646175)
66
+
67
+ A Python library for Difference-in-Differences (DiD) causal inference - sklearn-like estimators with statsmodels-style outputs, built for econometricians, marketing analysts, and data scientists running campaign-lift, policy, and staggered-rollout analyses.
68
+
69
+ ## Installation
70
+
71
+ ```bash
72
+ pip install diff-diff
73
+ ```
74
+
75
+ For development:
76
+
77
+ ```bash
78
+ git clone https://github.com/igerber/diff-diff.git
79
+ cd diff-diff
80
+ pip install -e ".[dev]"
81
+ ```
82
+
83
+ ## Quick Start
84
+
85
+ ```python
86
+ import pandas as pd
87
+ from diff_diff import DifferenceInDifferences # or: DiD
88
+
89
+ data = pd.DataFrame({
90
+ 'outcome': [10, 11, 15, 18, 9, 10, 12, 13],
91
+ 'treated': [1, 1, 1, 1, 0, 0, 0, 0],
92
+ 'post': [0, 0, 1, 1, 0, 0, 1, 1],
93
+ })
94
+
95
+ did = DifferenceInDifferences()
96
+ results = did.fit(data, outcome='outcome', treatment='treated', time='post')
97
+ print(results) # DiDResults(ATT=3.0000, SE=1.7321, p=0.1583)
98
+ results.print_summary() # full statsmodels-style table
99
+ ```
100
+
101
+ ## Documentation
102
+
103
+ - [Quickstart](https://diff-diff.readthedocs.io/en/stable/quickstart.html) - basic 2x2 DiD with column-name and formula interfaces, covariates, fixed effects, cluster-robust SEs
104
+ - [Choosing an Estimator](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html) - decision flowchart for picking the right estimator
105
+ - [Tutorials](https://diff-diff.readthedocs.io/en/stable/tutorials/01_basic_did.html) - hands-on Jupyter notebooks covering every estimator and design pattern
106
+ - [Troubleshooting](https://diff-diff.readthedocs.io/en/stable/troubleshooting.html) - common issues and solutions
107
+ - [R Comparison](https://diff-diff.readthedocs.io/en/stable/r_comparison.html) | [Python Comparison](https://diff-diff.readthedocs.io/en/stable/python_comparison.html) | [Benchmarks](https://diff-diff.readthedocs.io/en/stable/benchmarks.html) - validation results vs `did`, `synthdid`, `fixest`
108
+ - [API Reference](https://diff-diff.readthedocs.io/en/stable/api/index.html) - full API for all estimators, results classes, diagnostics, utilities
109
+
110
+ ## For AI Agents
111
+
112
+ If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis - testing assumptions, running sensitivity analysis, and checking robustness, not just calling `fit()`.
113
+
114
+ ```python
115
+ from diff_diff import get_llm_guide
116
+
117
+ get_llm_guide() # concise API reference
118
+ get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
119
+ get_llm_guide("full") # comprehensive documentation
120
+ get_llm_guide("autonomous") # autonomous-agent variant
121
+ ```
122
+
123
+ The guides are bundled in the wheel, so once the package is installed they can be read offline with no network access. After estimation, call `practitioner_next_steps(results)` for context-aware guidance on the remaining diagnostic steps.
124
+
125
+ ## For Data Scientists
126
+
127
+ Measuring campaign lift? Evaluating a product launch? Rolling out a policy in waves? diff-diff handles the causal inference so you can focus on the business question.
128
+
129
+ - [Which method fits my problem?](https://diff-diff.readthedocs.io/en/stable/practitioner_decision_tree.html) - start from your business scenario (campaign in some markets, staggered rollout, survey data) and find the right estimator
130
+ - [Getting started for practitioners](https://diff-diff.readthedocs.io/en/stable/practitioner_getting_started.html) - end-to-end walkthrough from marketing campaign to causal estimate to stakeholder-ready result
131
+ - [Brand awareness survey tutorial](https://diff-diff.readthedocs.io/en/stable/tutorials/17_brand_awareness_survey.html) - full example with complex survey design, brand funnel analysis, and staggered rollouts
132
+ - Have BRFSS/ACS/CPS individual records? Use [`aggregate_survey()`](https://diff-diff.readthedocs.io/en/stable/api/prep.html) to roll respondent-level microdata into a geographic-period panel with inverse-variance precision weights for second-stage DiD
133
+
134
+ `BusinessReport` and `DiagnosticReport` are experimental preview classes that produce plain-English output and a structured `to_dict()` schema from any fitted result - wording and schema will evolve. See [docs/methodology/REPORTING.md](https://github.com/igerber/diff-diff/blob/main/docs/methodology/REPORTING.md) for usage and stability notes.
135
+
136
+ ## Practitioner Workflow (Baker et al. 2025)
137
+
138
+ For rigorous DiD analysis, follow these 8 steps. Skipping diagnostic steps produces unreliable results.
139
+
140
+ 1. **Define target parameter** - ATT, group-time ATT(g,t), or event-study ATT_es(e). State whether weighted or unweighted.
141
+ 2. **State identification assumptions** - which parallel trends variant (unconditional, conditional, PT-GT-Nev, PT-GT-NYT), no-anticipation, overlap.
142
+ 3. **Test parallel trends** - simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect CS event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds.
143
+ 4. **Choose estimator** - staggered adoption -> CS/SA/BJS (NOT plain TWFE); few treated units -> SDiD; factor confounding -> TROP; simple 2x2 -> DiD. Run `BaconDecomposition` to diagnose TWFE bias.
144
+ 5. **Estimate** - `estimator.fit(data, ...)`. Always print the cluster count first, then choose the inference method based on it (cluster-robust SEs if >= 50 clusters, wild bootstrap if fewer).
145
+ 6. **Sensitivity analysis** - `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
146
+ 7. **Heterogeneity** - CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects` / `to_dataframe(level='cohort')`; subgroup re-estimation.
147
+ 8. **Robustness** - compare 2-3 estimators (CS vs SA vs BJS), report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds.
148
+
149
+ Full guide: `diff_diff.get_llm_guide("practitioner")`.
150
+
151
+ ## Estimators
152
+
153
+ - [DifferenceInDifferences](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - basic 2x2 DiD with robust/cluster-robust SEs, wild bootstrap, formula interface, and fixed effects
154
+ - [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - panel data DiD with unit and time fixed effects via within-transformation or dummies
155
+ - [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - event study design with period-specific treatment effects for dynamic analysis
156
+ - [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Callaway & Sant'Anna (2021) group-time ATT estimator for staggered adoption
157
+ - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The only library option for treatments that switch on AND off. Alias `DCDH`.
158
+ - [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies
159
+ - [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html) - Borusyak, Jaravel & Spiess (2024) imputation estimator, most efficient under homogeneous effects
160
+ - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance
161
+ - [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - Synthetic DiD combining standard DiD and synthetic control for few treated units
162
+ - [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html) - triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility
163
+ - [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html) - Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves
164
+ - [HeterogeneousAdoptionDiD](https://diff-diff.readthedocs.io/en/stable/api/had.html) - de Chaisemartin, Ciccia, D'Haultfœuille & Knau (2026) for designs where **no unit remains untreated**; local-linear estimator at the dose support boundary returning Weighted Average Slope (WAS) on Design 1' (`d̲ = 0` / QUG) or `WAS_{d̲}` on Design 1 (`d̲ > 0`, continuous-near-d̲ or mass-point), with a multi-period event-study extension (last-treatment cohort, pointwise CIs). **Panel-only** in this release - repeated cross-sections rejected by the validator. Alias `HAD`.
165
+ - [StackedDiD](https://diff-diff.readthedocs.io/en/stable/api/stacked_did.html) - Wing, Freedman & Hollingsworth (2024) stacked DiD with Q-weights and sub-experiments
166
+ - [EfficientDiD](https://diff-diff.readthedocs.io/en/stable/api/efficient_did.html) - Chen, Sant'Anna & Xie (2025) efficient DiD with optimal weighting for tighter SEs
167
+ - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
168
+ - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
169
+ - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
170
+ - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
171
+
172
+ ## Diagnostics & Sensitivity
173
+
174
+ - [Parallel Trends Testing](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - simple and Wasserstein-robust parallel trends tests, equivalence testing (TOST)
175
+ - [Placebo Tests](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - placebo timing, group, permutation, leave-one-out
176
+ - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html) - Rambachan & Roth (2023) sensitivity analysis: robust CI under PT violations, breakdown values
177
+ - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html) - Roth (2022) minimum detectable violation and power curves
178
+ - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html) - analytical and simulation-based MDE, sample size, power curves for study design
179
+
180
+ ## Survey Support
181
+
182
+ Most estimators accept an optional `survey_design` parameter (or `survey=` / `weights=` for `HeterogeneousAdoptionDiD`) for design-based variance estimation. Coverage and supported weight types vary by estimator - see the [Survey Design Support compatibility matrix](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html#survey-design-support) for the per-estimator support table.
183
+
184
+ - **Design elements available across the supported set**: strata, PSU, FPC, lonely PSU handling, nest. Weight types vary by estimator: some surfaces (e.g. CallawaySantAnna, StackedDiD, the HAD continuous path) accept `pweight` only; others accept `pweight` / `fweight` / `aweight`.
185
+ - **Variance methods**: Taylor Series Linearization (TSL via Binder 1983), replicate weights (BRR / Fay / JK1 / JKn / SDR), survey-aware bootstrap
186
+ - **Diagnostics**: DEFF per coefficient, effective n, subpopulation analysis, weight trimming, CV on estimates
187
+ - **Repeated cross-sections**: `CallawaySantAnna(panel=False)` for BRFSS, ACS, CPS
188
+
189
+ No other Python or R DiD package offers design-based variance estimation for modern heterogeneity-robust estimators.
190
+
191
+ ## Requirements
192
+
193
+ - Python 3.9 - 3.14
194
+ - numpy >= 1.20
195
+ - pandas >= 1.3
196
+ - scipy >= 1.7
197
+
198
+ ## Development
199
+
200
+ ```bash
201
+ # Install with dev dependencies
202
+ pip install -e ".[dev]"
203
+
204
+ # Run tests
205
+ pytest
206
+
207
+ # Format code
208
+ black diff_diff tests
209
+ ruff check diff_diff tests
210
+ ```
211
+
212
+ ## References
213
+
214
+ This library implements methods from a wide body of econometric and causal-inference research. See the full bibliography on [Read the Docs](https://diff-diff.readthedocs.io/en/stable/references.html) for citations spanning DiD foundations, modern staggered estimators, sensitivity analysis, and synthetic controls.
215
+
216
+ ## Citing diff-diff
217
+
218
+ If you use diff-diff in your research, please cite it:
219
+
220
+ ```bibtex
221
+ @software{diff_diff,
222
+ title = {diff-diff: Difference-in-Differences Causal Inference for Python},
223
+ author = {Gerber, Isaac},
224
+ year = {2026},
225
+ url = {https://github.com/igerber/diff-diff},
226
+ doi = {10.5281/zenodo.19646175},
227
+ license = {MIT},
228
+ }
229
+ ```
230
+
231
+ The DOI above is the Zenodo concept DOI - it always resolves to the latest release. To cite a specific version, look up its versioned DOI on [the Zenodo project page](https://doi.org/10.5281/zenodo.19646175).
232
+
233
+ See [`CITATION.cff`](https://github.com/igerber/diff-diff/blob/main/CITATION.cff) for the full citation metadata.
234
+
235
+ **Note on authorship**: academic citation (`CITATION.cff`, the BibTeX above) lists individual authors with ORCIDs per scholarly convention. Package metadata surfaces (`pyproject.toml`, Sphinx docs) list "diff-diff contributors" to acknowledge the collective - see [`CONTRIBUTORS.md`](https://github.com/igerber/diff-diff/blob/main/CONTRIBUTORS.md) for the full list.
236
+
237
+ ## License
238
+
239
+ MIT License
240
+
@@ -0,0 +1,187 @@
1
+ # diff-diff
2
+
3
+ <p align="center">
4
+ <img src="https://raw.githubusercontent.com/igerber/diff-diff/main/diff-diff.png"
5
+ alt="diff-diff: Difference-in-Differences causal inference in Python - sklearn-like API with Callaway-Sant'Anna, Synthetic DiD, Honest DiD, and Event Studies"
6
+ width="800">
7
+ </p>
8
+
9
+ [![PyPI version](https://img.shields.io/pypi/v/diff-diff.svg)](https://pypi.org/project/diff-diff/)
10
+ [![Python versions](https://img.shields.io/pypi/pyversions/diff-diff.svg)](https://pypi.org/project/diff-diff/)
11
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
12
+ [![Downloads](https://static.pepy.tech/badge/diff-diff)](https://pepy.tech/projects/diff-diff)
13
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.19646175.svg)](https://doi.org/10.5281/zenodo.19646175)
14
+
15
+ A Python library for Difference-in-Differences (DiD) causal inference - sklearn-like estimators with statsmodels-style outputs, built for econometricians, marketing analysts, and data scientists running campaign-lift, policy, and staggered-rollout analyses.
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install diff-diff
21
+ ```
22
+
23
+ For development:
24
+
25
+ ```bash
26
+ git clone https://github.com/igerber/diff-diff.git
27
+ cd diff-diff
28
+ pip install -e ".[dev]"
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ```python
34
+ import pandas as pd
35
+ from diff_diff import DifferenceInDifferences # or: DiD
36
+
37
+ data = pd.DataFrame({
38
+ 'outcome': [10, 11, 15, 18, 9, 10, 12, 13],
39
+ 'treated': [1, 1, 1, 1, 0, 0, 0, 0],
40
+ 'post': [0, 0, 1, 1, 0, 0, 1, 1],
41
+ })
42
+
43
+ did = DifferenceInDifferences()
44
+ results = did.fit(data, outcome='outcome', treatment='treated', time='post')
45
+ print(results) # DiDResults(ATT=3.0000, SE=1.7321, p=0.1583)
46
+ results.print_summary() # full statsmodels-style table
47
+ ```
48
+
49
+ ## Documentation
50
+
51
+ - [Quickstart](https://diff-diff.readthedocs.io/en/stable/quickstart.html) - basic 2x2 DiD with column-name and formula interfaces, covariates, fixed effects, cluster-robust SEs
52
+ - [Choosing an Estimator](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html) - decision flowchart for picking the right estimator
53
+ - [Tutorials](https://diff-diff.readthedocs.io/en/stable/tutorials/01_basic_did.html) - hands-on Jupyter notebooks covering every estimator and design pattern
54
+ - [Troubleshooting](https://diff-diff.readthedocs.io/en/stable/troubleshooting.html) - common issues and solutions
55
+ - [R Comparison](https://diff-diff.readthedocs.io/en/stable/r_comparison.html) | [Python Comparison](https://diff-diff.readthedocs.io/en/stable/python_comparison.html) | [Benchmarks](https://diff-diff.readthedocs.io/en/stable/benchmarks.html) - validation results vs `did`, `synthdid`, `fixest`
56
+ - [API Reference](https://diff-diff.readthedocs.io/en/stable/api/index.html) - full API for all estimators, results classes, diagnostics, utilities
57
+
58
+ ## For AI Agents
59
+
60
+ If you are an AI agent or LLM using this library, call `diff_diff.get_llm_guide()` for a concise API reference with an 8-step practitioner workflow (based on Baker et al. 2025). The workflow ensures rigorous DiD analysis - testing assumptions, running sensitivity analysis, and checking robustness, not just calling `fit()`.
61
+
62
+ ```python
63
+ from diff_diff import get_llm_guide
64
+
65
+ get_llm_guide() # concise API reference
66
+ get_llm_guide("practitioner") # 8-step workflow (Baker et al. 2025)
67
+ get_llm_guide("full") # comprehensive documentation
68
+ get_llm_guide("autonomous") # autonomous-agent variant
69
+ ```
70
+
71
+ The guides are bundled in the wheel, so once the package is installed they can be read offline with no network access. After estimation, call `practitioner_next_steps(results)` for context-aware guidance on the remaining diagnostic steps.
72
+
73
+ ## For Data Scientists
74
+
75
+ Measuring campaign lift? Evaluating a product launch? Rolling out a policy in waves? diff-diff handles the causal inference so you can focus on the business question.
76
+
77
+ - [Which method fits my problem?](https://diff-diff.readthedocs.io/en/stable/practitioner_decision_tree.html) - start from your business scenario (campaign in some markets, staggered rollout, survey data) and find the right estimator
78
+ - [Getting started for practitioners](https://diff-diff.readthedocs.io/en/stable/practitioner_getting_started.html) - end-to-end walkthrough from marketing campaign to causal estimate to stakeholder-ready result
79
+ - [Brand awareness survey tutorial](https://diff-diff.readthedocs.io/en/stable/tutorials/17_brand_awareness_survey.html) - full example with complex survey design, brand funnel analysis, and staggered rollouts
80
+ - Have BRFSS/ACS/CPS individual records? Use [`aggregate_survey()`](https://diff-diff.readthedocs.io/en/stable/api/prep.html) to roll respondent-level microdata into a geographic-period panel with inverse-variance precision weights for second-stage DiD
81
+
82
+ `BusinessReport` and `DiagnosticReport` are experimental preview classes that produce plain-English output and a structured `to_dict()` schema from any fitted result - wording and schema will evolve. See [docs/methodology/REPORTING.md](https://github.com/igerber/diff-diff/blob/main/docs/methodology/REPORTING.md) for usage and stability notes.
83
+
84
+ ## Practitioner Workflow (Baker et al. 2025)
85
+
86
+ For rigorous DiD analysis, follow these 8 steps. Skipping diagnostic steps produces unreliable results.
87
+
88
+ 1. **Define target parameter** - ATT, group-time ATT(g,t), or event-study ATT_es(e). State whether weighted or unweighted.
89
+ 2. **State identification assumptions** - which parallel trends variant (unconditional, conditional, PT-GT-Nev, PT-GT-NYT), no-anticipation, overlap.
90
+ 3. **Test parallel trends** - simple 2x2: `check_parallel_trends()`, `equivalence_test_trends()`; staggered: inspect CS event-study pre-period coefficients (generic PT tests are invalid for staggered designs). Insignificant pre-trends do NOT prove PT holds.
91
+ 4. **Choose estimator** - staggered adoption -> CS/SA/BJS (NOT plain TWFE); few treated units -> SDiD; factor confounding -> TROP; simple 2x2 -> DiD. Run `BaconDecomposition` to diagnose TWFE bias.
92
+ 5. **Estimate** - `estimator.fit(data, ...)`. Always print the cluster count first, then choose the inference method based on it (cluster-robust SEs if >= 50 clusters, wild bootstrap if fewer).
93
+ 6. **Sensitivity analysis** - `compute_honest_did(results)` for bounds under PT violations (MultiPeriodDiD, CS, or dCDH), `run_all_placebo_tests()` for 2x2 falsification, specification comparisons for staggered designs.
94
+ 7. **Heterogeneity** - CS: `aggregate='group'`/`'event_study'`; SA: `results.event_study_effects` / `to_dataframe(level='cohort')`; subgroup re-estimation.
95
+ 8. **Robustness** - compare 2-3 estimators (CS vs SA vs BJS), report with and without covariates (shows whether conditioning drives identification), present pre-trends and sensitivity bounds.
96
+
97
+ Full guide: `diff_diff.get_llm_guide("practitioner")`.
98
+
99
+ ## Estimators
100
+
101
+ - [DifferenceInDifferences](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - basic 2x2 DiD with robust/cluster-robust SEs, wild bootstrap, formula interface, and fixed effects
102
+ - [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - panel data DiD with unit and time fixed effects via within-transformation or dummies
103
+ - [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - event study design with period-specific treatment effects for dynamic analysis
104
+ - [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Callaway & Sant'Anna (2021) group-time ATT estimator for staggered adoption
105
+ - [ChaisemartinDHaultfoeuille](https://diff-diff.readthedocs.io/en/stable/api/chaisemartin_dhaultfoeuille.html) - de Chaisemartin & D'Haultfœuille (2020/2022) for **reversible (non-absorbing) treatments** with multi-horizon event study, normalized effects, cost-benefit delta, sup-t bands, and dynamic placebos. The only library option for treatments that switch on AND off. Alias `DCDH`.
106
+ - [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html) - Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies
107
+ - [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html) - Borusyak, Jaravel & Spiess (2024) imputation estimator, most efficient under homogeneous effects
108
+ - [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html) - Gardner (2022) two-stage estimator with GMM sandwich variance
109
+ - [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html) - Synthetic DiD combining standard DiD and synthetic control for few treated units
110
+ - [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html) - triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility
111
+ - [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html) - Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves
112
+ - [HeterogeneousAdoptionDiD](https://diff-diff.readthedocs.io/en/stable/api/had.html) - de Chaisemartin, Ciccia, D'Haultfœuille & Knau (2026) for designs where **no unit remains untreated**; local-linear estimator at the dose support boundary returning Weighted Average Slope (WAS) on Design 1' (`d̲ = 0` / QUG) or `WAS_{d̲}` on Design 1 (`d̲ > 0`, continuous-near-d̲ or mass-point), with a multi-period event-study extension (last-treatment cohort, pointwise CIs). **Panel-only** in this release - repeated cross-sections rejected by the validator. Alias `HAD`.
113
+ - [StackedDiD](https://diff-diff.readthedocs.io/en/stable/api/stacked_did.html) - Wing, Freedman & Hollingsworth (2024) stacked DiD with Q-weights and sub-experiments
114
+ - [EfficientDiD](https://diff-diff.readthedocs.io/en/stable/api/efficient_did.html) - Chen, Sant'Anna & Xie (2025) efficient DiD with optimal weighting for tighter SEs
115
+ - [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html) - Triply Robust Panel estimator (Athey et al. 2025) with nuclear norm factor adjustment
116
+ - [StaggeredTripleDifference](https://diff-diff.readthedocs.io/en/stable/api/staggered.html#staggeredtripledifference) - Ortiz-Villavicencio & Sant'Anna (2025) staggered DDD with group-time ATT
117
+ - [WooldridgeDiD](https://diff-diff.readthedocs.io/en/stable/api/wooldridge_etwfe.html) - Wooldridge (2023, 2025) ETWFE: saturated OLS, logit/Poisson QMLE (ASF-based ATT). Alias `ETWFE`.
118
+ - [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html) - Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings
119
+
120
+ ## Diagnostics & Sensitivity
121
+
122
+ - [Parallel Trends Testing](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - simple and Wasserstein-robust parallel trends tests, equivalence testing (TOST)
123
+ - [Placebo Tests](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html) - placebo timing, group, permutation, leave-one-out
124
+ - [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html) - Rambachan & Roth (2023) sensitivity analysis: robust CI under PT violations, breakdown values
125
+ - [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html) - Roth (2022) minimum detectable violation and power curves
126
+ - [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html) - analytical and simulation-based MDE, sample size, power curves for study design
127
+
128
+ ## Survey Support
129
+
130
+ Most estimators accept an optional `survey_design` parameter (or `survey=` / `weights=` for `HeterogeneousAdoptionDiD`) for design-based variance estimation. Coverage and supported weight types vary by estimator - see the [Survey Design Support compatibility matrix](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html#survey-design-support) for the per-estimator support table.
131
+
132
+ - **Design elements available across the supported set**: strata, PSU, FPC, lonely PSU handling, nest. Weight types vary by estimator: some surfaces (e.g. CallawaySantAnna, StackedDiD, the HAD continuous path) accept `pweight` only; others accept `pweight` / `fweight` / `aweight`.
133
+ - **Variance methods**: Taylor Series Linearization (TSL via Binder 1983), replicate weights (BRR / Fay / JK1 / JKn / SDR), survey-aware bootstrap
134
+ - **Diagnostics**: DEFF per coefficient, effective n, subpopulation analysis, weight trimming, CV on estimates
135
+ - **Repeated cross-sections**: `CallawaySantAnna(panel=False)` for BRFSS, ACS, CPS
136
+
137
+ No other Python or R DiD package offers design-based variance estimation for modern heterogeneity-robust estimators.
138
+
139
+ ## Requirements
140
+
141
+ - Python 3.9 - 3.14
142
+ - numpy >= 1.20
143
+ - pandas >= 1.3
144
+ - scipy >= 1.7
145
+
146
+ ## Development
147
+
148
+ ```bash
149
+ # Install with dev dependencies
150
+ pip install -e ".[dev]"
151
+
152
+ # Run tests
153
+ pytest
154
+
155
+ # Format and lint code
156
+ black diff_diff tests
157
+ ruff check diff_diff tests
158
+ ```
159
+
160
+ ## References
161
+
162
+ This library implements methods from a wide body of econometric and causal-inference research. See the full bibliography on [Read the Docs](https://diff-diff.readthedocs.io/en/stable/references.html) for citations spanning DiD foundations, modern staggered estimators, sensitivity analysis, and synthetic controls.
163
+
164
+ ## Citing diff-diff
165
+
166
+ If you use diff-diff in your research, please cite it:
167
+
168
+ ```bibtex
169
+ @software{diff_diff,
170
+ title = {diff-diff: Difference-in-Differences Causal Inference for Python},
171
+ author = {Gerber, Isaac},
172
+ year = {2026},
173
+ url = {https://github.com/igerber/diff-diff},
174
+ doi = {10.5281/zenodo.19646175},
175
+ license = {MIT},
176
+ }
177
+ ```
178
+
179
+ The DOI above is the Zenodo concept DOI - it always resolves to the latest release. To cite a specific version, look up its versioned DOI on [the Zenodo project page](https://doi.org/10.5281/zenodo.19646175).
180
+
181
+ See [`CITATION.cff`](https://github.com/igerber/diff-diff/blob/main/CITATION.cff) for the full citation metadata.
182
+
183
+ **Note on authorship**: the academic citation metadata (`CITATION.cff`, the BibTeX above) lists individual authors with ORCIDs per scholarly convention. Package metadata surfaces (`pyproject.toml`, Sphinx docs) list "diff-diff contributors" to acknowledge the collective - see [`CONTRIBUTORS.md`](https://github.com/igerber/diff-diff/blob/main/CONTRIBUTORS.md) for the full list.
184
+
185
+ ## License
186
+
187
+ MIT License
@@ -151,6 +151,7 @@ from diff_diff.survey import (
151
151
  SurveyDesign,
152
152
  SurveyMetadata,
153
153
  compute_deff_diagnostics,
154
+ make_pweight_design,
154
155
  )
155
156
  from diff_diff.staggered import (
156
157
  CallawaySantAnna,
@@ -250,7 +251,13 @@ from diff_diff.diagnostic_report import (
250
251
  DiagnosticReportResults,
251
252
  )
252
253
  from diff_diff._guides_api import get_llm_guide
253
- from diff_diff.profile import Alert, PanelProfile, profile_panel
254
+ from diff_diff.profile import (
255
+ Alert,
256
+ OutcomeShape,
257
+ PanelProfile,
258
+ TreatmentDoseShape,
259
+ profile_panel,
260
+ )
254
261
  from diff_diff.datasets import (
255
262
  clear_cache,
256
263
  list_datasets,
@@ -280,7 +287,7 @@ ETWFE = WooldridgeDiD
280
287
  DCDH = ChaisemartinDHaultfoeuille
281
288
  HAD = HeterogeneousAdoptionDiD
282
289
 
283
- __version__ = "3.3.0"
290
+ __version__ = "3.3.2"
284
291
  __all__ = [
285
292
  # Estimators
286
293
  "DifferenceInDifferences",
@@ -439,6 +446,7 @@ __all__ = [
439
446
  "SurveyMetadata",
440
447
  "DEFFDiagnostics",
441
448
  "compute_deff_diagnostics",
449
+ "make_pweight_design",
442
450
  # Rust backend
443
451
  "HAS_RUST_BACKEND",
444
452
  # Linear algebra helpers
@@ -498,6 +506,8 @@ __all__ = [
498
506
  "profile_panel",
499
507
  "PanelProfile",
500
508
  "Alert",
509
+ "OutcomeShape",
510
+ "TreatmentDoseShape",
501
511
  # LLM guide accessor
502
512
  "get_llm_guide",
503
513
  ]