diffindiff 2.1.1__tar.gz → 2.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffindiff-2.1.1 → diffindiff-2.2.1}/PKG-INFO +9 -9
- {diffindiff-2.1.1 → diffindiff-2.2.1}/README.md +8 -8
- diffindiff-2.2.1/diffindiff/config.py +478 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/didanalysis.py +567 -814
- diffindiff-2.2.1/diffindiff/didanalysis_helper.py +905 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/diddata.py +348 -207
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/didtools.py +312 -56
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/tests/tests_diffindiff.py +24 -9
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff.egg-info/PKG-INFO +9 -9
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff.egg-info/requires.txt +2 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/setup.py +5 -2
- diffindiff-2.1.1/diffindiff/config.py +0 -179
- diffindiff-2.1.1/diffindiff/didanalysis_helper.py +0 -400
- {diffindiff-2.1.1 → diffindiff-2.2.1}/MANIFEST.in +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/__init__.py +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/tests/__init__.py +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/tests/data/Corona_Hesse.xlsx +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/tests/data/counties_DE.csv +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff/tests/data/curfew_DE.csv +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff.egg-info/SOURCES.txt +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff.egg-info/dependency_links.txt +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/diffindiff.egg-info/top_level.txt +0 -0
- {diffindiff-2.1.1 → diffindiff-2.2.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.2.1
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -8,7 +8,7 @@ Description-Content-Type: text/markdown
|
|
|
8
8
|
|
|
9
9
|
# diffindiff: Difference-in-Differences (DiD) Analysis Python Library
|
|
10
10
|
|
|
11
|
-
This Python library is designed for performing Difference-in-Differences (DiD) analyses in a convenient way. It allows users to construct datasets, define treatment and control groups, and set treatment periods. DiD model analyses may be conducted with both datasets created by built-in functions and ready-to-use external datasets. Both simultaneous and staggered adoption are supported. The library allows for various extensions, such as two-way fixed effects models, group- or individual-specific effects,
|
|
11
|
+
This Python library is designed for performing Difference-in-Differences (DiD) analyses in a convenient way. It allows users to construct datasets, define treatment and control groups, and set treatment periods. DiD model analyses may be conducted with both datasets created by built-in functions and ready-to-use external datasets. Both simultaneous and staggered adoption are supported. The library allows for various extensions, such as two-way fixed effects models, group- or individual-specific effects, post-treatment periods, and triple-difference estimations. Additionally, it includes functions for visualizing results, such as plotting DiD coefficients with confidence intervals and illustrating the temporal evolution of staggered treatments. Furthermore, several functions for rigorous treatment setting and data diagnostics are incorporated.
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
## Author
|
|
@@ -16,11 +16,9 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
|
|
|
16
16
|
Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
## Updates v2.1.1
|
|
19
|
+
## Updates v2.2.1
|
|
20
20
|
- Bugfixes:
|
|
21
|
-
-
|
|
22
|
-
- Default verbose = False
|
|
23
|
-
- Notes and warnings related to treatment diagnostics independent of verbose parameter
|
|
21
|
+
- Always converting unit and time col to str and calculating unit-time-index col
|
|
24
22
|
|
|
25
23
|
|
|
26
24
|
## Features
|
|
@@ -62,15 +60,17 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
62
60
|
- Card D, Krueger AB (1994) Minimum Wages and Employment: A Case Study of the Fast Food Industry in New Jersey and Pennsylvania. *The American Economic Review* 84(4): 772-793. [JSTOR](https://www.jstor.org/stable/2677856)
|
|
63
61
|
- de Haas S, Götz G, Heim S (2022) Measuring the effect of COVID‑19‑related night curfews in a bundled intervention within Germany. *Scientific Reports* 12: 19732. [10.1038/s41598-022-24086-9](https://doi.org/10.1038/s41598-022-24086-9)
|
|
64
62
|
- Goodman-Bacon A (2021) Difference-in-differences with variation in treatment timing. *Journal of Econometrics* 225(2): 254-277. [10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
|
|
65
|
-
- Greene WH (2012) *Econometric Analysis*.
|
|
63
|
+
- Greene WH (2012) *Econometric Analysis*.
|
|
66
64
|
- Goldfarb A, Tucker C, Wang Y (2022) Conducting Research in Marketing with Quasi-Experiments. *Journal of Marketing* 86(3): 1-19. [10.1177/00222429221082977](https://doi.org/10.1177/00222429221082977)
|
|
67
65
|
- Isphording IE, Lipfert M, Pestel N (2021) Does re-opening schools contribute to the spread of SARS-CoV-2? Evidence from staggered summer breaks in Germany. *Journal of Public Economics* 198: 104426. [10.1016/j.jpubeco.2021.104426](https://doi.org/10.1016/j.jpubeco.2021.104426)
|
|
68
66
|
- Li KT, Luo L, Pattabhiramaiah A (2024) Causal Inference with Quasi-Experimental Data. *IMPACT at JMR* November 13, 2024. [AMA](https://www.ama.org/marketing-news/causal-inference-with-quasi-experimental-data/)
|
|
67
|
+
- Olden A (2018) What do you buy when no one's watching? The effect of self-service checkouts on the composition of sales in retail. Discussion paper FOR 3/18, Norwegian School of Economics, Norway. [http://hdl.handle.net/11250/2490886](http://hdl.handle.net/11250/2490886)
|
|
69
68
|
- Olden A, Moen J (2022) The triple difference estimator. *The Econometrics Journal* 25(3): 531-553. [10.1093/ectj/utac010](https://doi.org/10.1093/ectj/utac010)
|
|
69
|
+
- Strassmann A, Çolak Y, Serra-Burriel M, Nordestgaard BG, Turk A, Afzal S, Puhan MA (2023) Nationwide indoor smoking ban and impact on smoking behaviour and lung function: a two-population natural experiment. *Thorax* 78(2): 144-150. [10.1136/thoraxjnl-2021-218436](https://doi.org/10.1136/thoraxjnl-2021-218436)
|
|
70
70
|
- Villa JM (2016) diff: Simplifying the estimation of difference-in-differences treatment effects. *The Stata Journal* 16(1): 52-71. [10.1177/1536867X1601600108](https://doi.org/10.1177/1536867X1601600108)
|
|
71
71
|
- von Bismarck-Osten C, Borusyak K, Schönberg U (2022) The role of schools in transmission of the SARS-CoV-2 virus: quasi-experimental evidence from Germany. *Economic Policy* 37(109): 87–130. [10.1093/epolic/eiac001](https://doi.org/10.1093/epolic/eiac001)
|
|
72
|
-
- Wieland T (
|
|
73
|
-
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
72
|
+
- Wieland T (2025) Assessing the effectiveness of non-pharmaceutical interventions in the SARS-CoV-2 pandemic: results of a natural experiment regarding Baden-Württemberg (Germany) and Switzerland in the second infection wave. *Journal of Public Health: From Theory to Practice* 33(11): 2497-2511. [10.1007/s10389-024-02218-x](https://doi.org/10.1007/s10389-024-02218-x)
|
|
73
|
+
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
## Examples
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# diffindiff: Difference-in-Differences (DiD) Analysis Python Library
|
|
2
2
|
|
|
3
|
-
This Python library is designed for performing Difference-in-Differences (DiD) analyses in a convenient way. It allows users to construct datasets, define treatment and control groups, and set treatment periods. DiD model analyses may be conducted with both datasets created by built-in functions and ready-to-use external datasets. Both simultaneous and staggered adoption are supported. The library allows for various extensions, such as two-way fixed effects models, group- or individual-specific effects,
|
|
3
|
+
This Python library is designed for performing Difference-in-Differences (DiD) analyses in a convenient way. It allows users to construct datasets, define treatment and control groups, and set treatment periods. DiD model analyses may be conducted with both datasets created by built-in functions and ready-to-use external datasets. Both simultaneous and staggered adoption are supported. The library allows for various extensions, such as two-way fixed effects models, group- or individual-specific effects, post-treatment periods, and triple-difference estimations. Additionally, it includes functions for visualizing results, such as plotting DiD coefficients with confidence intervals and illustrating the temporal evolution of staggered treatments. Furthermore, several functions for rigorous treatment setting and data diagnostics are incorporated.
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
## Author
|
|
@@ -8,11 +8,9 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
|
|
|
8
8
|
Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
## Updates v2.1.1
|
|
11
|
+
## Updates v2.2.1
|
|
12
12
|
- Bugfixes:
|
|
13
|
-
-
|
|
14
|
-
- Default verbose = False
|
|
15
|
-
- Notes and warnings related to treatment diagnostics independent of verbose parameter
|
|
13
|
+
- Always converting unit and time col to str and calculating unit-time-index col
|
|
16
14
|
|
|
17
15
|
|
|
18
16
|
## Features
|
|
@@ -54,15 +52,17 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
54
52
|
- Card D, Krueger AB (1994) Minimum Wages and Employment: A Case Study of the Fast Food Industry in New Jersey and Pennsylvania. *The American Economic Review* 84(4): 772-793. [JSTOR](https://www.jstor.org/stable/2677856)
|
|
55
53
|
- de Haas S, Götz G, Heim S (2022) Measuring the effect of COVID‑19‑related night curfews in a bundled intervention within Germany. *Scientific Reports* 12: 19732. [10.1038/s41598-022-24086-9](https://doi.org/10.1038/s41598-022-24086-9)
|
|
56
54
|
- Goodman-Bacon A (2021) Difference-in-differences with variation in treatment timing. *Journal of Econometrics* 225(2): 254-277. [10.1016/j.jeconom.2021.03.014](https://doi.org/10.1016/j.jeconom.2021.03.014)
|
|
57
|
-
- Greene WH (2012) *Econometric Analysis*.
|
|
55
|
+
- Greene WH (2012) *Econometric Analysis*.
|
|
58
56
|
- Goldfarb A, Tucker C, Wang Y (2022) Conducting Research in Marketing with Quasi-Experiments. *Journal of Marketing* 86(3): 1-19. [10.1177/00222429221082977](https://doi.org/10.1177/00222429221082977)
|
|
59
57
|
- Isphording IE, Lipfert M, Pestel N (2021) Does re-opening schools contribute to the spread of SARS-CoV-2? Evidence from staggered summer breaks in Germany. *Journal of Public Economics* 198: 104426. [10.1016/j.jpubeco.2021.104426](https://doi.org/10.1016/j.jpubeco.2021.104426)
|
|
60
58
|
- Li KT, Luo L, Pattabhiramaiah A (2024) Causal Inference with Quasi-Experimental Data. *IMPACT at JMR* November 13, 2024. [AMA](https://www.ama.org/marketing-news/causal-inference-with-quasi-experimental-data/)
|
|
59
|
+
- Olden A (2018) What do you buy when no one's watching? The effect of self-service checkouts on the composition of sales in retail. Discussion paper FOR 3/18, Norwegian School of Economics, Norway. [http://hdl.handle.net/11250/2490886](http://hdl.handle.net/11250/2490886)
|
|
61
60
|
- Olden A, Moen J (2022) The triple difference estimator. *The Econometrics Journal* 25(3): 531-553. [10.1093/ectj/utac010](https://doi.org/10.1093/ectj/utac010)
|
|
61
|
+
- Strassmann A, Çolak Y, Serra-Burriel M, Nordestgaard BG, Turk A, Afzal S, Puhan MA (2023) Nationwide indoor smoking ban and impact on smoking behaviour and lung function: a two-population natural experiment. *Thorax* 78(2): 144-150. [10.1136/thoraxjnl-2021-218436](https://doi.org/10.1136/thoraxjnl-2021-218436)
|
|
62
62
|
- Villa JM (2016) diff: Simplifying the estimation of difference-in-differences treatment effects. *The Stata Journal* 16(1): 52-71. [10.1177/1536867X1601600108](https://doi.org/10.1177/1536867X1601600108)
|
|
63
63
|
- von Bismarck-Osten C, Borusyak K, Schönberg U (2022) The role of schools in transmission of the SARS-CoV-2 virus: quasi-experimental evidence from Germany. *Economic Policy* 37(109): 87–130. [10.1093/epolic/eiac001](https://doi.org/10.1093/epolic/eiac001)
|
|
64
|
-
- Wieland T (
|
|
65
|
-
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
64
|
+
- Wieland T (2025) Assessing the effectiveness of non-pharmaceutical interventions in the SARS-CoV-2 pandemic: results of a natural experiment regarding Baden-Württemberg (Germany) and Switzerland in the second infection wave. *Journal of Public Health: From Theory to Practice* 33(11): 2497-2511. [10.1007/s10389-024-02218-x](https://doi.org/10.1007/s10389-024-02218-x)
|
|
65
|
+
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
## Examples
|
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
#-----------------------------------------------------------------------
|
|
2
|
+
# Name: config (diffindiff package)
|
|
3
|
+
# Purpose: Configuration for the diffindiff package
|
|
4
|
+
# Author: Thomas Wieland
|
|
5
|
+
# ORCID: 0000-0001-5168-9846
|
|
6
|
+
# mail: geowieland@googlemail.com
|
|
7
|
+
# Version: 1.0.3
|
|
8
|
+
# Last update: 2025-12-06 10:48
|
|
9
|
+
# Copyright (c) 2025 Thomas Wieland
|
|
10
|
+
#-----------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
# Basic config:
|
|
13
|
+
|
|
14
|
+
PACKAGE_VERSION = "2.2.1"
|
|
15
|
+
|
|
16
|
+
VERBOSE = True
|
|
17
|
+
|
|
18
|
+
ROUND_STATISTIC = 3
|
|
19
|
+
ROUND_PERCENT = 2
|
|
20
|
+
|
|
21
|
+
AUTO_SWITCH_TO_PREPOST = True
|
|
22
|
+
|
|
23
|
+
# Description texts:
|
|
24
|
+
|
|
25
|
+
DID_DESCRIPTION = "Difference-in-Differences Analysis"
|
|
26
|
+
DDD_DESCRIPTION = "Triple-Difference Analysis"
|
|
27
|
+
|
|
28
|
+
TREATMENT_DESCRIPTION = "Treatment"
|
|
29
|
+
|
|
30
|
+
GROUP_DESCRIPTION = "Group"
|
|
31
|
+
|
|
32
|
+
TREATMENT_GROUP_DESCRIPTION = f"{TREATMENT_DESCRIPTION} {GROUP_DESCRIPTION}"
|
|
33
|
+
CONTROL_GROUP_DESCRIPTION = f"Control {GROUP_DESCRIPTION}"
|
|
34
|
+
GROUPS_DESCRIPTION = f"{TREATMENT_DESCRIPTION} and {CONTROL_GROUP_DESCRIPTION}"
|
|
35
|
+
|
|
36
|
+
TIME_PERIODS_DESCRIPTION = "Time periods"
|
|
37
|
+
TREATMENT_PERIOD_DESCRIPTION = f"{TREATMENT_DESCRIPTION} period"
|
|
38
|
+
STUDY_PERIOD_DESCRIPTION = "Study period"
|
|
39
|
+
PREPOST_DESCRIPTION = "Pre-post"
|
|
40
|
+
AFTER_TREATMENT_PERIOD_DESCRIPTION = "After-treatment period"
|
|
41
|
+
|
|
42
|
+
UNITS_DESCRIPTION = "Units"
|
|
43
|
+
|
|
44
|
+
N_DESCRIPTION = "Number of observations"
|
|
45
|
+
|
|
46
|
+
DDD_GROUP_DESCRIPTION = f"{GROUP_DESCRIPTION} segmentation"
|
|
47
|
+
|
|
48
|
+
TREATMENT_SIMULTANEOUS_DESCRIPTION = "Simultaneous"
|
|
49
|
+
TREATMENT_STAGGERED_DESCRIPTION = "Staggered"
|
|
50
|
+
NO_TREATMENT_CG_DESCRIPTION = f"No-treatment {CONTROL_GROUP_DESCRIPTION}"
|
|
51
|
+
|
|
52
|
+
PREPOST_PANELDATA_DESCRIPTION = f"{PREPOST_DESCRIPTION} data"
|
|
53
|
+
MULTIPERIOD_PANELDATA_DESCRIPTION = "Multi-period panel data"
|
|
54
|
+
|
|
55
|
+
DIDDATA_SUMMARY_LABELS = [
|
|
56
|
+
TREATMENT_DESCRIPTION,
|
|
57
|
+
UNITS_DESCRIPTION,
|
|
58
|
+
TREATMENT_GROUP_DESCRIPTION,
|
|
59
|
+
CONTROL_GROUP_DESCRIPTION,
|
|
60
|
+
DDD_GROUP_DESCRIPTION,
|
|
61
|
+
STUDY_PERIOD_DESCRIPTION,
|
|
62
|
+
TREATMENT_PERIOD_DESCRIPTION,
|
|
63
|
+
N_DESCRIPTION
|
|
64
|
+
]
|
|
65
|
+
DIDDATA_SUMMARY_MAX_WIDTH = max(len(label) for label in DIDDATA_SUMMARY_LABELS) + 1
|
|
66
|
+
|
|
67
|
+
# Data management:
|
|
68
|
+
|
|
69
|
+
DELIMITER = "_"
|
|
70
|
+
DELIMITER_INTERACT = "x"
|
|
71
|
+
|
|
72
|
+
COL_ABBREV = "col"
|
|
73
|
+
|
|
74
|
+
DUMMY_PREFIX = "DUMMY"
|
|
75
|
+
LOG_PREFIX = "log"
|
|
76
|
+
OBSERVED_SUFFIX = "observed"
|
|
77
|
+
EXPECTED_SUFFIX = "expected"
|
|
78
|
+
PREDICTED_SUFFIX = "pred"
|
|
79
|
+
CI_LOWER_SUFFIX = "CI_lower"
|
|
80
|
+
CI_UPPER_SUFFIX = "CI_upper"
|
|
81
|
+
PI_LOWER_SUFFIX = "PI_lower"
|
|
82
|
+
PI_UPPER_SUFFIX = "PI_upper"
|
|
83
|
+
|
|
84
|
+
TG_COL = "TG"
|
|
85
|
+
CG_COL = "CG"
|
|
86
|
+
BG_COL = "BG"
|
|
87
|
+
TT_COL = "TT"
|
|
88
|
+
ATT_COL = "ATT"
|
|
89
|
+
TIME_COL = "t"
|
|
90
|
+
UNIT_COL = "unit"
|
|
91
|
+
UNIT_TIME_COL = f"{UNIT_COL}{DELIMITER}{TIME_COL}"
|
|
92
|
+
TIME_COUNTER_COL = "time_counter"
|
|
93
|
+
TREATMENT_COL = f"{TG_COL}{DELIMITER_INTERACT}{TT_COL}"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Modeling config:
|
|
97
|
+
|
|
98
|
+
# Coefficients/effects types:
|
|
99
|
+
|
|
100
|
+
TREATMENT_EFFECTS_DESCRIPTION = "Difference-in-Differences coefficients"
|
|
101
|
+
|
|
102
|
+
EFFECTS_TYPES = {
|
|
103
|
+
"ATE": {
|
|
104
|
+
"description": "Average treatment effect",
|
|
105
|
+
"model_results_key": "average_treatment_effects",
|
|
106
|
+
"summary_treatment_effects": True,
|
|
107
|
+
"summary_description": "{description} {coef}"
|
|
108
|
+
},
|
|
109
|
+
"AATE": {
|
|
110
|
+
"description": "Average after-treatment effect",
|
|
111
|
+
"model_results_key": "average_after_treatment_effects",
|
|
112
|
+
"summary_treatment_effects": True,
|
|
113
|
+
"summary_description": "{description} {coef}"
|
|
114
|
+
},
|
|
115
|
+
"beta_0": {
|
|
116
|
+
"description": "Control group baseline",
|
|
117
|
+
"model_results_key": "control_group_baseline",
|
|
118
|
+
"summary_treatment_effects": True,
|
|
119
|
+
"summary_description": "{description}"
|
|
120
|
+
},
|
|
121
|
+
"beta_1": {
|
|
122
|
+
"description": f"{TREATMENT_GROUP_DESCRIPTION} deviation",
|
|
123
|
+
"model_results_key": "treatment_group_deviation",
|
|
124
|
+
"summary_treatment_effects": True,
|
|
125
|
+
"summary_description": "{description}"
|
|
126
|
+
},
|
|
127
|
+
"delta_0": {
|
|
128
|
+
"description": "Non-treatment time effect",
|
|
129
|
+
"model_results_key": "non_treatment_time_effect",
|
|
130
|
+
"summary_treatment_effects": True,
|
|
131
|
+
"summary_description": "{description} {coef}"
|
|
132
|
+
},
|
|
133
|
+
"ATT": {
|
|
134
|
+
"description": "After-treatment time effect",
|
|
135
|
+
"model_results_key": "after_treatment_time_effects",
|
|
136
|
+
"summary_treatment_effects": True,
|
|
137
|
+
"summary_description": "{description} {coef}"
|
|
138
|
+
},
|
|
139
|
+
"FE": {
|
|
140
|
+
"description": "Fixed effects",
|
|
141
|
+
"model_results_key": "fixed_effects",
|
|
142
|
+
"summary_treatment_effects": False,
|
|
143
|
+
"types": {
|
|
144
|
+
0: {
|
|
145
|
+
"FE": "unit",
|
|
146
|
+
"dummy_prefix": "UNIT",
|
|
147
|
+
"model_config_key": "FE_unit",
|
|
148
|
+
"model_results_key": "FE_unit",
|
|
149
|
+
"description": "Fixed effects for observational units"
|
|
150
|
+
},
|
|
151
|
+
1: {
|
|
152
|
+
"FE": "time",
|
|
153
|
+
"dummy_prefix": "TIME",
|
|
154
|
+
"model_config_key": "FE_time",
|
|
155
|
+
"model_results_key": "FE_time",
|
|
156
|
+
"description": "Fixed effects for time points"
|
|
157
|
+
},
|
|
158
|
+
2: {
|
|
159
|
+
"FE": "group",
|
|
160
|
+
"dummy_prefix": "GROUP",
|
|
161
|
+
"model_config_key": "FE_group",
|
|
162
|
+
"model_results_key": "FE_group",
|
|
163
|
+
"description": "Fixed effects for groups"
|
|
164
|
+
},
|
|
165
|
+
}
|
|
166
|
+
},
|
|
167
|
+
"ITT": {
|
|
168
|
+
"description": "Individual time trends",
|
|
169
|
+
"model_results_key": "individual_time_trends",
|
|
170
|
+
"model_config_key": "ITT",
|
|
171
|
+
"summary_treatment_effects": False
|
|
172
|
+
},
|
|
173
|
+
"ITE": {
|
|
174
|
+
"description": "Individual treatment effects",
|
|
175
|
+
"model_results_key": "individual_treatment_effects",
|
|
176
|
+
"model_config_key": "ITE",
|
|
177
|
+
"summary_treatment_effects": True,
|
|
178
|
+
"summary_description": "{coef}"
|
|
179
|
+
},
|
|
180
|
+
"GTT": {
|
|
181
|
+
"description": "Group time trends",
|
|
182
|
+
"model_results_key": "group_time_trends",
|
|
183
|
+
"model_config_key": "GTT",
|
|
184
|
+
"summary_treatment_effects": False
|
|
185
|
+
},
|
|
186
|
+
"GTE": {
|
|
187
|
+
"description": "Group treatment effects",
|
|
188
|
+
"model_results_key": "group_treatment_effects",
|
|
189
|
+
"model_config_key": "GTE",
|
|
190
|
+
"summary_treatment_effects": True,
|
|
191
|
+
"summary_description": "{coef}"
|
|
192
|
+
},
|
|
193
|
+
"spillover": {
|
|
194
|
+
"description": "Treatment spillover effect",
|
|
195
|
+
"model_results_key": "treatment_spillover_effects",
|
|
196
|
+
"model_config_key": "spillover_effects",
|
|
197
|
+
"summary_treatment_effects": True,
|
|
198
|
+
"summary_description": "{description} {coef}"
|
|
199
|
+
},
|
|
200
|
+
"covariates": {
|
|
201
|
+
"description": "Covariates",
|
|
202
|
+
"model_results_key": "covariates_effects",
|
|
203
|
+
"model_config_key": "covariates",
|
|
204
|
+
"summary_treatment_effects": False,
|
|
205
|
+
"summary_description": "{coef}"
|
|
206
|
+
},
|
|
207
|
+
}
|
|
208
|
+
EFFECTS_TYPES_MODEL_RESULTS = [value["model_results_key"] for value in EFFECTS_TYPES.values() if "model_results_key" in value]
|
|
209
|
+
EFFECTS_TYPES_MODEL_RESULTS_SUMMARY = [value["model_results_key"] for value in EFFECTS_TYPES.values() if "model_results_key" in value and value["summary_treatment_effects"]]
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
EFFECTS_TYPES_DDD = {
|
|
213
|
+
"TDATE": {
|
|
214
|
+
"description": "Triple-Difference Average treatment effect",
|
|
215
|
+
"model_results_key": "TDATE"
|
|
216
|
+
},
|
|
217
|
+
"beta_2": {
|
|
218
|
+
"description": "Benefit group deviation",
|
|
219
|
+
"model_results_key": "benefit_group_deviation"
|
|
220
|
+
},
|
|
221
|
+
"beta_4": {
|
|
222
|
+
"description": "Treated benefit group deviation",
|
|
223
|
+
"model_results_key": "treated_benefit_group_deviation"
|
|
224
|
+
},
|
|
225
|
+
"beta_6": {
|
|
226
|
+
"description": "Benefit group non-treatment time effect",
|
|
227
|
+
"model_results_key": "benefit_non_treatment_time_effect"
|
|
228
|
+
},
|
|
229
|
+
list(EFFECTS_TYPES.keys())[0]: EFFECTS_TYPES[list(EFFECTS_TYPES.keys())[0]],
|
|
230
|
+
list(EFFECTS_TYPES.keys())[2]: EFFECTS_TYPES[list(EFFECTS_TYPES.keys())[2]],
|
|
231
|
+
list(EFFECTS_TYPES.keys())[3]: EFFECTS_TYPES[list(EFFECTS_TYPES.keys())[3]],
|
|
232
|
+
list(EFFECTS_TYPES.keys())[4]: EFFECTS_TYPES[list(EFFECTS_TYPES.keys())[4]],
|
|
233
|
+
list(EFFECTS_TYPES.keys())[6]: EFFECTS_TYPES[list(EFFECTS_TYPES.keys())[6]],
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
EFFECTS_TYPES_DDD_MODEL_RESULTS = [value["model_results_key"] for value in EFFECTS_TYPES_DDD.values() if "model_results_key" in value]
|
|
237
|
+
|
|
238
|
+
FE_TYPES = [value["FE"] for value in EFFECTS_TYPES["FE"]["types"].values()]
|
|
239
|
+
|
|
240
|
+
# Time trends:
|
|
241
|
+
TIME_TRENDS_TYPES = [
|
|
242
|
+
list(EFFECTS_TYPES.keys())[7],
|
|
243
|
+
list(EFFECTS_TYPES.keys())[9]
|
|
244
|
+
]
|
|
245
|
+
|
|
246
|
+
# Specific effects:
|
|
247
|
+
SPECIFIC_EFFFECTS_TYPES = [
|
|
248
|
+
list(EFFECTS_TYPES.keys())[8],
|
|
249
|
+
list(EFFECTS_TYPES.keys())[10]
|
|
250
|
+
]
|
|
251
|
+
|
|
252
|
+
OLS_MODEL_RESULTS = {
|
|
253
|
+
"coef_name": {
|
|
254
|
+
"model_results_key": "Coefficient",
|
|
255
|
+
"summary_description": "Coefficient",
|
|
256
|
+
},
|
|
257
|
+
"coef": {
|
|
258
|
+
"model_results_key": "Estimate",
|
|
259
|
+
"summary_description": "Estimate",
|
|
260
|
+
},
|
|
261
|
+
"coef_standard_errors": {
|
|
262
|
+
"model_results_key": "SE",
|
|
263
|
+
"summary_description": "SE",
|
|
264
|
+
},
|
|
265
|
+
"coef_teststatistic": {
|
|
266
|
+
"model_results_key": "t",
|
|
267
|
+
"summary_description": "t"
|
|
268
|
+
},
|
|
269
|
+
"coef_p": {
|
|
270
|
+
"model_results_key": "p",
|
|
271
|
+
"summary_description": "p"
|
|
272
|
+
},
|
|
273
|
+
"coef_confint_lower": {
|
|
274
|
+
"model_results_key": "CI_lower",
|
|
275
|
+
"summary_description": "CI lower"
|
|
276
|
+
},
|
|
277
|
+
"coef_confint_upper": {
|
|
278
|
+
"model_results_key": "CI_upper",
|
|
279
|
+
"summary_description": "CI upper"
|
|
280
|
+
},
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
ML_MODEL_RESULTS = {
|
|
284
|
+
"coef_name": {
|
|
285
|
+
"model_results_key": OLS_MODEL_RESULTS["coef_name"]["model_results_key"],
|
|
286
|
+
"summary_description": OLS_MODEL_RESULTS["coef_name"]["summary_description"],
|
|
287
|
+
},
|
|
288
|
+
"coef": {
|
|
289
|
+
"model_results_key": OLS_MODEL_RESULTS["coef"]["model_results_key"],
|
|
290
|
+
"summary_description": OLS_MODEL_RESULTS["coef"]["summary_description"],
|
|
291
|
+
},
|
|
292
|
+
"coef_standard_errors": {
|
|
293
|
+
"model_results_key": OLS_MODEL_RESULTS["coef_standard_errors"]["model_results_key"],
|
|
294
|
+
"summary_description": OLS_MODEL_RESULTS["coef_standard_errors"]["summary_description"],
|
|
295
|
+
},
|
|
296
|
+
"coef_teststatistic": {
|
|
297
|
+
"model_results_key": "z",
|
|
298
|
+
"summary_description": "z"
|
|
299
|
+
},
|
|
300
|
+
"coef_p": {
|
|
301
|
+
"model_results_key": OLS_MODEL_RESULTS["coef_p"]["model_results_key"],
|
|
302
|
+
"summary_description": OLS_MODEL_RESULTS["coef_p"]["summary_description"]
|
|
303
|
+
},
|
|
304
|
+
"coef_confint_lower": {
|
|
305
|
+
"model_results_key": OLS_MODEL_RESULTS["coef_confint_lower"]["model_results_key"],
|
|
306
|
+
"summary_description": OLS_MODEL_RESULTS["coef_confint_lower"]["summary_description"]
|
|
307
|
+
},
|
|
308
|
+
"coef_confint_upper": {
|
|
309
|
+
"model_results_key": OLS_MODEL_RESULTS["coef_confint_upper"]["model_results_key"],
|
|
310
|
+
"summary_description": OLS_MODEL_RESULTS["coef_confint_upper"]["summary_description"]
|
|
311
|
+
},
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
# Model fit metrics:
|
|
315
|
+
|
|
316
|
+
MODEL_FIT_METRICS_DESCRIPTION = "Model fit metric"
|
|
317
|
+
|
|
318
|
+
MODEL_FIT_METRICS = {
|
|
319
|
+
"SSR": {
|
|
320
|
+
"description": "Sum of squared residuals",
|
|
321
|
+
"show_in_summary": False
|
|
322
|
+
},
|
|
323
|
+
"SAR": {
|
|
324
|
+
"description": "Sum of absolute residuals",
|
|
325
|
+
"show_in_summary": False
|
|
326
|
+
},
|
|
327
|
+
"SQT": {
|
|
328
|
+
"description": "Total variance of dependent variable",
|
|
329
|
+
"show_in_summary": False
|
|
330
|
+
},
|
|
331
|
+
"RSQ": {
|
|
332
|
+
"description": "R-Squared",
|
|
333
|
+
"show_in_summary": True
|
|
334
|
+
},
|
|
335
|
+
"RSQ_ADJ": {
|
|
336
|
+
"description": "R-Squared adjusted",
|
|
337
|
+
"show_in_summary": True
|
|
338
|
+
},
|
|
339
|
+
"MSE": {
|
|
340
|
+
"description": "Mean squared error",
|
|
341
|
+
"show_in_summary": True
|
|
342
|
+
},
|
|
343
|
+
"RMSE": {
|
|
344
|
+
"description": "Root mean squared error",
|
|
345
|
+
"show_in_summary": True
|
|
346
|
+
},
|
|
347
|
+
"MAE": {
|
|
348
|
+
"description": "Mean absolute error",
|
|
349
|
+
"show_in_summary": True
|
|
350
|
+
},
|
|
351
|
+
"MAPE": {
|
|
352
|
+
"description": "Mean absolute percentage error",
|
|
353
|
+
"show_in_summary": True
|
|
354
|
+
},
|
|
355
|
+
"MAPE_SYM": {
|
|
356
|
+
"description": "Symmetric mean absolute percentage error",
|
|
357
|
+
"show_in_summary": False
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
# Treatment diagnostics:
|
|
362
|
+
|
|
363
|
+
TREATMENT_DIAGNOSTICS_DESCRIPTION = f"{TREATMENT_DESCRIPTION} diagnostics"
|
|
364
|
+
|
|
365
|
+
TREATMENT_DIAGNOSTICS = {
|
|
366
|
+
"treatment": {
|
|
367
|
+
"description": TREATMENT_DESCRIPTION,
|
|
368
|
+
"show_in_summary": True
|
|
369
|
+
},
|
|
370
|
+
"is_notreatment": {
|
|
371
|
+
"description": NO_TREATMENT_CG_DESCRIPTION,
|
|
372
|
+
"show_in_summary": True,
|
|
373
|
+
},
|
|
374
|
+
"treatment_group": {
|
|
375
|
+
"description": TREATMENT_GROUP_DESCRIPTION,
|
|
376
|
+
"show_in_summary": False
|
|
377
|
+
},
|
|
378
|
+
"control_group": {
|
|
379
|
+
"description": CONTROL_GROUP_DESCRIPTION,
|
|
380
|
+
"show_in_summary": False
|
|
381
|
+
},
|
|
382
|
+
"is_parallel": {
|
|
383
|
+
"description": "Parallel trends (pre)",
|
|
384
|
+
"show_in_summary": True
|
|
385
|
+
},
|
|
386
|
+
"is_simultaneous": {
|
|
387
|
+
"description": f"{TREATMENT_SIMULTANEOUS_DESCRIPTION} treatment",
|
|
388
|
+
"show_in_summary": False
|
|
389
|
+
},
|
|
390
|
+
"adoption_type": {
|
|
391
|
+
"description": "Type of adoption",
|
|
392
|
+
"show_in_summary": True
|
|
393
|
+
},
|
|
394
|
+
"is_binary": {
|
|
395
|
+
"description": "Binary treatment",
|
|
396
|
+
"show_in_summary": False
|
|
397
|
+
},
|
|
398
|
+
"treatment_format": {
|
|
399
|
+
"description": f"{TREATMENT_DESCRIPTION} format",
|
|
400
|
+
"show_in_summary": True
|
|
401
|
+
},
|
|
402
|
+
"treatment_group_size": {
|
|
403
|
+
"description": f"{TREATMENT_GROUP_DESCRIPTION} (N)",
|
|
404
|
+
"show_in_summary": True
|
|
405
|
+
},
|
|
406
|
+
"control_group_size": {
|
|
407
|
+
"description": f"{CONTROL_GROUP_DESCRIPTION} (N)",
|
|
408
|
+
"show_in_summary": True
|
|
409
|
+
},
|
|
410
|
+
"is_multiple_treatment_period": {
|
|
411
|
+
"description": "Multiple treatment periods",
|
|
412
|
+
"show_in_summary": True
|
|
413
|
+
},
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
# Input data diagnostics:
|
|
417
|
+
|
|
418
|
+
DATA_DIAGNOSTICS_DESCRIPTION = "Input data diagnostics"
|
|
419
|
+
|
|
420
|
+
DATA_DIAGNOSTICS = {
|
|
421
|
+
"is_balanced": {
|
|
422
|
+
"description": "Balanced panel data",
|
|
423
|
+
"show_in_summary": True
|
|
424
|
+
},
|
|
425
|
+
"is_missing": {
|
|
426
|
+
"description": "Missing values",
|
|
427
|
+
"show_in_summary": True
|
|
428
|
+
},
|
|
429
|
+
"drop_missing": {
|
|
430
|
+
"description": "Drop missing values",
|
|
431
|
+
"show_in_summary": False
|
|
432
|
+
},
|
|
433
|
+
"missing_replace_by_zero": {
|
|
434
|
+
"description": "Replace missing values by zero",
|
|
435
|
+
"show_in_summary": False
|
|
436
|
+
},
|
|
437
|
+
"is_prepost": {
|
|
438
|
+
"description": PREPOST_PANELDATA_DESCRIPTION,
|
|
439
|
+
"show_in_summary": False
|
|
440
|
+
},
|
|
441
|
+
"data_type": {
|
|
442
|
+
"description": "Data type",
|
|
443
|
+
"show_in_summary": True
|
|
444
|
+
},
|
|
445
|
+
"outcome_col": {
|
|
446
|
+
"description": "Outcome variable",
|
|
447
|
+
"show_in_summary": True
|
|
448
|
+
},
|
|
449
|
+
"outcome_descriptives": {
|
|
450
|
+
"description": "Outcome descriptives",
|
|
451
|
+
"show_in_summary": True
|
|
452
|
+
},
|
|
453
|
+
"observations": {
|
|
454
|
+
"description": N_DESCRIPTION,
|
|
455
|
+
"show_in_summary": True
|
|
456
|
+
},
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
DIAGNOSTICS_COLUMN = "Result"
|
|
460
|
+
|
|
461
|
+
COVARIATES_DESCRIPTION = "Covariates"
|
|
462
|
+
|
|
463
|
+
# Predictions:
|
|
464
|
+
|
|
465
|
+
PREDICTIONS_SUMMARY_FRAME_COLS = {
|
|
466
|
+
"mean": "Predicted mean",
|
|
467
|
+
"mean_se": "Predicted mean SE",
|
|
468
|
+
"mean_ci_lower": "Lower CI of mean",
|
|
469
|
+
"mean_ci_upper": "Upper CI of mean",
|
|
470
|
+
"obs_ci_lower": "Lower prediction interval",
|
|
471
|
+
"obs_ci_upper": "Upper prediction interval",
|
|
472
|
+
}
|
|
473
|
+
PREDICTIONS_SUMMARY_FRAME_COLS_LIST = list(PREDICTIONS_SUMMARY_FRAME_COLS.keys())
|
|
474
|
+
PREDICTIONS_SUMMARY_FRAME_DESCRIPTIONS = list(PREDICTIONS_SUMMARY_FRAME_COLS.values())
|
|
475
|
+
|
|
476
|
+
# Counterfactual:
|
|
477
|
+
COUNTERFAC_SUFFIX_CF = "counterfac"
|
|
478
|
+
COUNTERFAC_SUFFIX_PRED_CF = f"{DELIMITER}{PREDICTED_SUFFIX}{DELIMITER}{COUNTERFAC_SUFFIX_CF}"
|