diffindiff 2.2.2__tar.gz → 2.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffindiff-2.2.2 → diffindiff-2.2.4}/PKG-INFO +5 -4
- {diffindiff-2.2.2 → diffindiff-2.2.4}/README.md +4 -3
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/didanalysis.py +23 -6
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/didanalysis_helper.py +31 -9
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/diddata.py +26 -29
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/didtools.py +93 -36
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff.egg-info/PKG-INFO +5 -4
- {diffindiff-2.2.2 → diffindiff-2.2.4}/setup.py +1 -1
- {diffindiff-2.2.2 → diffindiff-2.2.4}/MANIFEST.in +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/__init__.py +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/config.py +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/tests/__init__.py +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/tests/data/Corona_Hesse.xlsx +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/tests/data/counties_DE.csv +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/tests/data/curfew_DE.csv +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/tests/tests_diffindiff.py +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff.egg-info/SOURCES.txt +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff.egg-info/dependency_links.txt +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff.egg-info/requires.txt +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff.egg-info/top_level.txt +0 -0
- {diffindiff-2.2.2 → diffindiff-2.2.4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.2.2
|
|
3
|
+
Version: 2.2.4
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -16,10 +16,11 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
|
|
|
16
16
|
Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
## Updates v2.2.
|
|
19
|
+
## Updates v2.2.4
|
|
20
20
|
- Bugfixes:
|
|
21
|
-
- Spillover treatment works now
|
|
22
|
-
-
|
|
21
|
+
- Spillover treatment really really works now (only relevant in rare cases)
|
|
22
|
+
- Fixed merging in diddata.DiffData.add_covariates() (only relevant in rare cases)
|
|
23
|
+
- Dropping missing values consistently (only relevant in rare cases)
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
## Features
|
|
@@ -8,10 +8,11 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
|
|
|
8
8
|
Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
## Updates v2.2.
|
|
11
|
+
## Updates v2.2.4
|
|
12
12
|
- Bugfixes:
|
|
13
|
-
- Spillover treatment works now
|
|
14
|
-
-
|
|
13
|
+
- Spillover treatment really really works now (only relevant in rare cases)
|
|
14
|
+
- Fixed merging in diddata.DiffData.add_covariates() (only relevant in rare cases)
|
|
15
|
+
- Dropping missing values consistently (only relevant in rare cases)
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
## Features
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 2.2.
|
|
8
|
-
# Last update: 2025-12-
|
|
7
|
+
# Version: 2.2.2
|
|
8
|
+
# Last update: 2025-12-07 10:27
|
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -40,6 +40,9 @@ class DiffModel:
|
|
|
40
40
|
timestamp
|
|
41
41
|
]
|
|
42
42
|
|
|
43
|
+
def get_did_modeldata_df (self):
|
|
44
|
+
return pd.DataFrame(self.data[2])
|
|
45
|
+
|
|
43
46
|
def treatment_statistics(
|
|
44
47
|
self,
|
|
45
48
|
treatment: str = None,
|
|
@@ -890,7 +893,7 @@ class DiffModel:
|
|
|
890
893
|
treatment_diagnostics = model_config["treatment_diagnostics"]
|
|
891
894
|
no_treatments = model_config["no_treatments"]
|
|
892
895
|
outcome_col = model_config["outcome_col"]
|
|
893
|
-
outcome_col_predicted = outcome_col
|
|
896
|
+
outcome_col_predicted = f"{outcome_col}{config.PREDICTED_SUFFIX}"
|
|
894
897
|
|
|
895
898
|
if TG_col is None and treatment is None:
|
|
896
899
|
if no_treatments == 1:
|
|
@@ -925,8 +928,7 @@ class DiffModel:
|
|
|
925
928
|
if ("TG" in plot_intervals_groups and "CG" not in plot_intervals_groups) or ("CG" in plot_intervals_groups and "TG" not in plot_intervals_groups):
|
|
926
929
|
lines_labels_required = lines_labels_required+1
|
|
927
930
|
if "TG" in plot_intervals_groups and "CG" in plot_intervals_groups:
|
|
928
|
-
lines_labels_required = lines_labels_required+2
|
|
929
|
-
|
|
931
|
+
lines_labels_required = lines_labels_required+2
|
|
930
932
|
assert len(lines_col) == lines_col_required, f"Parameter 'lines_col' must be a list with {lines_col_required} entries"
|
|
931
933
|
assert len(lines_style) == lines_style_required, f"Parameter 'lines_style' must be a list with {lines_col_required} entries"
|
|
932
934
|
assert len(lines_labels) == lines_labels_required, f"Parameter 'lines_labels' must be a list with {lines_labels_required} entries"
|
|
@@ -1389,6 +1391,13 @@ def did_analysis(
|
|
|
1389
1391
|
*treatment_col
|
|
1390
1392
|
]
|
|
1391
1393
|
|
|
1394
|
+
data = tools.panel_index(
|
|
1395
|
+
data = data,
|
|
1396
|
+
unit_col = unit_col,
|
|
1397
|
+
time_col = time_col,
|
|
1398
|
+
verbose = verbose
|
|
1399
|
+
)
|
|
1400
|
+
|
|
1392
1401
|
treatment_diagnostics_results = helper.treatment_diagnostics(
|
|
1393
1402
|
data = data,
|
|
1394
1403
|
unit_col=unit_col,
|
|
@@ -1728,6 +1737,7 @@ def did_analysis(
|
|
|
1728
1737
|
spillover = helper.create_spillover(
|
|
1729
1738
|
data=data,
|
|
1730
1739
|
unit_col=unit_col,
|
|
1740
|
+
time_col=time_col,
|
|
1731
1741
|
treatment_col=treatment_col,
|
|
1732
1742
|
spillover_treatment=spillover_treatment,
|
|
1733
1743
|
spillover_units=spillover_units
|
|
@@ -1925,7 +1935,14 @@ def ddd_analysis(
|
|
|
1925
1935
|
)
|
|
1926
1936
|
|
|
1927
1937
|
cols_relevant = cols_relevant + covariates
|
|
1928
|
-
|
|
1938
|
+
|
|
1939
|
+
data = tools.panel_index(
|
|
1940
|
+
data = data,
|
|
1941
|
+
unit_col = unit_col,
|
|
1942
|
+
time_col = time_col,
|
|
1943
|
+
verbose = verbose
|
|
1944
|
+
)
|
|
1945
|
+
|
|
1929
1946
|
treatment_diagnostics_results = helper.treatment_diagnostics(
|
|
1930
1947
|
data = data,
|
|
1931
1948
|
unit_col=unit_col,
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 1.0.
|
|
8
|
-
# Last update: 2025-12-
|
|
7
|
+
# Version: 1.0.5
|
|
8
|
+
# Last update: 2025-12-07 10:27
|
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -172,7 +172,9 @@ def create_specific_treatment_effects(
|
|
|
172
172
|
def create_spillover(
|
|
173
173
|
data: pd.DataFrame,
|
|
174
174
|
unit_col: str,
|
|
175
|
+
time_col: str,
|
|
175
176
|
treatment_col: list,
|
|
177
|
+
TT_col: str = None,
|
|
176
178
|
spillover_treatment: list = [],
|
|
177
179
|
spillover_units: list = [],
|
|
178
180
|
verbose: bool = config.VERBOSE
|
|
@@ -189,16 +191,36 @@ def create_spillover(
|
|
|
189
191
|
spillover_unit_vars = []
|
|
190
192
|
spillover_treatment_vars = []
|
|
191
193
|
|
|
192
|
-
for
|
|
194
|
+
for treatment in treatment_col:
|
|
195
|
+
|
|
196
|
+
if TT_col is None:
|
|
197
|
+
|
|
198
|
+
TT_col = config.TT_COL
|
|
193
199
|
|
|
194
|
-
|
|
195
|
-
|
|
200
|
+
data = tools.treatment_time_col(
|
|
201
|
+
data = data,
|
|
202
|
+
unit_col = unit_col,
|
|
203
|
+
time_col = time_col,
|
|
204
|
+
treatment_col = treatment,
|
|
205
|
+
create_TT_col = TT_col,
|
|
206
|
+
verbose = verbose
|
|
207
|
+
)[0]
|
|
196
208
|
|
|
197
|
-
|
|
198
|
-
|
|
209
|
+
sp_unit_col = f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"
|
|
210
|
+
sp_treatment_col = f"{config.SPILLOVER_PREFIX}{config.DELIMITER}{treatment}"
|
|
199
211
|
|
|
200
|
-
data
|
|
201
|
-
data
|
|
212
|
+
data[sp_unit_col] = 0
|
|
213
|
+
data[sp_treatment_col] = 0
|
|
214
|
+
|
|
215
|
+
spillover_unit_vars.append(sp_unit_col)
|
|
216
|
+
spillover_treatment_vars.append(sp_treatment_col)
|
|
217
|
+
|
|
218
|
+
data.loc[
|
|
219
|
+
data[unit_col].astype(str).isin(spillover_units),
|
|
220
|
+
sp_unit_col
|
|
221
|
+
] = 1
|
|
222
|
+
|
|
223
|
+
data[sp_treatment_col] = data[sp_unit_col]*data[TT_col]
|
|
202
224
|
|
|
203
225
|
spillover_treatment_vars_join = ' + '.join(spillover_treatment_vars)
|
|
204
226
|
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 2.1.
|
|
8
|
-
# Last update: 2025-12-
|
|
7
|
+
# Version: 2.1.5
|
|
8
|
+
# Last update: 2025-12-07 10:27
|
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -478,7 +478,7 @@ class DiffData:
|
|
|
478
478
|
):
|
|
479
479
|
|
|
480
480
|
if unit_col is None and time_col is None:
|
|
481
|
-
raise ValueError("unit_col and/or time_col must be stated")
|
|
481
|
+
raise ValueError("Parameter 'unit_col' and/or 'time_col' must be stated")
|
|
482
482
|
|
|
483
483
|
if verbose:
|
|
484
484
|
if len(variables) > 0:
|
|
@@ -488,16 +488,16 @@ class DiffData:
|
|
|
488
488
|
|
|
489
489
|
did_modeldata = self.get_did_modeldata_df()
|
|
490
490
|
|
|
491
|
+
additional_df = tools.panel_index(
|
|
492
|
+
data=additional_df,
|
|
493
|
+
unit_col=unit_col,
|
|
494
|
+
time_col=time_col,
|
|
495
|
+
verbose=verbose
|
|
496
|
+
)
|
|
497
|
+
|
|
491
498
|
existing_variables = []
|
|
492
499
|
|
|
493
|
-
if unit_col is not None and time_col is not None:
|
|
494
|
-
|
|
495
|
-
additional_df = tools.panel_index(
|
|
496
|
-
data=additional_df,
|
|
497
|
-
unit_col=unit_col,
|
|
498
|
-
time_col=time_col,
|
|
499
|
-
verbose=verbose
|
|
500
|
-
)
|
|
500
|
+
if unit_col is not None and time_col is not None:
|
|
501
501
|
|
|
502
502
|
if variables is None:
|
|
503
503
|
|
|
@@ -659,15 +659,15 @@ class DiffData:
|
|
|
659
659
|
|
|
660
660
|
new_merge = tools.panel_index(
|
|
661
661
|
data=new_merge,
|
|
662
|
-
unit_col=
|
|
663
|
-
time_col=
|
|
662
|
+
unit_col=config.UNIT_COL,
|
|
663
|
+
time_col=config.TIME_COL,
|
|
664
664
|
verbose=verbose
|
|
665
665
|
)
|
|
666
666
|
|
|
667
667
|
did_modeldata_old = tools.panel_index(
|
|
668
668
|
data=did_modeldata_old,
|
|
669
|
-
unit_col=
|
|
670
|
-
time_col=
|
|
669
|
+
unit_col=config.UNIT_COL,
|
|
670
|
+
time_col=config.TIME_COL,
|
|
671
671
|
verbose=verbose
|
|
672
672
|
)
|
|
673
673
|
|
|
@@ -1055,24 +1055,12 @@ def merge_data(
|
|
|
1055
1055
|
treatment_data_df,
|
|
1056
1056
|
how = "cross"
|
|
1057
1057
|
)
|
|
1058
|
-
|
|
1059
|
-
if drop_missing or missing_replace_by_zero:
|
|
1060
|
-
modeldata_ismissing = tools.is_missing(
|
|
1061
|
-
data = did_modeldata,
|
|
1062
|
-
drop_missing = drop_missing,
|
|
1063
|
-
missing_replace_by_zero = missing_replace_by_zero,
|
|
1064
|
-
verbose = False
|
|
1065
|
-
)
|
|
1066
|
-
did_modeldata = modeldata_ismissing[2]
|
|
1067
1058
|
|
|
1068
1059
|
did_modeldata[treatment_name] = did_modeldata[TG_col] * did_modeldata[TT_col]
|
|
1069
1060
|
|
|
1070
1061
|
if treatment_config["after_treatment_period"]:
|
|
1071
1062
|
did_modeldata[after_treatment_name] = did_modeldata[TG_col] * did_modeldata[ATT_col]
|
|
1072
1063
|
|
|
1073
|
-
if np.dtype(did_modeldata[config.TIME_COL]) != np.dtype(outcome_data[time_col]):
|
|
1074
|
-
print(f"WARNING: Time columns of treatment data and outcome data differ: {str(np.dtype(did_modeldata[config.TIME_COL]))}, {str(np.dtype(outcome_data[time_col]))}. This might induce an error while building the model dataset.")
|
|
1075
|
-
|
|
1076
1064
|
did_modeldata = tools.panel_index(
|
|
1077
1065
|
data=did_modeldata,
|
|
1078
1066
|
unit_col=config.UNIT_COL,
|
|
@@ -1086,7 +1074,7 @@ def merge_data(
|
|
|
1086
1074
|
time_col=time_col,
|
|
1087
1075
|
verbose=verbose
|
|
1088
1076
|
)
|
|
1089
|
-
|
|
1077
|
+
|
|
1090
1078
|
if keep_columns:
|
|
1091
1079
|
outcome_data_short = outcome_data
|
|
1092
1080
|
else:
|
|
@@ -1097,6 +1085,15 @@ def merge_data(
|
|
|
1097
1085
|
on=config.UNIT_TIME_COL,
|
|
1098
1086
|
how="left"
|
|
1099
1087
|
)
|
|
1088
|
+
|
|
1089
|
+
if drop_missing or missing_replace_by_zero:
|
|
1090
|
+
modeldata_ismissing = tools.is_missing(
|
|
1091
|
+
data = did_modeldata,
|
|
1092
|
+
drop_missing = drop_missing,
|
|
1093
|
+
missing_replace_by_zero = missing_replace_by_zero,
|
|
1094
|
+
verbose = False
|
|
1095
|
+
)
|
|
1096
|
+
did_modeldata = modeldata_ismissing[2]
|
|
1100
1097
|
|
|
1101
1098
|
outcome_col_original = outcome_col
|
|
1102
1099
|
unit_time_col_original = unit_id_col, time_col
|
|
@@ -1230,7 +1227,7 @@ def create_counterfactual(
|
|
|
1230
1227
|
unit_col = unit_col,
|
|
1231
1228
|
time_col = time_col,
|
|
1232
1229
|
treatment_col = treatment_col
|
|
1233
|
-
)
|
|
1230
|
+
)[0]
|
|
1234
1231
|
units = tools.unique(units_tt[unit_col])
|
|
1235
1232
|
|
|
1236
1233
|
if not isnotreatment[0]:
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 2.1.
|
|
8
|
-
# Last update: 2025-12-
|
|
7
|
+
# Version: 2.1.4
|
|
8
|
+
# Last update: 2025-12-07 10:27
|
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -54,25 +54,39 @@ def panel_index(
|
|
|
54
54
|
):
|
|
55
55
|
|
|
56
56
|
to_str = []
|
|
57
|
-
unit_x_time = True
|
|
58
57
|
|
|
59
|
-
if
|
|
60
|
-
data[unit_col]
|
|
61
|
-
|
|
58
|
+
if unit_col is not None:
|
|
59
|
+
if data[unit_col].dtype != 'object':
|
|
60
|
+
data[unit_col] = data[unit_col].astype(str)
|
|
61
|
+
to_str.append(unit_col)
|
|
62
|
+
else:
|
|
63
|
+
if verbose:
|
|
64
|
+
print("NOTE: No unit column was stated")
|
|
65
|
+
|
|
66
|
+
if time_col is not None:
|
|
67
|
+
if data[time_col].dtype != 'object':
|
|
68
|
+
data[time_col] = data[time_col].astype(str)
|
|
69
|
+
to_str.append(time_col)
|
|
70
|
+
else:
|
|
71
|
+
if verbose:
|
|
72
|
+
print("NOTE: No time column was stated")
|
|
62
73
|
|
|
63
|
-
if
|
|
64
|
-
|
|
65
|
-
to_str.append(time_col)
|
|
74
|
+
if verbose and len(to_str) > 0:
|
|
75
|
+
print(f"NOTE: The following columns were converted to str: {', '.join(to_str)}.")
|
|
66
76
|
|
|
67
77
|
if config.UNIT_TIME_COL not in data.columns:
|
|
68
|
-
|
|
69
|
-
|
|
78
|
+
|
|
79
|
+
if unit_col is not None and time_col is not None:
|
|
70
80
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
81
|
+
data[config.UNIT_TIME_COL] = data[unit_col]+config.DELIMITER+data[time_col]
|
|
82
|
+
|
|
83
|
+
if verbose:
|
|
84
|
+
print(f"NOTE: The following unit-time-index column was created: {config.UNIT_TIME_COL}.")
|
|
85
|
+
|
|
86
|
+
else:
|
|
87
|
+
|
|
88
|
+
if verbose:
|
|
89
|
+
print("No unit-time-index column was created.")
|
|
76
90
|
|
|
77
91
|
return data
|
|
78
92
|
|
|
@@ -170,8 +184,8 @@ def is_binary(
|
|
|
170
184
|
if verbose:
|
|
171
185
|
print("OK")
|
|
172
186
|
|
|
173
|
-
|
|
174
|
-
|
|
187
|
+
if not binary:
|
|
188
|
+
print(f"NOTE: treatment column '{treatment_col}' is not binary. Likely treatment format is: {treatment_format}.")
|
|
175
189
|
|
|
176
190
|
return [
|
|
177
191
|
binary,
|
|
@@ -268,8 +282,8 @@ def is_simultaneous(
|
|
|
268
282
|
if verbose:
|
|
269
283
|
print("OK")
|
|
270
284
|
|
|
271
|
-
|
|
272
|
-
|
|
285
|
+
if not simultaneous and data_isnotreatment[0]:
|
|
286
|
+
print(f"NOTE: treatment '{treatment_col}' is not simultaneous.")
|
|
273
287
|
|
|
274
288
|
if simultaneous and not data_isnotreatment[0]:
|
|
275
289
|
print(f"WARNING: treatment '{treatment_col}' is simultaneous and does not include a {config.NO_TREATMENT_CG_DESCRIPTION}")
|
|
@@ -303,8 +317,8 @@ def is_notreatment(
|
|
|
303
317
|
if verbose:
|
|
304
318
|
print("OK")
|
|
305
319
|
|
|
306
|
-
|
|
307
|
-
|
|
320
|
+
if not no_treatment:
|
|
321
|
+
print(f"NOTE: treatment '{treatment_col}' does not include a {config.NO_TREATMENT_CG_DESCRIPTION}.")
|
|
308
322
|
|
|
309
323
|
return [
|
|
310
324
|
no_treatment,
|
|
@@ -342,8 +356,8 @@ def treatment_group_col(
|
|
|
342
356
|
if verbose:
|
|
343
357
|
print("OK")
|
|
344
358
|
|
|
345
|
-
|
|
346
|
-
|
|
359
|
+
if create_TG_col_exists:
|
|
360
|
+
print(f"NOTE: Column {create_TG_col} already exists. Saved treatment group in column {config.TG_COL}{config.DELIMITER}{treatment_col}.")
|
|
347
361
|
|
|
348
362
|
return [
|
|
349
363
|
data,
|
|
@@ -351,6 +365,32 @@ def treatment_group_col(
|
|
|
351
365
|
create_TG_col
|
|
352
366
|
]
|
|
353
367
|
|
|
368
|
+
def treatment_time_col(
|
|
369
|
+
data: pd.DataFrame,
|
|
370
|
+
unit_col: str,
|
|
371
|
+
time_col: str,
|
|
372
|
+
treatment_col: str,
|
|
373
|
+
create_TT_col: str = "TT",
|
|
374
|
+
verbose: bool = config.VERBOSE
|
|
375
|
+
):
|
|
376
|
+
|
|
377
|
+
tt = treatment_times(
|
|
378
|
+
data = data,
|
|
379
|
+
unit_col = unit_col,
|
|
380
|
+
time_col = time_col,
|
|
381
|
+
treatment_col = treatment_col,
|
|
382
|
+
verbose = verbose
|
|
383
|
+
)[1]
|
|
384
|
+
|
|
385
|
+
data[create_TT_col] = 0
|
|
386
|
+
data.loc[data[time_col].isin(tt), create_TT_col] = 1
|
|
387
|
+
|
|
388
|
+
return [
|
|
389
|
+
data,
|
|
390
|
+
tt,
|
|
391
|
+
create_TT_col
|
|
392
|
+
]
|
|
393
|
+
|
|
354
394
|
def untreated_units(
|
|
355
395
|
data: pd.DataFrame,
|
|
356
396
|
unit_col: str,
|
|
@@ -460,10 +500,11 @@ def is_prepost(
|
|
|
460
500
|
if verbose:
|
|
461
501
|
print("OK")
|
|
462
502
|
|
|
463
|
-
if
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
503
|
+
if verbose:
|
|
504
|
+
if prepost:
|
|
505
|
+
print("NOTE: Panel data is pre-post.")
|
|
506
|
+
else:
|
|
507
|
+
print("NOTE: Panel data is multi-period panel data.")
|
|
467
508
|
|
|
468
509
|
return prepost
|
|
469
510
|
|
|
@@ -502,8 +543,8 @@ def is_multiple_treatment_period(
|
|
|
502
543
|
if verbose:
|
|
503
544
|
print("OK")
|
|
504
545
|
|
|
505
|
-
|
|
506
|
-
|
|
546
|
+
if units_multiple > 0:
|
|
547
|
+
print(f"NOTE: There are {units_multiple} observational units with multiple treatment periods with respect to treatment '{treatment_col}'.")
|
|
507
548
|
|
|
508
549
|
return [
|
|
509
550
|
multiple_treatment_period,
|
|
@@ -591,12 +632,22 @@ def treatment_times(
|
|
|
591
632
|
unit_col,
|
|
592
633
|
time_col,
|
|
593
634
|
treatment_col
|
|
594
|
-
]
|
|
635
|
+
],
|
|
636
|
+
verbose=verbose
|
|
595
637
|
)
|
|
596
638
|
|
|
639
|
+
is_multiple_treatment_period(
|
|
640
|
+
data = data,
|
|
641
|
+
unit_col = unit_col,
|
|
642
|
+
treatment_col = treatment_col,
|
|
643
|
+
verbose = verbose
|
|
644
|
+
)[0]
|
|
645
|
+
|
|
597
646
|
if verbose:
|
|
598
647
|
print(f"Identifying treatment times for treatment '{treatment_col}'", end = " ... ")
|
|
599
648
|
|
|
649
|
+
tt = list(unique(data.loc[data[treatment_col] == 1, time_col]))
|
|
650
|
+
|
|
600
651
|
units = unique(data[unit_col])
|
|
601
652
|
|
|
602
653
|
units_tt = pd.DataFrame(columns = [unit_col, "treatment_min", "treatment_max"])
|
|
@@ -628,7 +679,10 @@ def treatment_times(
|
|
|
628
679
|
if verbose:
|
|
629
680
|
print("OK")
|
|
630
681
|
|
|
631
|
-
return
|
|
682
|
+
return [
|
|
683
|
+
units_tt,
|
|
684
|
+
tt
|
|
685
|
+
]
|
|
632
686
|
|
|
633
687
|
def model_wrapper(
|
|
634
688
|
y,
|
|
@@ -833,8 +887,6 @@ def fit_metrics(
|
|
|
833
887
|
RSQ_ADJ = (1-(1-RSQ)*((observations-1)/(observations-indep_vars_no-1)))
|
|
834
888
|
|
|
835
889
|
else:
|
|
836
|
-
|
|
837
|
-
print("NOTE: As no number of independent vars was stated, no Adj. R-Squared is calculated.")
|
|
838
890
|
|
|
839
891
|
RSQ_ADJ = np.nan
|
|
840
892
|
|
|
@@ -854,8 +906,13 @@ def fit_metrics(
|
|
|
854
906
|
if verbose:
|
|
855
907
|
print("OK")
|
|
856
908
|
|
|
857
|
-
if
|
|
858
|
-
|
|
909
|
+
if verbose:
|
|
910
|
+
|
|
911
|
+
if RSQ_ADJ == np.nan:
|
|
912
|
+
print("NOTE: As no number of independent vars was stated, no Adj. R-Squared is calculated.")
|
|
913
|
+
|
|
914
|
+
if len(obs_exp_clean) < len(observed) or len(obs_exp_clean) < len(expected):
|
|
915
|
+
print("NOTE: Vectors 'observed' and/or 'expected' contain NaNs which were dropped.")
|
|
859
916
|
|
|
860
917
|
modelfit_results = [
|
|
861
918
|
model_residuals,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.2.2
|
|
3
|
+
Version: 2.2.4
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -16,10 +16,11 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
|
|
|
16
16
|
Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
## Updates v2.2.
|
|
19
|
+
## Updates v2.2.4
|
|
20
20
|
- Bugfixes:
|
|
21
|
-
- Spillover treatment works now
|
|
22
|
-
-
|
|
21
|
+
- Spillover treatment really really works now (only relevant in rare cases)
|
|
22
|
+
- Fixed merging in diddata.DiffData.add_covariates() (only relevant in rare cases)
|
|
23
|
+
- Dropping missing values consistently (only relevant in rare cases)
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
## Features
|
|
@@ -8,7 +8,7 @@ def read_README():
|
|
|
8
8
|
|
|
9
9
|
setup(
|
|
10
10
|
name='diffindiff',
|
|
11
|
-
version='2.2.2',
|
|
11
|
+
version='2.2.4',
|
|
12
12
|
description='diffindiff: Python library for convenient Difference-in-Differences Analyses',
|
|
13
13
|
packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
|
|
14
14
|
include_package_data=True,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|