diffindiff 2.0.1__tar.gz → 2.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffindiff-2.0.1 → diffindiff-2.0.3}/PKG-INFO +6 -6
- {diffindiff-2.0.1 → diffindiff-2.0.3}/README.md +4 -4
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/didanalysis.py +107 -69
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/diddata.py +14 -9
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/didtools.py +85 -41
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/tests/tests_diffindiff.py +9 -7
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff.egg-info/PKG-INFO +6 -6
- {diffindiff-2.0.1 → diffindiff-2.0.3}/setup.py +1 -1
- {diffindiff-2.0.1 → diffindiff-2.0.3}/MANIFEST.in +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/__init__.py +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/tests/__init__.py +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/tests/data/Corona_Hesse.xlsx +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/tests/data/counties_DE.csv +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff/tests/data/curfew_DE.csv +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff.egg-info/SOURCES.txt +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff.egg-info/dependency_links.txt +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff.egg-info/requires.txt +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/diffindiff.egg-info/top_level.txt +0 -0
- {diffindiff-2.0.1 → diffindiff-2.0.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -38,17 +38,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
38
38
|
- Create predictive counterfactuals
|
|
39
39
|
- **DiD analysis**:
|
|
40
40
|
- Perform standard DiD analysis
|
|
41
|
-
- Model
|
|
41
|
+
- Model extensions:
|
|
42
42
|
- Staggered adoption
|
|
43
43
|
- Multiple treatments
|
|
44
44
|
- Two-way fixed effects models
|
|
45
45
|
- Group- or individual-specific treatment effects
|
|
46
46
|
- Group- or individual-specific time trends
|
|
47
47
|
- Including covariates
|
|
48
|
-
-
|
|
48
|
+
- Including after-treatment period
|
|
49
49
|
- Triple Difference (DDD)
|
|
50
50
|
- Own counterfactuals
|
|
51
|
-
- Bonferroni correction
|
|
51
|
+
- Bonferroni correction for treatment effects
|
|
52
|
+
- Placebo test
|
|
52
53
|
- **Visualization**:
|
|
53
54
|
- Plot observed and expected time course of treatment and control group
|
|
54
55
|
- Plot expected time course of treatment group and counterfactual
|
|
@@ -60,7 +61,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
60
61
|
- Test for type of adoption
|
|
61
62
|
- Test whether the panel dataset is balanced
|
|
62
63
|
- Test for parallel trend assumption
|
|
63
|
-
- Placebo test
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
## Literature
|
|
@@ -16,17 +16,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
16
16
|
- Create predictive counterfactuals
|
|
17
17
|
- **DiD analysis**:
|
|
18
18
|
- Perform standard DiD analysis
|
|
19
|
-
- Model
|
|
19
|
+
- Model extensions:
|
|
20
20
|
- Staggered adoption
|
|
21
21
|
- Multiple treatments
|
|
22
22
|
- Two-way fixed effects models
|
|
23
23
|
- Group- or individual-specific treatment effects
|
|
24
24
|
- Group- or individual-specific time trends
|
|
25
25
|
- Including covariates
|
|
26
|
-
-
|
|
26
|
+
- Including after-treatment period
|
|
27
27
|
- Triple Difference (DDD)
|
|
28
28
|
- Own counterfactuals
|
|
29
|
-
- Bonferroni correction
|
|
29
|
+
- Bonferroni correction for treatment effects
|
|
30
|
+
- Placebo test
|
|
30
31
|
- **Visualization**:
|
|
31
32
|
- Plot observed and expected time course of treatment and control group
|
|
32
33
|
- Plot expected time course of treatment group and counterfactual
|
|
@@ -38,7 +39,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
38
39
|
- Test for type of adoption
|
|
39
40
|
- Test whether the panel dataset is balanced
|
|
40
41
|
- Test for parallel trend assumption
|
|
41
|
-
- Placebo test
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
## Literature
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
# Name: didanalysis (diffindiff)
|
|
1
|
+
#-----------------------------------------------------------------------
|
|
2
|
+
# Name: didanalysis (diffindiff package)
|
|
3
3
|
# Purpose: Analysis functions for difference-in-differences analyses
|
|
4
|
-
# Author: Thomas Wieland
|
|
5
|
-
#
|
|
6
|
-
#
|
|
4
|
+
# Author: Thomas Wieland
|
|
5
|
+
# ORCID: 0000-0001-5168-9846
|
|
6
|
+
# mail: geowieland@googlemail.com
|
|
7
|
+
# Version: 2.0.3
|
|
8
|
+
# Last update: 2025-04-18 10:24
|
|
7
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
8
|
-
|
|
10
|
+
#-----------------------------------------------------------------------
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
import pandas as pd
|
|
@@ -25,7 +27,8 @@ class DiffModel:
|
|
|
25
27
|
did_modeldata,
|
|
26
28
|
did_modelpredictions,
|
|
27
29
|
did_model_statistics,
|
|
28
|
-
did_olsmodel
|
|
30
|
+
did_olsmodel,
|
|
31
|
+
did_prediction_intervals
|
|
29
32
|
):
|
|
30
33
|
|
|
31
34
|
self.data = [
|
|
@@ -34,7 +37,8 @@ class DiffModel:
|
|
|
34
37
|
did_modeldata,
|
|
35
38
|
did_modelpredictions,
|
|
36
39
|
did_model_statistics,
|
|
37
|
-
did_olsmodel
|
|
40
|
+
did_olsmodel,
|
|
41
|
+
did_prediction_intervals
|
|
38
42
|
]
|
|
39
43
|
|
|
40
44
|
def treatment_statistics(
|
|
@@ -82,7 +86,7 @@ class DiffModel:
|
|
|
82
86
|
after_treatment_period_start = None
|
|
83
87
|
after_treatment_period_end = None
|
|
84
88
|
after_treatment_period_N = None
|
|
85
|
-
if len(model_config["after_treatment_col"]) > 0:
|
|
89
|
+
if len(model_config["after_treatment_col"]) > 0 and after_treatment_col is not None:
|
|
86
90
|
after_treatment_period_start = treatment_period_end+pd.Timedelta(days=1)
|
|
87
91
|
after_treatment_period_start = pd.to_datetime(after_treatment_period_start)
|
|
88
92
|
after_treatment_period_end = pd.to_datetime(study_period_end)
|
|
@@ -364,7 +368,7 @@ class DiffModel:
|
|
|
364
368
|
|
|
365
369
|
for key, value in covariates_effects.items():
|
|
366
370
|
covariates_effects_rows.append({
|
|
367
|
-
"
|
|
371
|
+
"": value["Coefficient"],
|
|
368
372
|
"Estimate": value["Estimate"],
|
|
369
373
|
"SE": value["SE"],
|
|
370
374
|
"t": value["t"],
|
|
@@ -523,13 +527,15 @@ class DiffModel:
|
|
|
523
527
|
covariates_effects_df["CI lower"] = covariates_effects_df["CI lower"].map(lambda x: f"{x:,.3f}")
|
|
524
528
|
covariates_effects_df["CI upper"] = covariates_effects_df["CI upper"].map(lambda x: f"{x:,.3f}")
|
|
525
529
|
covariates_effects_df.iloc[:, 0] = covariates_effects_df.iloc[:, 0].apply(lambda x: f"{x:<{max_width_column1}}")
|
|
530
|
+
print("Covariates")
|
|
526
531
|
print(covariates_effects_df.to_string(index=False))
|
|
527
|
-
if not show_covariates:
|
|
532
|
+
if not show_covariates or no_covariates == 0:
|
|
528
533
|
if no_covariates > 0:
|
|
529
534
|
print ("Covariates YES")
|
|
530
535
|
else:
|
|
531
536
|
print ("Covariates NO")
|
|
532
537
|
|
|
538
|
+
print("")
|
|
533
539
|
print("Fixed effects")
|
|
534
540
|
if model_config["FE_unit"]:
|
|
535
541
|
print (" Units YES")
|
|
@@ -566,7 +572,7 @@ class DiffModel:
|
|
|
566
572
|
print(treatment_diagnostics_df_t)
|
|
567
573
|
|
|
568
574
|
print("-" * total_width)
|
|
569
|
-
print ("Input data
|
|
575
|
+
print ("Input data diagnostixx") # TODO ?? AENDERN
|
|
570
576
|
if modeldata_isbalanced:
|
|
571
577
|
print ("Balanced panel data YES")
|
|
572
578
|
else:
|
|
@@ -756,16 +762,21 @@ class DiffModel:
|
|
|
756
762
|
ols_model = self.data[5]
|
|
757
763
|
return ols_model
|
|
758
764
|
|
|
765
|
+
def prediction_intervals(self):
|
|
766
|
+
|
|
767
|
+
prediction_intervals = self.data[6]
|
|
768
|
+
return prediction_intervals
|
|
769
|
+
|
|
759
770
|
def placebo(
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
771
|
+
self,
|
|
772
|
+
treatment: str = None,
|
|
773
|
+
after_treatment_col: str = None,
|
|
774
|
+
TG_col: str = None,
|
|
775
|
+
TT_col: str = None,
|
|
776
|
+
divide: float = 0.5,
|
|
777
|
+
resample: float = 1.0,
|
|
778
|
+
random_state = 71
|
|
779
|
+
):
|
|
769
780
|
|
|
770
781
|
model_config = self.data[1]
|
|
771
782
|
model_data = self.data[2]
|
|
@@ -796,9 +807,9 @@ class DiffModel:
|
|
|
796
807
|
TT_col_ = "TT_" + treatment
|
|
797
808
|
TGxTT_ = "Placebo_" + treatment
|
|
798
809
|
if TG_col is None and TG_col_ not in model_config["TG_col"]:
|
|
799
|
-
raise ValueError("
|
|
810
|
+
raise ValueError("Cannot find treatment group identification variable for treatment " + treatment + ". Please state TG_col = [treatment_group_dummy].")
|
|
800
811
|
if TT_col is None and TT_col_ not in model_config["TT_col"]:
|
|
801
|
-
raise ValueError("
|
|
812
|
+
raise ValueError("Cannot findt treatment time variable for treatment " + treatment + ". Please state TG_col = [treatment_time_dummy].")
|
|
802
813
|
|
|
803
814
|
unit_col = model_config["unit_col"]
|
|
804
815
|
time_col = model_config["time_col"]
|
|
@@ -1127,19 +1138,20 @@ class DiffModel:
|
|
|
1127
1138
|
return model_data_TG_CG
|
|
1128
1139
|
|
|
1129
1140
|
def plot_counterfactual(
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1141
|
+
self,
|
|
1142
|
+
treatment: str = None,
|
|
1143
|
+
after_treatment_col: str = None,
|
|
1144
|
+
x_label: str = "Time",
|
|
1145
|
+
y_label: str = "Outcome",
|
|
1146
|
+
y_lim = None,
|
|
1147
|
+
plot_title: str = "Treatment group Counterfactual",
|
|
1148
|
+
lines_col: list = ["blue", "green"],
|
|
1149
|
+
lines_style: list = ["solid", "dashed"],
|
|
1150
|
+
lines_labels: list = ["TG", "TG counterfactual"],
|
|
1151
|
+
plot_legend: bool = True,
|
|
1152
|
+
plot_grid: bool = True,
|
|
1153
|
+
plot_size: list = [12, 6]
|
|
1154
|
+
):
|
|
1143
1155
|
|
|
1144
1156
|
model_config = self.data[1]
|
|
1145
1157
|
outcome_col = model_config["outcome_col"]
|
|
@@ -1158,7 +1170,10 @@ class DiffModel:
|
|
|
1158
1170
|
else:
|
|
1159
1171
|
raise ValueError ("Model object has no column for treatment group with respect to ", str(no_treatments), " treatments. Choose one with parameter treatment.")
|
|
1160
1172
|
|
|
1161
|
-
model_data_mod = self.counterfactual(
|
|
1173
|
+
model_data_mod = self.counterfactual(
|
|
1174
|
+
treatment = treatment,
|
|
1175
|
+
after_treatment_col = after_treatment_col
|
|
1176
|
+
)
|
|
1162
1177
|
|
|
1163
1178
|
if treatment is not None:
|
|
1164
1179
|
|
|
@@ -1182,6 +1197,8 @@ class DiffModel:
|
|
|
1182
1197
|
|
|
1183
1198
|
treatment = treatment_diagnostics[0]["treatment"]
|
|
1184
1199
|
|
|
1200
|
+
treatment_group = [str(x) for x in treatment_group]
|
|
1201
|
+
|
|
1185
1202
|
TG_col = "TG_" + treatment
|
|
1186
1203
|
|
|
1187
1204
|
model_data_mod[TG_col] = 0
|
|
@@ -1344,20 +1361,41 @@ def did_analysis(
|
|
|
1344
1361
|
intercept = False
|
|
1345
1362
|
TG_col = []
|
|
1346
1363
|
print ("NOTE: Quasi-experiment includes more than one treatment. Unit fixed effects are used instead of control group baseline and treatment group deviation.")
|
|
1347
|
-
|
|
1348
|
-
if ITE:
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1364
|
+
|
|
1365
|
+
if ITE:
|
|
1366
|
+
FE_unit = True
|
|
1367
|
+
print ("NOTE: Model includes individual treatment effects. Unit fixed effects are included.")
|
|
1368
|
+
if GTE:
|
|
1369
|
+
GTE = False
|
|
1370
|
+
print ("NOTE: Both group and individual treatment effects were stated. Switching to individual treatment effects only.")
|
|
1371
|
+
if ITT:
|
|
1372
|
+
FE_unit = True
|
|
1373
|
+
TT_col = []
|
|
1374
|
+
print ("NOTE: Model includes individual time trends. Unit fixed effects are included. Treatment time variable is dropped.")
|
|
1375
|
+
if FE_time:
|
|
1376
|
+
FE_time = False
|
|
1377
|
+
print ("NOTE: Time fixed effects are dropped.")
|
|
1378
|
+
if GTT:
|
|
1379
|
+
GTT = False
|
|
1380
|
+
print ("NOTE: Both group and individual time trends were stated. Switching to individual time trends only.")
|
|
1381
|
+
|
|
1353
1382
|
if staggered_adoption:
|
|
1354
1383
|
FE_unit = True
|
|
1355
1384
|
FE_time = True
|
|
1356
1385
|
print ("NOTE: Quasi-experiment includes one or more staggered treatments. Two-way fixed effects model is used.")
|
|
1357
1386
|
|
|
1358
|
-
|
|
1387
|
+
FE_group = False
|
|
1388
|
+
if group_by is not None and group_by != "":
|
|
1389
|
+
FE_group = True
|
|
1390
|
+
|
|
1391
|
+
if FE_unit:
|
|
1359
1392
|
TG_col = []
|
|
1393
|
+
if FE_time:
|
|
1360
1394
|
TT_col = []
|
|
1395
|
+
if FE_group:
|
|
1396
|
+
TG_col = []
|
|
1397
|
+
intercept = False
|
|
1398
|
+
print ("NOTE: Quasi-experiment includes group fixed effects. Control group baseline and treatment group deviation are dropped.")
|
|
1361
1399
|
|
|
1362
1400
|
if after_treatment_col is not None or (isinstance (after_treatment_col, list) and len(after_treatment_col) > 0):
|
|
1363
1401
|
if isinstance (after_treatment_col, str):
|
|
@@ -1458,20 +1496,14 @@ def did_analysis(
|
|
|
1458
1496
|
outcome_col = "log_"+f'{outcome_col}'
|
|
1459
1497
|
|
|
1460
1498
|
did_formula = f'{outcome_col} ~ {" + ".join(treatment_col)}'
|
|
1461
|
-
|
|
1462
|
-
if TG_col is not None
|
|
1499
|
+
|
|
1500
|
+
if TG_col is not None and len(TG_col) > 0:
|
|
1463
1501
|
did_formula = did_formula + f' + {" + ".join(TG_col)}'
|
|
1464
|
-
if TT_col is not None
|
|
1465
|
-
did_formula = did_formula + f' + {" + ".join(TT_col)}'
|
|
1466
|
-
|
|
1467
|
-
if ITT:
|
|
1468
|
-
FE_unit = True
|
|
1469
|
-
FE_time = False
|
|
1470
|
-
if ITE:
|
|
1471
|
-
FE_unit = True
|
|
1502
|
+
if TT_col is not None and len(TT_col) > 0:
|
|
1503
|
+
did_formula = did_formula + f' + {" + ".join(TT_col)}'
|
|
1472
1504
|
|
|
1473
1505
|
if len(after_treatment_col) > 0:
|
|
1474
|
-
did_formula = did_formula + f'+ {" + ".join(after_treatment_col)}'
|
|
1506
|
+
did_formula = did_formula + f' + {" + ".join(after_treatment_col)}'
|
|
1475
1507
|
|
|
1476
1508
|
if FE_unit:
|
|
1477
1509
|
unit_col_todummies = diffindiff.didtools.to_dummies(
|
|
@@ -1481,7 +1513,7 @@ def did_analysis(
|
|
|
1481
1513
|
drop_first = intercept
|
|
1482
1514
|
)
|
|
1483
1515
|
data = unit_col_todummies[0]
|
|
1484
|
-
did_formula = did_formula + f'+ {unit_col_todummies[1]}'
|
|
1516
|
+
did_formula = did_formula + f' + {unit_col_todummies[1]}'
|
|
1485
1517
|
dummy_unit_vars = list(unit_col_todummies[2]["UNIT_"+unit_col].values)
|
|
1486
1518
|
dummy_unit_original = list(unit_col_todummies[2][unit_col].values)
|
|
1487
1519
|
|
|
@@ -1493,7 +1525,7 @@ def did_analysis(
|
|
|
1493
1525
|
drop_first = intercept
|
|
1494
1526
|
)
|
|
1495
1527
|
data = time_col_todummies[0]
|
|
1496
|
-
did_formula = did_formula + f'+ {time_col_todummies[1]}'
|
|
1528
|
+
did_formula = did_formula + f' + {time_col_todummies[1]}'
|
|
1497
1529
|
dummy_time_vars = list(time_col_todummies[2]["TIME_"+time_col].values)
|
|
1498
1530
|
dummy_time_original = list(time_col_todummies[2][time_col].values)
|
|
1499
1531
|
|
|
@@ -1526,8 +1558,8 @@ def did_analysis(
|
|
|
1526
1558
|
new_col_name = f"{col}_x_time"
|
|
1527
1559
|
group_x_time = group_x_time.rename(columns={col: new_col_name})
|
|
1528
1560
|
data = pd.concat([data, group_x_time], axis = 1)
|
|
1529
|
-
GTT_columns_groupxtime = '+'.join(group_x_time.columns)
|
|
1530
|
-
did_formula = did_formula + f'+{GTE_columns_group}+{GTT_columns_groupxtime}'
|
|
1561
|
+
GTT_columns_groupxtime = ' + '.join(group_x_time.columns)
|
|
1562
|
+
did_formula = did_formula + f' + {GTE_columns_group} + {GTT_columns_groupxtime}'
|
|
1531
1563
|
|
|
1532
1564
|
if ITT:
|
|
1533
1565
|
if "date_counter" not in data.columns:
|
|
@@ -1542,7 +1574,7 @@ def did_analysis(
|
|
|
1542
1574
|
new_col_name = f"{col}_x_time"
|
|
1543
1575
|
unit_x_time = unit_x_time.rename(columns={col: new_col_name})
|
|
1544
1576
|
data = pd.concat([data, unit_x_time], axis = 1)
|
|
1545
|
-
ITT_columns_unitxtime = '+'.join(unit_x_time.columns)
|
|
1577
|
+
ITT_columns_unitxtime = ' + '.join(unit_x_time.columns)
|
|
1546
1578
|
did_formula = did_formula + f' + {ITT_columns_unitxtime}'
|
|
1547
1579
|
|
|
1548
1580
|
if GTE:
|
|
@@ -1556,8 +1588,8 @@ def did_analysis(
|
|
|
1556
1588
|
new_col_name = f"{treatment}_{col}_x_time"
|
|
1557
1589
|
group_x_treatment = group_x_treatment.rename(columns={col: new_col_name})
|
|
1558
1590
|
data = pd.concat([data, group_x_treatment], axis = 1)
|
|
1559
|
-
GTE_columns_groupxtreatment = '+'.join(group_x_treatment.columns)
|
|
1560
|
-
did_formula = did_formula + f'+{GTE_columns_group}+{GTE_columns_groupxtreatment}'
|
|
1591
|
+
GTE_columns_groupxtreatment = ' + '.join(group_x_treatment.columns)
|
|
1592
|
+
did_formula = did_formula + f' + {GTE_columns_group} + {GTE_columns_groupxtreatment}'
|
|
1561
1593
|
|
|
1562
1594
|
if ITE:
|
|
1563
1595
|
unit_x_treatment = pd.DataFrame()
|
|
@@ -1574,7 +1606,7 @@ def did_analysis(
|
|
|
1574
1606
|
if group_by in covariates:
|
|
1575
1607
|
covariates.remove(group_by)
|
|
1576
1608
|
covariates_join = ' + '.join(covariates)
|
|
1577
|
-
did_formula = did_formula + f'+{covariates_join}'
|
|
1609
|
+
did_formula = did_formula + f' +{covariates_join}'
|
|
1578
1610
|
|
|
1579
1611
|
if len(group_benefit) > 0:
|
|
1580
1612
|
group_benefit = diffindiff.didtools.unique(group_benefit)
|
|
@@ -1597,10 +1629,11 @@ def did_analysis(
|
|
|
1597
1629
|
group_benefit = []
|
|
1598
1630
|
DDD = False
|
|
1599
1631
|
|
|
1600
|
-
|
|
1601
|
-
|
|
1632
|
+
did_formula = did_formula[:-1] if did_formula.endswith(" ") else did_formula
|
|
1633
|
+
did_formula = did_formula[:-1] if did_formula.endswith("+") else did_formula
|
|
1634
|
+
did_formula = did_formula[:-1] if did_formula.endswith(" ") else did_formula
|
|
1602
1635
|
if not intercept:
|
|
1603
|
-
did_formula = did_formula + f' -1'
|
|
1636
|
+
did_formula = did_formula + f' -1'
|
|
1604
1637
|
|
|
1605
1638
|
analysis_description = "Difference in Differences (DiD) Analysis"
|
|
1606
1639
|
if DDD:
|
|
@@ -1622,6 +1655,7 @@ def did_analysis(
|
|
|
1622
1655
|
"pre_post": pre_post,
|
|
1623
1656
|
"FE_unit": FE_unit,
|
|
1624
1657
|
"FE_time": FE_time,
|
|
1658
|
+
"FE_group": FE_group,
|
|
1625
1659
|
"intercept": intercept,
|
|
1626
1660
|
"ITT": ITT,
|
|
1627
1661
|
"GTT": GTT,
|
|
@@ -1831,7 +1865,7 @@ def did_analysis(
|
|
|
1831
1865
|
FE_group_coef = {}
|
|
1832
1866
|
for i, group_dummy in enumerate(FE_group_vars):
|
|
1833
1867
|
FE_group_coef[i] = {
|
|
1834
|
-
"Coefficient":
|
|
1868
|
+
"Coefficient": dummy_group_original[i],
|
|
1835
1869
|
"Estimate": ols_coefficients[group_dummy],
|
|
1836
1870
|
"SE": float(coef_standarderrors[group_dummy]),
|
|
1837
1871
|
"t": float(coef_t[group_dummy]),
|
|
@@ -1955,7 +1989,10 @@ def did_analysis(
|
|
|
1955
1989
|
model_results["covariates_effects"] = covariates_effects
|
|
1956
1990
|
|
|
1957
1991
|
model_predictions = ols_model.predict()
|
|
1958
|
-
|
|
1992
|
+
|
|
1993
|
+
prediction_intervals = ols_model.get_prediction()
|
|
1994
|
+
prediction_intervals = prediction_intervals.summary_frame(alpha = confint_alpha)
|
|
1995
|
+
|
|
1959
1996
|
model_statistics = {
|
|
1960
1997
|
"rsquared": ols_model.rsquared,
|
|
1961
1998
|
"rsquared_adj": ols_model.rsquared_adj,
|
|
@@ -1968,7 +2005,8 @@ def did_analysis(
|
|
|
1968
2005
|
data,
|
|
1969
2006
|
model_predictions,
|
|
1970
2007
|
model_statistics,
|
|
1971
|
-
ols_model
|
|
2008
|
+
ols_model,
|
|
2009
|
+
prediction_intervals
|
|
1972
2010
|
)
|
|
1973
2011
|
|
|
1974
2012
|
return did_model_output
|
|
@@ -1,11 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
# Name: diddata (diffindiff)
|
|
1
|
+
#-----------------------------------------------------------------------
|
|
2
|
+
# Name: diddata (diffindiff package)
|
|
3
3
|
# Purpose: Creating data for Difference-in-Differences Analysis
|
|
4
|
-
# Author: Thomas Wieland
|
|
5
|
-
#
|
|
6
|
-
#
|
|
4
|
+
# Author: Thomas Wieland
|
|
5
|
+
# ORCID: 0000-0001-5168-9846
|
|
6
|
+
# mail: geowieland@googlemail.com
|
|
7
|
+
# Version: 2.0.3
|
|
8
|
+
# Last update: 2025-04-18 10:24
|
|
7
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
8
|
-
|
|
10
|
+
#-----------------------------------------------------------------------
|
|
11
|
+
|
|
9
12
|
|
|
10
13
|
import pandas as pd
|
|
11
14
|
import numpy as np
|
|
@@ -950,7 +953,7 @@ def create_counterfactual(
|
|
|
950
953
|
)
|
|
951
954
|
control_group = isnotreatment[2]
|
|
952
955
|
|
|
953
|
-
units_tt = didtools.treatment_times(
|
|
956
|
+
units_tt = diffindiff.didtools.treatment_times(
|
|
954
957
|
data = data,
|
|
955
958
|
unit_col = unit_col,
|
|
956
959
|
time_col = time_col,
|
|
@@ -959,7 +962,7 @@ def create_counterfactual(
|
|
|
959
962
|
units = diffindiff.didtools.unique(units_tt[unit_col])
|
|
960
963
|
|
|
961
964
|
if not isnotreatment[0]:
|
|
962
|
-
print ("No no-treatment control group. Counterfactual will not cover full treatment time.")
|
|
965
|
+
print ("NOTE: No no-treatment control group. Counterfactual will not cover full treatment time.")
|
|
963
966
|
|
|
964
967
|
data_TG = pd.DataFrame(columns = data.columns)
|
|
965
968
|
for unit in units:
|
|
@@ -980,7 +983,9 @@ def create_counterfactual(
|
|
|
980
983
|
[data_TG, data_CG],
|
|
981
984
|
ignore_index=True
|
|
982
985
|
)
|
|
983
|
-
|
|
986
|
+
|
|
987
|
+
data_cf[X] = data_cf[X].apply(pd.to_numeric, errors='coerce')
|
|
988
|
+
|
|
984
989
|
counterfactual_pred = diffindiff.didtools.model_wrapper(
|
|
985
990
|
y = data_cf[y],
|
|
986
991
|
X = data_cf[X],
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
# Name: didtools (diffindiff)
|
|
3
|
-
# Purpose:
|
|
4
|
-
# Author: Thomas Wieland
|
|
5
|
-
#
|
|
6
|
-
#
|
|
1
|
+
#-----------------------------------------------------------------------
|
|
2
|
+
# Name: didtools (diffindiff package)
|
|
3
|
+
# Purpose: Additional tools for Difference-in-Differences Analysis
|
|
4
|
+
# Author: Thomas Wieland
|
|
5
|
+
# ORCID: 0000-0001-5168-9846
|
|
6
|
+
# mail: geowieland@googlemail.com
|
|
7
|
+
# Version: 2.0.3
|
|
8
|
+
# Last update: 2025-04-18 12:08
|
|
7
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
8
|
-
|
|
10
|
+
#-----------------------------------------------------------------------
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
import pandas as pd
|
|
@@ -34,11 +36,11 @@ def check_columns(
|
|
|
34
36
|
raise ValueError(f"Data do not contain column(s): {', '.join(missing_columns)}")
|
|
35
37
|
|
|
36
38
|
def is_balanced(
|
|
37
|
-
data,
|
|
38
|
-
unit_col,
|
|
39
|
-
time_col,
|
|
40
|
-
outcome_col,
|
|
41
|
-
other_cols = None
|
|
39
|
+
data: pd.DataFrame,
|
|
40
|
+
unit_col: str,
|
|
41
|
+
time_col: str,
|
|
42
|
+
outcome_col: str,
|
|
43
|
+
other_cols: list = None
|
|
42
44
|
):
|
|
43
45
|
|
|
44
46
|
unit_freq = data[unit_col].nunique()
|
|
@@ -58,8 +60,8 @@ def is_balanced(
|
|
|
58
60
|
return True
|
|
59
61
|
|
|
60
62
|
def is_binary(
|
|
61
|
-
data,
|
|
62
|
-
treatment_col
|
|
63
|
+
data: pd.DataFrame,
|
|
64
|
+
treatment_col: str
|
|
63
65
|
):
|
|
64
66
|
|
|
65
67
|
unique_values = set(data[treatment_col].dropna().unique())
|
|
@@ -76,7 +78,7 @@ def is_binary(
|
|
|
76
78
|
return [False, "Unknown"]
|
|
77
79
|
|
|
78
80
|
def is_missing(
|
|
79
|
-
data,
|
|
81
|
+
data: pd.DataFrame,
|
|
80
82
|
drop_missing: bool = True,
|
|
81
83
|
missing_replace_by_zero: bool = False
|
|
82
84
|
):
|
|
@@ -104,10 +106,10 @@ def is_missing(
|
|
|
104
106
|
]
|
|
105
107
|
|
|
106
108
|
def is_simultaneous(
|
|
107
|
-
data,
|
|
108
|
-
unit_col,
|
|
109
|
-
time_col,
|
|
110
|
-
treatment_col,
|
|
109
|
+
data: pd.DataFrame,
|
|
110
|
+
unit_col: str,
|
|
111
|
+
time_col: str,
|
|
112
|
+
treatment_col: str,
|
|
111
113
|
pre_post = False
|
|
112
114
|
):
|
|
113
115
|
|
|
@@ -125,9 +127,9 @@ def is_simultaneous(
|
|
|
125
127
|
return col_identical
|
|
126
128
|
|
|
127
129
|
def is_notreatment(
|
|
128
|
-
data,
|
|
129
|
-
unit_col,
|
|
130
|
-
treatment_col
|
|
130
|
+
data: pd.DataFrame,
|
|
131
|
+
unit_col: str,
|
|
132
|
+
treatment_col: str
|
|
131
133
|
):
|
|
132
134
|
|
|
133
135
|
data_relevant = data[[unit_col, treatment_col]]
|
|
@@ -150,12 +152,52 @@ def is_notreatment(
|
|
|
150
152
|
control_group
|
|
151
153
|
]
|
|
152
154
|
|
|
155
|
+
def treatment_group_col(
|
|
156
|
+
data: pd.DataFrame,
|
|
157
|
+
unit_col: str,
|
|
158
|
+
treatment_col: str,
|
|
159
|
+
create_TG_col: str = "TG"
|
|
160
|
+
):
|
|
161
|
+
|
|
162
|
+
isnotreatment = is_notreatment(
|
|
163
|
+
data = data,
|
|
164
|
+
unit_col = unit_col,
|
|
165
|
+
treatment_col = treatment_col
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
if not isnotreatment[0]:
|
|
169
|
+
print ("Model data does not contain a no-treatment control group. Treatment group column is constant = 1.")
|
|
170
|
+
|
|
171
|
+
if create_TG_col in data.columns:
|
|
172
|
+
create_TG_col = "TG_"+treatment_col
|
|
173
|
+
print ("Column " + create_TG_col + " already exists. Saving treatment group in column TG_" + treatment_col)
|
|
174
|
+
|
|
175
|
+
treatment_group = isnotreatment[1]
|
|
176
|
+
|
|
177
|
+
data[create_TG_col] = 0
|
|
178
|
+
data.loc[data[unit_col].astype(str).isin(treatment_group), create_TG_col] = 1
|
|
179
|
+
|
|
180
|
+
return [
|
|
181
|
+
data,
|
|
182
|
+
isnotreatment[0],
|
|
183
|
+
create_TG_col
|
|
184
|
+
]
|
|
185
|
+
|
|
186
|
+
def untreated_units(
|
|
187
|
+
data: pd.DataFrame,
|
|
188
|
+
unit_col: str,
|
|
189
|
+
time_col: str,
|
|
190
|
+
treatment_col: str
|
|
191
|
+
):
|
|
192
|
+
# TODO ??
|
|
193
|
+
pass
|
|
194
|
+
|
|
153
195
|
def is_parallel(
|
|
154
|
-
data,
|
|
155
|
-
unit_col,
|
|
156
|
-
time_col,
|
|
157
|
-
treatment_col,
|
|
158
|
-
outcome_col,
|
|
196
|
+
data: pd.DataFrame,
|
|
197
|
+
unit_col: str,
|
|
198
|
+
time_col: str,
|
|
199
|
+
treatment_col: str,
|
|
200
|
+
outcome_col: str,
|
|
159
201
|
pre_post = False,
|
|
160
202
|
alpha = 0.05
|
|
161
203
|
):
|
|
@@ -206,10 +248,10 @@ def is_parallel(
|
|
|
206
248
|
]
|
|
207
249
|
|
|
208
250
|
def date_counter(
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
251
|
+
df: pd.DataFrame,
|
|
252
|
+
date_col: str,
|
|
253
|
+
new_col: str = "date_counter"
|
|
254
|
+
):
|
|
213
255
|
|
|
214
256
|
dates = df[date_col].unique()
|
|
215
257
|
|
|
@@ -226,6 +268,7 @@ def date_counter(
|
|
|
226
268
|
return df
|
|
227
269
|
|
|
228
270
|
def unique(data):
|
|
271
|
+
|
|
229
272
|
if data is None or (isinstance(data, (list, np.ndarray, pd.Series, pd.DataFrame)) and len(data) == 0):
|
|
230
273
|
return []
|
|
231
274
|
|
|
@@ -269,8 +312,9 @@ def model_wrapper(
|
|
|
269
312
|
lgbm_learning_rate = 0.1,
|
|
270
313
|
random_state = 71
|
|
271
314
|
):
|
|
272
|
-
|
|
273
|
-
|
|
315
|
+
|
|
316
|
+
if model_type not in ["ols", "olsbg", "dtbg", "rf", "gb", "knn", "svr", "xgb", "lgbm"]:
|
|
317
|
+
raise ValueError("Please enter a valid model type ('ols', 'olsbg', 'dtbg', 'rf', 'gb', 'knn', 'svr', 'xgb', 'lgbm')")
|
|
274
318
|
|
|
275
319
|
X_train, X_test, y_train, y_test = train_test_split(
|
|
276
320
|
X,
|
|
@@ -348,10 +392,10 @@ def model_wrapper(
|
|
|
348
392
|
]
|
|
349
393
|
|
|
350
394
|
def treatment_times(
|
|
351
|
-
data,
|
|
352
|
-
unit_col,
|
|
353
|
-
time_col,
|
|
354
|
-
treatment_col
|
|
395
|
+
data: pd.DataFrame,
|
|
396
|
+
unit_col: str,
|
|
397
|
+
time_col: str,
|
|
398
|
+
treatment_col: str
|
|
355
399
|
):
|
|
356
400
|
|
|
357
401
|
check_columns(
|
|
@@ -389,10 +433,10 @@ def clean_column_name(value):
|
|
|
389
433
|
return value.strip('_')
|
|
390
434
|
|
|
391
435
|
def to_dummies(
|
|
392
|
-
data,
|
|
393
|
-
col,
|
|
394
|
-
drop_first = False,
|
|
395
|
-
prefix = "DUMMY"
|
|
436
|
+
data: pd.DataFrame,
|
|
437
|
+
col: str,
|
|
438
|
+
drop_first: bool = False,
|
|
439
|
+
prefix: str = "DUMMY"
|
|
396
440
|
):
|
|
397
441
|
|
|
398
442
|
unique_values = data[col].astype(str).unique()
|
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
# Name: tests_diffindiff
|
|
1
|
+
#-----------------------------------------------------------------------
|
|
2
|
+
# Name: tests_diffindiff (diffindiff package)
|
|
4
3
|
# Purpose: Tests and examples for the diffindiff package
|
|
5
|
-
# Author: Thomas Wieland
|
|
6
|
-
#
|
|
7
|
-
#
|
|
4
|
+
# Author: Thomas Wieland
|
|
5
|
+
# ORCID: 0000-0001-5168-9846
|
|
6
|
+
# mail: geowieland@googlemail.com
|
|
7
|
+
# Version: 2.0.3
|
|
8
|
+
# Last update: 2025-04-18 10:24
|
|
8
9
|
# Copyright (c) 2025 Thomas Wieland
|
|
9
|
-
|
|
10
|
+
#-----------------------------------------------------------------------
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
import pandas as pd
|
|
13
14
|
from diffindiff.didanalysis import DiffModel, did_analysis
|
|
14
15
|
from diffindiff.diddata import DiffGroups, create_groups, DiffTreatment, create_treatment, DiffData, merge_data, create_data
|
|
16
|
+
from diffindiff.didtools import treatment_group_col
|
|
15
17
|
|
|
16
18
|
# Example 1: Effect of a curfew in German counties in the first
|
|
17
19
|
# wave of the COVID-19 pandemic (DiD pre-post analysis)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.3
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -38,17 +38,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
38
38
|
- Create predictive counterfactuals
|
|
39
39
|
- **DiD analysis**:
|
|
40
40
|
- Perform standard DiD analysis
|
|
41
|
-
- Model
|
|
41
|
+
- Model extensions:
|
|
42
42
|
- Staggered adoption
|
|
43
43
|
- Multiple treatments
|
|
44
44
|
- Two-way fixed effects models
|
|
45
45
|
- Group- or individual-specific treatment effects
|
|
46
46
|
- Group- or individual-specific time trends
|
|
47
47
|
- Including covariates
|
|
48
|
-
-
|
|
48
|
+
- Including after-treatment period
|
|
49
49
|
- Triple Difference (DDD)
|
|
50
50
|
- Own counterfactuals
|
|
51
|
-
- Bonferroni correction
|
|
51
|
+
- Bonferroni correction for treatment effects
|
|
52
|
+
- Placebo test
|
|
52
53
|
- **Visualization**:
|
|
53
54
|
- Plot observed and expected time course of treatment and control group
|
|
54
55
|
- Plot expected time course of treatment group and counterfactual
|
|
@@ -60,7 +61,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
60
61
|
- Test for type of adoption
|
|
61
62
|
- Test whether the panel dataset is balanced
|
|
62
63
|
- Test for parallel trend assumption
|
|
63
|
-
- Placebo test
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
## Literature
|
|
@@ -7,7 +7,7 @@ def read_README():
|
|
|
7
7
|
|
|
8
8
|
setup(
|
|
9
9
|
name='diffindiff',
|
|
10
|
-
version='2.0.
|
|
10
|
+
version='2.0.3',
|
|
11
11
|
description='diffindiff: Python library for convenient Difference-in-Differences Analyses',
|
|
12
12
|
packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
|
|
13
13
|
include_package_data=True,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|