diffindiff 2.0.1__tar.gz → 2.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: diffindiff
3
- Version: 2.0.1
3
+ Version: 2.0.3
4
4
  Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
@@ -38,17 +38,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
38
38
  - Create predictive counterfactuals
39
39
  - **DiD analysis**:
40
40
  - Perform standard DiD analysis
41
- - Model Extensions:
41
+ - Model extensions:
42
42
  - Staggered adoption
43
43
  - Multiple treatments
44
44
  - Two-way fixed effects models
45
45
  - Group- or individual-specific treatment effects
46
46
  - Group- or individual-specific time trends
47
47
  - Including covariates
48
- - After-treatment period
48
+ - Including after-treatment period
49
49
  - Triple Difference (DDD)
50
50
  - Own counterfactuals
51
- - Bonferroni correction
51
+ - Bonferroni correction for treatment effects
52
+ - Placebo test
52
53
  - **Visualization**:
53
54
  - Plot observed and expected time course of treatment and control group
54
55
  - Plot expected time course of treatment group and counterfactual
@@ -60,7 +61,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
60
61
  - Test for type of adoption
61
62
  - Test whether the panel dataset is balanced
62
63
  - Test for parallel trend assumption
63
- - Placebo test
64
64
 
65
65
 
66
66
  ## Literature
@@ -16,17 +16,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
16
16
  - Create predictive counterfactuals
17
17
  - **DiD analysis**:
18
18
  - Perform standard DiD analysis
19
- - Model Extensions:
19
+ - Model extensions:
20
20
  - Staggered adoption
21
21
  - Multiple treatments
22
22
  - Two-way fixed effects models
23
23
  - Group- or individual-specific treatment effects
24
24
  - Group- or individual-specific time trends
25
25
  - Including covariates
26
- - After-treatment period
26
+ - Including after-treatment period
27
27
  - Triple Difference (DDD)
28
28
  - Own counterfactuals
29
- - Bonferroni correction
29
+ - Bonferroni correction for treatment effects
30
+ - Placebo test
30
31
  - **Visualization**:
31
32
  - Plot observed and expected time course of treatment and control group
32
33
  - Plot expected time course of treatment group and counterfactual
@@ -38,7 +39,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
38
39
  - Test for type of adoption
39
40
  - Test whether the panel dataset is balanced
40
41
  - Test for parallel trend assumption
41
- - Placebo test
42
42
 
43
43
 
44
44
  ## Literature
@@ -1,11 +1,13 @@
1
- #-------------------------------------------------------------------------------
2
- # Name: didanalysis (diffindiff)
1
+ #-----------------------------------------------------------------------
2
+ # Name: didanalysis (diffindiff package)
3
3
  # Purpose: Analysis functions for difference-in-differences analyses
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.1
6
- # Last update: 2025-04-15 18:43
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.3
8
+ # Last update: 2025-04-18 10:24
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #-------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
9
11
 
10
12
 
11
13
  import pandas as pd
@@ -25,7 +27,8 @@ class DiffModel:
25
27
  did_modeldata,
26
28
  did_modelpredictions,
27
29
  did_model_statistics,
28
- did_olsmodel
30
+ did_olsmodel,
31
+ did_prediction_intervals
29
32
  ):
30
33
 
31
34
  self.data = [
@@ -34,7 +37,8 @@ class DiffModel:
34
37
  did_modeldata,
35
38
  did_modelpredictions,
36
39
  did_model_statistics,
37
- did_olsmodel
40
+ did_olsmodel,
41
+ did_prediction_intervals
38
42
  ]
39
43
 
40
44
  def treatment_statistics(
@@ -82,7 +86,7 @@ class DiffModel:
82
86
  after_treatment_period_start = None
83
87
  after_treatment_period_end = None
84
88
  after_treatment_period_N = None
85
- if len(model_config["after_treatment_col"]) > 0:
89
+ if len(model_config["after_treatment_col"]) > 0 and after_treatment_col is not None:
86
90
  after_treatment_period_start = treatment_period_end+pd.Timedelta(days=1)
87
91
  after_treatment_period_start = pd.to_datetime(after_treatment_period_start)
88
92
  after_treatment_period_end = pd.to_datetime(study_period_end)
@@ -364,7 +368,7 @@ class DiffModel:
364
368
 
365
369
  for key, value in covariates_effects.items():
366
370
  covariates_effects_rows.append({
367
- "Covariates": value["Coefficient"],
371
+ "": value["Coefficient"],
368
372
  "Estimate": value["Estimate"],
369
373
  "SE": value["SE"],
370
374
  "t": value["t"],
@@ -523,13 +527,15 @@ class DiffModel:
523
527
  covariates_effects_df["CI lower"] = covariates_effects_df["CI lower"].map(lambda x: f"{x:,.3f}")
524
528
  covariates_effects_df["CI upper"] = covariates_effects_df["CI upper"].map(lambda x: f"{x:,.3f}")
525
529
  covariates_effects_df.iloc[:, 0] = covariates_effects_df.iloc[:, 0].apply(lambda x: f"{x:<{max_width_column1}}")
530
+ print("Covariates")
526
531
  print(covariates_effects_df.to_string(index=False))
527
- if not show_covariates:
532
+ if not show_covariates or no_covariates == 0:
528
533
  if no_covariates > 0:
529
534
  print ("Covariates YES")
530
535
  else:
531
536
  print ("Covariates NO")
532
537
 
538
+ print("")
533
539
  print("Fixed effects")
534
540
  if model_config["FE_unit"]:
535
541
  print (" Units YES")
@@ -566,7 +572,7 @@ class DiffModel:
566
572
  print(treatment_diagnostics_df_t)
567
573
 
568
574
  print("-" * total_width)
569
- print ("Input data diagnostics")
575
+ print ("Input data diagnostixx") # TODO ?? AENDERN
570
576
  if modeldata_isbalanced:
571
577
  print ("Balanced panel data YES")
572
578
  else:
@@ -756,16 +762,21 @@ class DiffModel:
756
762
  ols_model = self.data[5]
757
763
  return ols_model
758
764
 
765
+ def prediction_intervals(self):
766
+
767
+ prediction_intervals = self.data[6]
768
+ return prediction_intervals
769
+
759
770
  def placebo(
760
- self,
761
- treatment: str = None,
762
- after_treatment_col: str = None,
763
- TG_col: str = None,
764
- TT_col: str = None,
765
- divide: float = 0.5,
766
- resample: float = 1.0,
767
- random_state = 71
768
- ):
771
+ self,
772
+ treatment: str = None,
773
+ after_treatment_col: str = None,
774
+ TG_col: str = None,
775
+ TT_col: str = None,
776
+ divide: float = 0.5,
777
+ resample: float = 1.0,
778
+ random_state = 71
779
+ ):
769
780
 
770
781
  model_config = self.data[1]
771
782
  model_data = self.data[2]
@@ -796,9 +807,9 @@ class DiffModel:
796
807
  TT_col_ = "TT_" + treatment
797
808
  TGxTT_ = "Placebo_" + treatment
798
809
  if TG_col is None and TG_col_ not in model_config["TG_col"]:
799
- raise ValueError("Model object does not include treatment group identification variable for treatment ", treatment)
810
+ raise ValueError("Cannot find treatment group identification variable for treatment " + treatment + ". Please state TG_col = [treatment_group_dummy].")
800
811
  if TT_col is None and TT_col_ not in model_config["TT_col"]:
801
- raise ValueError("Model object does not include treatment time variable for treatment ", treatment)
812
+ raise ValueError("Cannot findt treatment time variable for treatment " + treatment + ". Please state TG_col = [treatment_time_dummy].")
802
813
 
803
814
  unit_col = model_config["unit_col"]
804
815
  time_col = model_config["time_col"]
@@ -1127,19 +1138,20 @@ class DiffModel:
1127
1138
  return model_data_TG_CG
1128
1139
 
1129
1140
  def plot_counterfactual(
1130
- self,
1131
- treatment = None,
1132
- x_label: str = "Time",
1133
- y_label: str = "Outcome",
1134
- y_lim = None,
1135
- plot_title: str = "Treatment group Counterfactual",
1136
- lines_col: list = ["blue", "green"],
1137
- lines_style: list = ["solid", "dashed"],
1138
- lines_labels: list = ["TG", "TG counterfactual"],
1139
- plot_legend: bool = True,
1140
- plot_grid: bool = True,
1141
- plot_size: list = [12, 6]
1142
- ):
1141
+ self,
1142
+ treatment: str = None,
1143
+ after_treatment_col: str = None,
1144
+ x_label: str = "Time",
1145
+ y_label: str = "Outcome",
1146
+ y_lim = None,
1147
+ plot_title: str = "Treatment group Counterfactual",
1148
+ lines_col: list = ["blue", "green"],
1149
+ lines_style: list = ["solid", "dashed"],
1150
+ lines_labels: list = ["TG", "TG counterfactual"],
1151
+ plot_legend: bool = True,
1152
+ plot_grid: bool = True,
1153
+ plot_size: list = [12, 6]
1154
+ ):
1143
1155
 
1144
1156
  model_config = self.data[1]
1145
1157
  outcome_col = model_config["outcome_col"]
@@ -1158,7 +1170,10 @@ class DiffModel:
1158
1170
  else:
1159
1171
  raise ValueError ("Model object has no column for treatment group with respect to ", str(no_treatments), " treatments. Choose one with parameter treatment.")
1160
1172
 
1161
- model_data_mod = self.counterfactual()
1173
+ model_data_mod = self.counterfactual(
1174
+ treatment = treatment,
1175
+ after_treatment_col = after_treatment_col
1176
+ )
1162
1177
 
1163
1178
  if treatment is not None:
1164
1179
 
@@ -1182,6 +1197,8 @@ class DiffModel:
1182
1197
 
1183
1198
  treatment = treatment_diagnostics[0]["treatment"]
1184
1199
 
1200
+ treatment_group = [str(x) for x in treatment_group]
1201
+
1185
1202
  TG_col = "TG_" + treatment
1186
1203
 
1187
1204
  model_data_mod[TG_col] = 0
@@ -1344,20 +1361,41 @@ def did_analysis(
1344
1361
  intercept = False
1345
1362
  TG_col = []
1346
1363
  print ("NOTE: Quasi-experiment includes more than one treatment. Unit fixed effects are used instead of control group baseline and treatment group deviation.")
1347
-
1348
- if ITE:
1349
- GTE = False
1350
- if ITT:
1351
- GTT = False
1352
-
1364
+
1365
+ if ITE:
1366
+ FE_unit = True
1367
+ print ("NOTE: Model includes individual treatment effects. Unit fixed effects are included.")
1368
+ if GTE:
1369
+ GTE = False
1370
+ print ("NOTE: Both group and individual treatment effects were stated. Switching to individual treatment effects only.")
1371
+ if ITT:
1372
+ FE_unit = True
1373
+ TT_col = []
1374
+ print ("NOTE: Model includes individual time trends. Unit fixed effects are included. Treatment time variable is dropped.")
1375
+ if FE_time:
1376
+ FE_time = False
1377
+ print ("NOTE: Time fixed effects are dropped.")
1378
+ if GTT:
1379
+ GTT = False
1380
+ print ("NOTE: Both group and individual time trends were stated. Switching to individual time trends only.")
1381
+
1353
1382
  if staggered_adoption:
1354
1383
  FE_unit = True
1355
1384
  FE_time = True
1356
1385
  print ("NOTE: Quasi-experiment includes one or more staggered treatments. Two-way fixed effects model is used.")
1357
1386
 
1358
- if FE_unit and FE_time:
1387
+ FE_group = False
1388
+ if group_by is not None and group_by != "":
1389
+ FE_group = True
1390
+
1391
+ if FE_unit:
1359
1392
  TG_col = []
1393
+ if FE_time:
1360
1394
  TT_col = []
1395
+ if FE_group:
1396
+ TG_col = []
1397
+ intercept = False
1398
+ print ("NOTE: Quasi-experiment includes group fixed effects. Control group baseline and treatment group deviation are dropped.")
1361
1399
 
1362
1400
  if after_treatment_col is not None or (isinstance (after_treatment_col, list) and len(after_treatment_col) > 0):
1363
1401
  if isinstance (after_treatment_col, str):
@@ -1458,20 +1496,14 @@ def did_analysis(
1458
1496
  outcome_col = "log_"+f'{outcome_col}'
1459
1497
 
1460
1498
  did_formula = f'{outcome_col} ~ {" + ".join(treatment_col)}'
1461
-
1462
- if TG_col is not None or len(TG_col) > 0:
1499
+
1500
+ if TG_col is not None and len(TG_col) > 0:
1463
1501
  did_formula = did_formula + f' + {" + ".join(TG_col)}'
1464
- if TT_col is not None or len(TT_col) > 0:
1465
- did_formula = did_formula + f' + {" + ".join(TT_col)}'
1466
-
1467
- if ITT:
1468
- FE_unit = True
1469
- FE_time = False
1470
- if ITE:
1471
- FE_unit = True
1502
+ if TT_col is not None and len(TT_col) > 0:
1503
+ did_formula = did_formula + f' + {" + ".join(TT_col)}'
1472
1504
 
1473
1505
  if len(after_treatment_col) > 0:
1474
- did_formula = did_formula + f'+ {" + ".join(after_treatment_col)}'
1506
+ did_formula = did_formula + f' + {" + ".join(after_treatment_col)}'
1475
1507
 
1476
1508
  if FE_unit:
1477
1509
  unit_col_todummies = diffindiff.didtools.to_dummies(
@@ -1481,7 +1513,7 @@ def did_analysis(
1481
1513
  drop_first = intercept
1482
1514
  )
1483
1515
  data = unit_col_todummies[0]
1484
- did_formula = did_formula + f'+ {unit_col_todummies[1]}'
1516
+ did_formula = did_formula + f' + {unit_col_todummies[1]}'
1485
1517
  dummy_unit_vars = list(unit_col_todummies[2]["UNIT_"+unit_col].values)
1486
1518
  dummy_unit_original = list(unit_col_todummies[2][unit_col].values)
1487
1519
 
@@ -1493,7 +1525,7 @@ def did_analysis(
1493
1525
  drop_first = intercept
1494
1526
  )
1495
1527
  data = time_col_todummies[0]
1496
- did_formula = did_formula + f'+ {time_col_todummies[1]}'
1528
+ did_formula = did_formula + f' + {time_col_todummies[1]}'
1497
1529
  dummy_time_vars = list(time_col_todummies[2]["TIME_"+time_col].values)
1498
1530
  dummy_time_original = list(time_col_todummies[2][time_col].values)
1499
1531
 
@@ -1526,8 +1558,8 @@ def did_analysis(
1526
1558
  new_col_name = f"{col}_x_time"
1527
1559
  group_x_time = group_x_time.rename(columns={col: new_col_name})
1528
1560
  data = pd.concat([data, group_x_time], axis = 1)
1529
- GTT_columns_groupxtime = '+'.join(group_x_time.columns)
1530
- did_formula = did_formula + f'+{GTE_columns_group}+{GTT_columns_groupxtime}'
1561
+ GTT_columns_groupxtime = ' + '.join(group_x_time.columns)
1562
+ did_formula = did_formula + f' + {GTE_columns_group} + {GTT_columns_groupxtime}'
1531
1563
 
1532
1564
  if ITT:
1533
1565
  if "date_counter" not in data.columns:
@@ -1542,7 +1574,7 @@ def did_analysis(
1542
1574
  new_col_name = f"{col}_x_time"
1543
1575
  unit_x_time = unit_x_time.rename(columns={col: new_col_name})
1544
1576
  data = pd.concat([data, unit_x_time], axis = 1)
1545
- ITT_columns_unitxtime = '+'.join(unit_x_time.columns)
1577
+ ITT_columns_unitxtime = ' + '.join(unit_x_time.columns)
1546
1578
  did_formula = did_formula + f' + {ITT_columns_unitxtime}'
1547
1579
 
1548
1580
  if GTE:
@@ -1556,8 +1588,8 @@ def did_analysis(
1556
1588
  new_col_name = f"{treatment}_{col}_x_time"
1557
1589
  group_x_treatment = group_x_treatment.rename(columns={col: new_col_name})
1558
1590
  data = pd.concat([data, group_x_treatment], axis = 1)
1559
- GTE_columns_groupxtreatment = '+'.join(group_x_treatment.columns)
1560
- did_formula = did_formula + f'+{GTE_columns_group}+{GTE_columns_groupxtreatment}'
1591
+ GTE_columns_groupxtreatment = ' + '.join(group_x_treatment.columns)
1592
+ did_formula = did_formula + f' + {GTE_columns_group} + {GTE_columns_groupxtreatment}'
1561
1593
 
1562
1594
  if ITE:
1563
1595
  unit_x_treatment = pd.DataFrame()
@@ -1574,7 +1606,7 @@ def did_analysis(
1574
1606
  if group_by in covariates:
1575
1607
  covariates.remove(group_by)
1576
1608
  covariates_join = ' + '.join(covariates)
1577
- did_formula = did_formula + f'+{covariates_join}'
1609
+ did_formula = did_formula + f' +{covariates_join}'
1578
1610
 
1579
1611
  if len(group_benefit) > 0:
1580
1612
  group_benefit = diffindiff.didtools.unique(group_benefit)
@@ -1597,10 +1629,11 @@ def did_analysis(
1597
1629
  group_benefit = []
1598
1630
  DDD = False
1599
1631
 
1600
- if GTE or GTT or ITE or ITT:
1601
- intercept = False
1632
+ did_formula = did_formula[:-1] if did_formula.endswith(" ") else did_formula
1633
+ did_formula = did_formula[:-1] if did_formula.endswith("+") else did_formula
1634
+ did_formula = did_formula[:-1] if did_formula.endswith(" ") else did_formula
1602
1635
  if not intercept:
1603
- did_formula = did_formula + f' -1'
1636
+ did_formula = did_formula + f' -1'
1604
1637
 
1605
1638
  analysis_description = "Difference in Differences (DiD) Analysis"
1606
1639
  if DDD:
@@ -1622,6 +1655,7 @@ def did_analysis(
1622
1655
  "pre_post": pre_post,
1623
1656
  "FE_unit": FE_unit,
1624
1657
  "FE_time": FE_time,
1658
+ "FE_group": FE_group,
1625
1659
  "intercept": intercept,
1626
1660
  "ITT": ITT,
1627
1661
  "GTT": GTT,
@@ -1831,7 +1865,7 @@ def did_analysis(
1831
1865
  FE_group_coef = {}
1832
1866
  for i, group_dummy in enumerate(FE_group_vars):
1833
1867
  FE_group_coef[i] = {
1834
- "Coefficient": group_dummy,
1868
+ "Coefficient": dummy_group_original[i],
1835
1869
  "Estimate": ols_coefficients[group_dummy],
1836
1870
  "SE": float(coef_standarderrors[group_dummy]),
1837
1871
  "t": float(coef_t[group_dummy]),
@@ -1955,7 +1989,10 @@ def did_analysis(
1955
1989
  model_results["covariates_effects"] = covariates_effects
1956
1990
 
1957
1991
  model_predictions = ols_model.predict()
1958
-
1992
+
1993
+ prediction_intervals = ols_model.get_prediction()
1994
+ prediction_intervals = prediction_intervals.summary_frame(alpha = confint_alpha)
1995
+
1959
1996
  model_statistics = {
1960
1997
  "rsquared": ols_model.rsquared,
1961
1998
  "rsquared_adj": ols_model.rsquared_adj,
@@ -1968,7 +2005,8 @@ def did_analysis(
1968
2005
  data,
1969
2006
  model_predictions,
1970
2007
  model_statistics,
1971
- ols_model
2008
+ ols_model,
2009
+ prediction_intervals
1972
2010
  )
1973
2011
 
1974
2012
  return did_model_output
@@ -1,11 +1,14 @@
1
- #-------------------------------------------------------------------------------
2
- # Name: diddata (diffindiff)
1
+ #-----------------------------------------------------------------------
2
+ # Name: diddata (diffindiff package)
3
3
  # Purpose: Creating data for Difference-in-Differences Analysis
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.1
6
- # Last update: 2025-04-15 18:43
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.3
8
+ # Last update: 2025-04-18 10:24
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #-------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
11
+
9
12
 
10
13
  import pandas as pd
11
14
  import numpy as np
@@ -950,7 +953,7 @@ def create_counterfactual(
950
953
  )
951
954
  control_group = isnotreatment[2]
952
955
 
953
- units_tt = didtools.treatment_times(
956
+ units_tt = diffindiff.didtools.treatment_times(
954
957
  data = data,
955
958
  unit_col = unit_col,
956
959
  time_col = time_col,
@@ -959,7 +962,7 @@ def create_counterfactual(
959
962
  units = diffindiff.didtools.unique(units_tt[unit_col])
960
963
 
961
964
  if not isnotreatment[0]:
962
- print ("No no-treatment control group. Counterfactual will not cover full treatment time.")
965
+ print ("NOTE: No no-treatment control group. Counterfactual will not cover full treatment time.")
963
966
 
964
967
  data_TG = pd.DataFrame(columns = data.columns)
965
968
  for unit in units:
@@ -980,7 +983,9 @@ def create_counterfactual(
980
983
  [data_TG, data_CG],
981
984
  ignore_index=True
982
985
  )
983
-
986
+
987
+ data_cf[X] = data_cf[X].apply(pd.to_numeric, errors='coerce')
988
+
984
989
  counterfactual_pred = diffindiff.didtools.model_wrapper(
985
990
  y = data_cf[y],
986
991
  X = data_cf[X],
@@ -1,11 +1,13 @@
1
- # -------------------------------------------------------------------------------
2
- # Name: didtools (diffindiff)
3
- # Purpose: Creating data for Difference-in-Differences Analysis
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.1
6
- # Last update: 2025-04-15 18:44
1
+ #-----------------------------------------------------------------------
2
+ # Name: didtools (diffindiff package)
3
+ # Purpose: Additional tools for Difference-in-Differences Analysis
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.3
8
+ # Last update: 2025-04-18 12:08
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #-------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
9
11
 
10
12
 
11
13
  import pandas as pd
@@ -34,11 +36,11 @@ def check_columns(
34
36
  raise ValueError(f"Data do not contain column(s): {', '.join(missing_columns)}")
35
37
 
36
38
  def is_balanced(
37
- data,
38
- unit_col,
39
- time_col,
40
- outcome_col,
41
- other_cols = None
39
+ data: pd.DataFrame,
40
+ unit_col: str,
41
+ time_col: str,
42
+ outcome_col: str,
43
+ other_cols: list = None
42
44
  ):
43
45
 
44
46
  unit_freq = data[unit_col].nunique()
@@ -58,8 +60,8 @@ def is_balanced(
58
60
  return True
59
61
 
60
62
  def is_binary(
61
- data,
62
- treatment_col
63
+ data: pd.DataFrame,
64
+ treatment_col: str
63
65
  ):
64
66
 
65
67
  unique_values = set(data[treatment_col].dropna().unique())
@@ -76,7 +78,7 @@ def is_binary(
76
78
  return [False, "Unknown"]
77
79
 
78
80
  def is_missing(
79
- data,
81
+ data: pd.DataFrame,
80
82
  drop_missing: bool = True,
81
83
  missing_replace_by_zero: bool = False
82
84
  ):
@@ -104,10 +106,10 @@ def is_missing(
104
106
  ]
105
107
 
106
108
  def is_simultaneous(
107
- data,
108
- unit_col,
109
- time_col,
110
- treatment_col,
109
+ data: pd.DataFrame,
110
+ unit_col: str,
111
+ time_col: str,
112
+ treatment_col: str,
111
113
  pre_post = False
112
114
  ):
113
115
 
@@ -125,9 +127,9 @@ def is_simultaneous(
125
127
  return col_identical
126
128
 
127
129
  def is_notreatment(
128
- data,
129
- unit_col,
130
- treatment_col
130
+ data: pd.DataFrame,
131
+ unit_col: str,
132
+ treatment_col: str
131
133
  ):
132
134
 
133
135
  data_relevant = data[[unit_col, treatment_col]]
@@ -150,12 +152,52 @@ def is_notreatment(
150
152
  control_group
151
153
  ]
152
154
 
155
+ def treatment_group_col(
156
+ data: pd.DataFrame,
157
+ unit_col: str,
158
+ treatment_col: str,
159
+ create_TG_col: str = "TG"
160
+ ):
161
+
162
+ isnotreatment = is_notreatment(
163
+ data = data,
164
+ unit_col = unit_col,
165
+ treatment_col = treatment_col
166
+ )
167
+
168
+ if not isnotreatment[0]:
169
+ print ("Model data does not contain a no-treatment control group. Treatment group column is constant = 1.")
170
+
171
+ if create_TG_col in data.columns:
172
+ create_TG_col = "TG_"+treatment_col
173
+ print ("Column " + create_TG_col + " already exists. Saving treatment group in column TG_" + treatment_col)
174
+
175
+ treatment_group = isnotreatment[1]
176
+
177
+ data[create_TG_col] = 0
178
+ data.loc[data[unit_col].astype(str).isin(treatment_group), create_TG_col] = 1
179
+
180
+ return [
181
+ data,
182
+ isnotreatment[0],
183
+ create_TG_col
184
+ ]
185
+
186
+ def untreated_units(
187
+ data: pd.DataFrame,
188
+ unit_col: str,
189
+ time_col: str,
190
+ treatment_col: str
191
+ ):
192
+ # TODO ??
193
+ pass
194
+
153
195
  def is_parallel(
154
- data,
155
- unit_col,
156
- time_col,
157
- treatment_col,
158
- outcome_col,
196
+ data: pd.DataFrame,
197
+ unit_col: str,
198
+ time_col: str,
199
+ treatment_col: str,
200
+ outcome_col: str,
159
201
  pre_post = False,
160
202
  alpha = 0.05
161
203
  ):
@@ -206,10 +248,10 @@ def is_parallel(
206
248
  ]
207
249
 
208
250
  def date_counter(
209
- df,
210
- date_col,
211
- new_col = "date_counter"
212
- ):
251
+ df: pd.DataFrame,
252
+ date_col: str,
253
+ new_col: str = "date_counter"
254
+ ):
213
255
 
214
256
  dates = df[date_col].unique()
215
257
 
@@ -226,6 +268,7 @@ def date_counter(
226
268
  return df
227
269
 
228
270
  def unique(data):
271
+
229
272
  if data is None or (isinstance(data, (list, np.ndarray, pd.Series, pd.DataFrame)) and len(data) == 0):
230
273
  return []
231
274
 
@@ -269,8 +312,9 @@ def model_wrapper(
269
312
  lgbm_learning_rate = 0.1,
270
313
  random_state = 71
271
314
  ):
272
- if model_type not in ["ols", "olsbg", "dtbg", "rf", "gb", "knn", "svr", "xgb", "lgbm", "catboost"]:
273
- raise ValueError("Please enter a valid model type")
315
+
316
+ if model_type not in ["ols", "olsbg", "dtbg", "rf", "gb", "knn", "svr", "xgb", "lgbm"]:
317
+ raise ValueError("Please enter a valid model type ('ols', 'olsbg', 'dtbg', 'rf', 'gb', 'knn', 'svr', 'xgb', 'lgbm')")
274
318
 
275
319
  X_train, X_test, y_train, y_test = train_test_split(
276
320
  X,
@@ -348,10 +392,10 @@ def model_wrapper(
348
392
  ]
349
393
 
350
394
  def treatment_times(
351
- data,
352
- unit_col,
353
- time_col,
354
- treatment_col
395
+ data: pd.DataFrame,
396
+ unit_col: str,
397
+ time_col: str,
398
+ treatment_col: str
355
399
  ):
356
400
 
357
401
  check_columns(
@@ -389,10 +433,10 @@ def clean_column_name(value):
389
433
  return value.strip('_')
390
434
 
391
435
  def to_dummies(
392
- data,
393
- col,
394
- drop_first = False,
395
- prefix = "DUMMY"
436
+ data: pd.DataFrame,
437
+ col: str,
438
+ drop_first: bool = False,
439
+ prefix: str = "DUMMY"
396
440
  ):
397
441
 
398
442
  unique_values = data[col].astype(str).unique()
@@ -1,17 +1,19 @@
1
- #------------------------------------------------------------------------------------------
2
-
3
- # Name: tests_diffindiff
1
+ #-----------------------------------------------------------------------
2
+ # Name: tests_diffindiff (diffindiff package)
4
3
  # Purpose: Tests and examples for the diffindiff package
5
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
6
- # Version: 2.0.1
7
- # Last update: 2025-04-15 18:43
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.3
8
+ # Last update: 2025-04-18 10:24
8
9
  # Copyright (c) 2025 Thomas Wieland
9
- #------------------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
10
11
 
11
12
 
12
13
  import pandas as pd
13
14
  from diffindiff.didanalysis import DiffModel, did_analysis
14
15
  from diffindiff.diddata import DiffGroups, create_groups, DiffTreatment, create_treatment, DiffData, merge_data, create_data
16
+ from diffindiff.didtools import treatment_group_col
15
17
 
16
18
  # Example 1: Effect of a curfew in German counties in the first
17
19
  # wave of the COVID-19 pandemic (DiD pre-post analysis)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: diffindiff
3
- Version: 2.0.1
3
+ Version: 2.0.3
4
4
  Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
@@ -38,17 +38,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
38
38
  - Create predictive counterfactuals
39
39
  - **DiD analysis**:
40
40
  - Perform standard DiD analysis
41
- - Model Extensions:
41
+ - Model extensions:
42
42
  - Staggered adoption
43
43
  - Multiple treatments
44
44
  - Two-way fixed effects models
45
45
  - Group- or individual-specific treatment effects
46
46
  - Group- or individual-specific time trends
47
47
  - Including covariates
48
- - After-treatment period
48
+ - Including after-treatment period
49
49
  - Triple Difference (DDD)
50
50
  - Own counterfactuals
51
- - Bonferroni correction
51
+ - Bonferroni correction for treatment effects
52
+ - Placebo test
52
53
  - **Visualization**:
53
54
  - Plot observed and expected time course of treatment and control group
54
55
  - Plot expected time course of treatment group and counterfactual
@@ -60,7 +61,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
60
61
  - Test for type of adoption
61
62
  - Test whether the panel dataset is balanced
62
63
  - Test for parallel trend assumption
63
- - Placebo test
64
64
 
65
65
 
66
66
  ## Literature
@@ -7,7 +7,7 @@ def read_README():
7
7
 
8
8
  setup(
9
9
  name='diffindiff',
10
- version='2.0.1',
10
+ version='2.0.3',
11
11
  description='diffindiff: Python library for convenient Difference-in-Differences Analyses',
12
12
  packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
13
13
  include_package_data=True,
File without changes
File without changes