diffindiff 2.0.3__tar.gz → 2.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffindiff
3
- Version: 2.0.3
3
+ Version: 2.0.5
4
4
  Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
@@ -45,7 +45,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
45
45
  - Group- or individual-specific treatment effects
46
46
  - Group- or individual-specific time trends
47
47
  - Including covariates
48
- - Including fter-treatment period
48
+ - Including after-treatment period
49
49
  - Triple Difference (DDD)
50
50
  - Own counterfactuals
51
51
  - Bonferroni correction for treatment effects
@@ -23,7 +23,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
23
23
  - Group- or individual-specific treatment effects
24
24
  - Group- or individual-specific time trends
25
25
  - Including covariates
26
- - Including fter-treatment period
26
+ - Including after-treatment period
27
27
  - Triple Difference (DDD)
28
28
  - Own counterfactuals
29
29
  - Bonferroni correction for treatment effects
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 2.0.3
8
- # Last update: 2025-04-18 10:24
7
+ # Version: 2.0.5
8
+ # Last update: 2025-04-19 10:23
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -15,6 +15,7 @@ from statsmodels.formula.api import ols
15
15
  import numpy as np
16
16
  import matplotlib.pyplot as plt
17
17
  from matplotlib.dates import DateFormatter
18
+ import json
18
19
  import diffindiff.didtools
19
20
 
20
21
 
@@ -27,8 +28,7 @@ class DiffModel:
27
28
  did_modeldata,
28
29
  did_modelpredictions,
29
30
  did_model_statistics,
30
- did_olsmodel,
31
- did_prediction_intervals
31
+ did_olsmodel
32
32
  ):
33
33
 
34
34
  self.data = [
@@ -37,9 +37,8 @@ class DiffModel:
37
37
  did_modeldata,
38
38
  did_modelpredictions,
39
39
  did_model_statistics,
40
- did_olsmodel,
41
- did_prediction_intervals
42
- ]
40
+ did_olsmodel
41
+ ]
43
42
 
44
43
  def treatment_statistics(
45
44
  self,
@@ -165,11 +164,11 @@ class DiffModel:
165
164
  treatment_diagnostics_rows.append({
166
165
  "Treatment": value["treatment"],
167
166
  "Type of adoption": adoption_type,
168
- "No-treatment control group": no_treatment,
169
- "Treatment group (N)": treatment_group_size,
170
- "Control group (N)": control_group_size,
167
+ "No-treatment control group": no_treatment,
171
168
  "Parallel trends (pre)": is_parallel,
172
- "Format": value["treatment_format"]
169
+ "Format": value["treatment_format"],
170
+ "Treatment group (N)": treatment_group_size,
171
+ "Control group (N)": control_group_size
173
172
  })
174
173
 
175
174
  if no_treatment == "NO" and adoption_type == "Simultaneous":
@@ -230,8 +229,8 @@ class DiffModel:
230
229
  })
231
230
  average_after_treatment_effects = pd.DataFrame(average_after_treatment_effects_rows)
232
231
  if len (average_after_treatment_effects) == 1:
233
- average_after_treatment_effects.at[0, ""] = "Average after treatment effect"
234
- treatment_effects_df = pd.concat([treatment_effects_df, average_after_treatment_effects], ignore_index=True)
232
+ average_after_treatment_effects.at[0, ""] = "Average after-treatment effect"
233
+ treatment_effects_df = pd.concat([treatment_effects_df, average_after_treatment_effects], ignore_index=True)
235
234
 
236
235
  if ("control_group_baseline" in model_results and not model_config["FE_unit"]) or ("control_group_baseline" in model_results and baseline_components):
237
236
  control_group_baseline = model_results["control_group_baseline"]
@@ -287,7 +286,25 @@ class DiffModel:
287
286
  if len (non_treatment_time_effect) == 1:
288
287
  non_treatment_time_effect.at[0, ""] = "Non-treatment time effect"
289
288
  treatment_effects_df = pd.concat([treatment_effects_df, non_treatment_time_effect], ignore_index=True)
290
-
289
+
290
+ if "after_treatment_time_effects" in model_results:
291
+ after_treatment_time_effects = model_results["after_treatment_time_effects"]
292
+ after_treatment_time_effects_rows = []
293
+ for key, value in after_treatment_time_effects.items():
294
+ after_treatment_time_effects_rows.append({
295
+ "": "After-treatment time effect " + value["Coefficient"],
296
+ "Estimate": value["Estimate"],
297
+ "SE": value["SE"],
298
+ "t": value["t"],
299
+ "p": value["p"],
300
+ "CI lower": value["CI_lower"],
301
+ "CI upper": value["CI_upper"]
302
+ })
303
+ after_treatment_time_effects = pd.DataFrame(after_treatment_time_effects_rows)
304
+ if len (after_treatment_time_effects) == 1:
305
+ after_treatment_time_effects.at[0, ""] = "After-treatment time effect"
306
+ treatment_effects_df = pd.concat([treatment_effects_df, after_treatment_time_effects], ignore_index=True)
307
+
291
308
  if "individual_treatment_effects" in model_results:
292
309
 
293
310
  individual_treatment_effects = model_results["individual_treatment_effects"]
@@ -535,7 +552,6 @@ class DiffModel:
535
552
  else:
536
553
  print ("Covariates NO")
537
554
 
538
- print("")
539
555
  print("Fixed effects")
540
556
  if model_config["FE_unit"]:
541
557
  print (" Units YES")
@@ -570,9 +586,21 @@ class DiffModel:
570
586
  index = treatment_diagnostics_df.columns)
571
587
  treatment_diagnostics_df_t = treatment_diagnostics_df_t.iloc[1:]
572
588
  print(treatment_diagnostics_df_t)
589
+ if model_config["no_treatments"] > 1:
590
+ untreated = diffindiff.didtools.untreated_units(
591
+ data = model_data,
592
+ unit_col = model_config["unit_col"],
593
+ treatment_col = model_config["treatment_col"]
594
+ )
595
+ print ("Units with >=1 treatment(s): " + str(untreated[0]) + ", non-treated units: " + str(untreated[1]))
596
+ if len(no_control_conditions) > 0:
597
+ if len(no_control_conditions) == 1:
598
+ print("NOTE: Treatment " + no_control_conditions[0], " has no control conditions")
599
+ else:
600
+ print("NOTE: Treatments " + ", ".join(no_control_conditions), "have no control conditions")
573
601
 
574
602
  print("-" * total_width)
575
- print ("Input data diagnostixx") # TODO ?? AENDERN
603
+ print ("Input data diagnostics")
576
604
  if modeldata_isbalanced:
577
605
  print ("Balanced panel data YES")
578
606
  else:
@@ -591,12 +619,6 @@ class DiffModel:
591
619
  print ("Adj. R-Squared " + str(round(model_statistics["rsquared_adj"], 3)))
592
620
  print ("===============================================================")
593
621
 
594
- if len(no_control_conditions) > 0:
595
- if len(no_control_conditions) == 1:
596
- print("NOTE: Treatment " + ",".join(no_control_conditions), "has no control conditions")
597
- else:
598
- print("NOTE: Treatments " + ", ".join(no_control_conditions), "have no control conditions")
599
-
600
622
  return self
601
623
 
602
624
  def plot_treatment_effects(
@@ -762,9 +784,16 @@ class DiffModel:
762
784
  ols_model = self.data[5]
763
785
  return ols_model
764
786
 
765
- def prediction_intervals(self):
787
+ def prediction_intervals(
788
+ self,
789
+ confint_alpha = 0.05
790
+ ):
791
+
792
+ ols_model = self.data[5]
793
+
794
+ prediction_intervals = ols_model.get_prediction()
795
+ prediction_intervals = prediction_intervals.summary_frame(alpha = confint_alpha)
766
796
 
767
- prediction_intervals = self.data[6]
768
797
  return prediction_intervals
769
798
 
770
799
  def placebo(
@@ -807,9 +836,13 @@ class DiffModel:
807
836
  TT_col_ = "TT_" + treatment
808
837
  TGxTT_ = "Placebo_" + treatment
809
838
  if TG_col is None and TG_col_ not in model_config["TG_col"]:
810
- raise ValueError("Cannot find treatment group identification variable for treatment " + treatment + ". Please state TG_col = [treatment_group_dummy].")
839
+ raise ValueError("No treatment group identification variable for treatment " + treatment + ". Please state TG_col = [treatment_group_dummy].")
811
840
  if TT_col is None and TT_col_ not in model_config["TT_col"]:
812
- raise ValueError("Cannot findt treatment time variable for treatment " + treatment + ". Please state TG_col = [treatment_time_dummy].")
841
+ raise ValueError("No treatment time variable for treatment " + treatment + ". Please state TG_col = [treatment_time_dummy].")
842
+ if TG_col is not None:
843
+ TG_col_ = TG_col
844
+ if TT_col is not None:
845
+ TT_col_ = TT_col
813
846
 
814
847
  unit_col = model_config["unit_col"]
815
848
  time_col = model_config["time_col"]
@@ -1270,6 +1303,7 @@ def did_analysis(
1270
1303
  TG_col: list = [],
1271
1304
  TT_col: list = [],
1272
1305
  after_treatment_col: list = [],
1306
+ ATT_col: list = [],
1273
1307
  pre_post: bool = False,
1274
1308
  log_outcome: bool = False,
1275
1309
  log_outcome_add = 0.01,
@@ -1407,6 +1441,16 @@ def did_analysis(
1407
1441
  )
1408
1442
  cols_relevant = cols_relevant + after_treatment_col
1409
1443
 
1444
+ if ATT_col is not None or (isinstance (ATT_col, list) and len(ATT_col) > 0):
1445
+ if isinstance (ATT_col, str):
1446
+ ATT_col = [ATT_col]
1447
+ ATT_col = [entry for entry in ATT_col if entry is not None]
1448
+ diffindiff.didtools.check_columns(
1449
+ df = data,
1450
+ columns = ATT_col
1451
+ )
1452
+ cols_relevant = cols_relevant + ATT_col
1453
+
1410
1454
  if TG_col is not None or (isinstance (TG_col, list) and len(TG_col) > 0):
1411
1455
  if isinstance (TG_col, str):
1412
1456
  TG_col = [TG_col]
@@ -1504,6 +1548,8 @@ def did_analysis(
1504
1548
 
1505
1549
  if len(after_treatment_col) > 0:
1506
1550
  did_formula = did_formula + f' + {" + ".join(after_treatment_col)}'
1551
+ if len(ATT_col) > 0:
1552
+ did_formula = did_formula + f' + {" + ".join(ATT_col)}'
1507
1553
 
1508
1554
  if FE_unit:
1509
1555
  unit_col_todummies = diffindiff.didtools.to_dummies(
@@ -1652,6 +1698,7 @@ def did_analysis(
1652
1698
  "freq": freq,
1653
1699
  "date_format": date_format,
1654
1700
  "after_treatment_col": after_treatment_col,
1701
+ "ATT_col": ATT_col,
1655
1702
  "pre_post": pre_post,
1656
1703
  "FE_unit": FE_unit,
1657
1704
  "FE_time": FE_time,
@@ -1759,6 +1806,20 @@ def did_analysis(
1759
1806
  "CI_upper": float(coef_conf_intervals.loc[after_treatment, 1]),
1760
1807
  }
1761
1808
  model_results["average_after_treatment_effects"] = AATE
1809
+
1810
+ if (any(col in ols_coefficients for col in ATT_col)):
1811
+ ATT = {}
1812
+ for i, ATT_ in enumerate(ATT_col):
1813
+ ATT[i] = {
1814
+ "Coefficient": ATT_,
1815
+ "Estimate": ols_coefficients[ATT_],
1816
+ "SE": float(coef_standarderrors[ATT_]),
1817
+ "t": float(coef_t[ATT_]),
1818
+ "p": float(coef_p[ATT_]),
1819
+ "CI_lower": float(coef_conf_intervals.loc[ATT_, 0]),
1820
+ "CI_upper": float(coef_conf_intervals.loc[ATT_, 1]),
1821
+ }
1822
+ model_results["after_treatment_time_effects"] = ATT
1762
1823
 
1763
1824
  if DDD:
1764
1825
 
@@ -1990,9 +2051,6 @@ def did_analysis(
1990
2051
 
1991
2052
  model_predictions = ols_model.predict()
1992
2053
 
1993
- prediction_intervals = ols_model.get_prediction()
1994
- prediction_intervals = prediction_intervals.summary_frame(alpha = confint_alpha)
1995
-
1996
2054
  model_statistics = {
1997
2055
  "rsquared": ols_model.rsquared,
1998
2056
  "rsquared_adj": ols_model.rsquared_adj,
@@ -2005,8 +2063,7 @@ def did_analysis(
2005
2063
  data,
2006
2064
  model_predictions,
2007
2065
  model_statistics,
2008
- ols_model,
2009
- prediction_intervals
2066
+ ols_model
2010
2067
  )
2011
2068
 
2012
2069
  return did_model_output
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 2.0.3
8
- # Last update: 2025-04-18 10:24
7
+ # Version: 2.0.5
8
+ # Last update: 2025-04-19 10:23
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -163,7 +163,7 @@ class DiffTreatment:
163
163
  else:
164
164
  print (" Treatment Period: " + str(value["treatment_period_start"]) + " - " + str(value["treatment_period_end"])+ (" (") + str(value["treatment_period"]) + " " + treatment_meta["frequency"] + ")")
165
165
  if treatment_config[key]["after_treatment_period"]:
166
- print (" After treatment period: " + str(value["treatment_period_end"]) + " - " + str(value["study_period_end"]) + " (" + str(value["after_treatment_period_N"]) + " " + treatment_meta["frequency"] + ")")
166
+ print (" After-treatment period: " + str(value["treatment_period_end"]) + " - " + str(value["study_period_end"]) + " (" + str(value["after_treatment_period_N"]) + " " + treatment_meta["frequency"] + ")")
167
167
 
168
168
  def create_treatment (
169
169
  study_period,
@@ -722,12 +722,15 @@ class DiffData:
722
722
  TG_col = [None]*len(treatment_cols)
723
723
  treatment_col = [None]*len(treatment_cols)
724
724
  after_treatment_col = [None]*len(treatment_cols)
725
+ ATT_col = [None]*len(treatment_cols)
725
726
  for key, value in treatment_cols.items():
726
727
  TG_col[key] = groups_config[key]["TG_col"]
727
728
  TT_col[key] = value["TT_col"]
728
729
  treatment_col[key] = value["treatment_name"]
729
730
  if value["after_treatment_name"] is not None:
730
731
  after_treatment_col[key] = value["after_treatment_name"]
732
+ if value["ATT_col"] is not None:
733
+ ATT_col[key] = value["ATT_col"]
731
734
 
732
735
  group_benefit = []
733
736
  if groups_config[0]["DDD"]:
@@ -742,6 +745,7 @@ class DiffData:
742
745
  time_col = "t",
743
746
  outcome_col = outcome_col_original,
744
747
  after_treatment_col = after_treatment_col,
748
+ ATT_col = ATT_col,
745
749
  pre_post = treatment_meta["pre_post"],
746
750
  log_outcome = log_outcome,
747
751
  FE_unit = FE_unit,
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 2.0.3
8
- # Last update: 2025-04-18 12:08
7
+ # Version: 2.0.5
8
+ # Last update: 2025-04-19 10:23
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -186,11 +186,22 @@ def treatment_group_col(
186
186
  def untreated_units(
187
187
  data: pd.DataFrame,
188
188
  unit_col: str,
189
- time_col: str,
190
- treatment_col: str
189
+ treatment_col: list
191
190
  ):
192
- # TODO ??
193
- pass
191
+
192
+ unit_sum = data.groupby(unit_col)[treatment_col].sum().sum(axis=1).reset_index(name="sum")
193
+
194
+ units_treated = unit_sum.loc[unit_sum["sum"] > 0, unit_col]
195
+ units_nontreated = unit_sum.loc[unit_sum["sum"] == 0, unit_col]
196
+ no_units_treated = len(units_treated)
197
+ no_units_nontreated = len(units_nontreated)
198
+
199
+ return [
200
+ no_units_treated,
201
+ no_units_nontreated,
202
+ units_treated,
203
+ units_nontreated
204
+ ]
194
205
 
195
206
  def is_parallel(
196
207
  data: pd.DataFrame,
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 2.0.3
8
- # Last update: 2025-04-18 10:24
7
+ # Version: 2.0.5
8
+ # Last update: 2025-04-19 10:23
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -13,7 +13,7 @@
13
13
  import pandas as pd
14
14
  from diffindiff.didanalysis import DiffModel, did_analysis
15
15
  from diffindiff.diddata import DiffGroups, create_groups, DiffTreatment, create_treatment, DiffData, merge_data, create_data
16
- from diffindiff.didtools import treatment_group_col
16
+
17
17
 
18
18
  # Example 1: Effect of a curfew in German counties in the first
19
19
  # wave of the COVID-19 pandemic (DiD pre-post analysis)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffindiff
3
- Version: 2.0.3
3
+ Version: 2.0.5
4
4
  Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
@@ -45,7 +45,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
45
45
  - Group- or individual-specific treatment effects
46
46
  - Group- or individual-specific time trends
47
47
  - Including covariates
48
- - Including fter-treatment period
48
+ - Including after-treatment period
49
49
  - Triple Difference (DDD)
50
50
  - Own counterfactuals
51
51
  - Bonferroni correction for treatment effects
@@ -7,7 +7,7 @@ def read_README():
7
7
 
8
8
  setup(
9
9
  name='diffindiff',
10
- version='2.0.3',
10
+ version='2.0.5',
11
11
  description='diffindiff: Python library for convenient Difference-in-Differences Analyses',
12
12
  packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
13
13
  include_package_data=True,
File without changes
File without changes