diffindiff 2.0.2__tar.gz → 2.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,24 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: diffindiff
3
- Version: 2.0.2
3
+ Version: 2.0.4
4
4
  Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
7
7
  Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy
9
+ Requires-Dist: pandas
10
+ Requires-Dist: statsmodels
11
+ Requires-Dist: matplotlib
12
+ Requires-Dist: datetime
13
+ Requires-Dist: scikit-learn
14
+ Requires-Dist: xgboost
15
+ Requires-Dist: lightgbm
16
+ Dynamic: author
17
+ Dynamic: author-email
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: requires-dist
21
+ Dynamic: summary
8
22
 
9
23
  # diffindiff: Difference-in-Differences (DiD) Analysis Python Library
10
24
 
@@ -24,17 +38,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
24
38
  - Create predictive counterfactuals
25
39
  - **DiD analysis**:
26
40
  - Perform standard DiD analysis
27
- - Model Extensions:
41
+ - Model extensions:
28
42
  - Staggered adoption
29
43
  - Multiple treatments
30
44
  - Two-way fixed effects models
31
45
  - Group- or individual-specific treatment effects
32
46
  - Group- or individual-specific time trends
33
47
  - Including covariates
34
- - After-treatment period
48
+ - Including after-treatment period
35
49
  - Triple Difference (DDD)
36
50
  - Own counterfactuals
37
- - Bonferroni correction
51
+ - Bonferroni correction for treatment effects
52
+ - Placebo test
38
53
  - **Visualization**:
39
54
  - Plot observed and expected time course of treatment and control group
40
55
  - Plot expected time course of treatment group and counterfactual
@@ -46,7 +61,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
46
61
  - Test for type of adoption
47
62
  - Test whether the panel dataset is balanced
48
63
  - Test for parallel trend assumption
49
- - Placebo test
50
64
 
51
65
 
52
66
  ## Literature
@@ -16,17 +16,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
16
16
  - Create predictive counterfactuals
17
17
  - **DiD analysis**:
18
18
  - Perform standard DiD analysis
19
- - Model Extensions:
19
+ - Model extensions:
20
20
  - Staggered adoption
21
21
  - Multiple treatments
22
22
  - Two-way fixed effects models
23
23
  - Group- or individual-specific treatment effects
24
24
  - Group- or individual-specific time trends
25
25
  - Including covariates
26
- - After-treatment period
26
+ - Including after-treatment period
27
27
  - Triple Difference (DDD)
28
28
  - Own counterfactuals
29
- - Bonferroni correction
29
+ - Bonferroni correction for treatment effects
30
+ - Placebo test
30
31
  - **Visualization**:
31
32
  - Plot observed and expected time course of treatment and control group
32
33
  - Plot expected time course of treatment group and counterfactual
@@ -38,7 +39,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
38
39
  - Test for type of adoption
39
40
  - Test whether the panel dataset is balanced
40
41
  - Test for parallel trend assumption
41
- - Placebo test
42
42
 
43
43
 
44
44
  ## Literature
@@ -1,11 +1,13 @@
1
- #-------------------------------------------------------------------------------
2
- # Name: didanalysis (diffindiff)
1
+ #-----------------------------------------------------------------------
2
+ # Name: didanalysis (diffindiff package)
3
3
  # Purpose: Analysis functions for difference-in-differences analyses
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.2
6
- # Last update: 2025-04-16 17:10
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.4
8
+ # Last update: 2025-04-18 15:55
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #-------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
9
11
 
10
12
 
11
13
  import pandas as pd
@@ -163,11 +165,11 @@ class DiffModel:
163
165
  treatment_diagnostics_rows.append({
164
166
  "Treatment": value["treatment"],
165
167
  "Type of adoption": adoption_type,
166
- "No-treatment control group": no_treatment,
167
- "Treatment group (N)": treatment_group_size,
168
- "Control group (N)": control_group_size,
168
+ "No-treatment control group": no_treatment,
169
169
  "Parallel trends (pre)": is_parallel,
170
- "Format": value["treatment_format"]
170
+ "Format": value["treatment_format"],
171
+ "Treatment group (N)": treatment_group_size,
172
+ "Control group (N)": control_group_size
171
173
  })
172
174
 
173
175
  if no_treatment == "NO" and adoption_type == "Simultaneous":
@@ -366,7 +368,7 @@ class DiffModel:
366
368
 
367
369
  for key, value in covariates_effects.items():
368
370
  covariates_effects_rows.append({
369
- "Covariates": value["Coefficient"],
371
+ "": value["Coefficient"],
370
372
  "Estimate": value["Estimate"],
371
373
  "SE": value["SE"],
372
374
  "t": value["t"],
@@ -525,8 +527,9 @@ class DiffModel:
525
527
  covariates_effects_df["CI lower"] = covariates_effects_df["CI lower"].map(lambda x: f"{x:,.3f}")
526
528
  covariates_effects_df["CI upper"] = covariates_effects_df["CI upper"].map(lambda x: f"{x:,.3f}")
527
529
  covariates_effects_df.iloc[:, 0] = covariates_effects_df.iloc[:, 0].apply(lambda x: f"{x:<{max_width_column1}}")
530
+ print("Covariates")
528
531
  print(covariates_effects_df.to_string(index=False))
529
- if not show_covariates:
532
+ if not show_covariates or no_covariates == 0:
530
533
  if no_covariates > 0:
531
534
  print ("Covariates YES")
532
535
  else:
@@ -566,6 +569,13 @@ class DiffModel:
566
569
  index = treatment_diagnostics_df.columns)
567
570
  treatment_diagnostics_df_t = treatment_diagnostics_df_t.iloc[1:]
568
571
  print(treatment_diagnostics_df_t)
572
+ if model_config["no_treatments"] > 1:
573
+ untreated = diffindiff.didtools.untreated_units(
574
+ data = model_data,
575
+ unit_col = model_config["unit_col"],
576
+ treatment_col = model_config["treatment_col"]
577
+ )
578
+ print ("Units with >=1 treatment(s): " + str(untreated[0]) + ", non-treated units: " + str(untreated[1]))
569
579
 
570
580
  print("-" * total_width)
571
581
  print ("Input data diagnostics")
@@ -1,11 +1,14 @@
1
- #-------------------------------------------------------------------------------
2
- # Name: diddata (diffindiff)
1
+ #-----------------------------------------------------------------------
2
+ # Name: diddata (diffindiff package)
3
3
  # Purpose: Creating data for Difference-in-Differences Analysis
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.2
6
- # Last update: 2025-04-16 17:10
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.4
8
+ # Last update: 2025-04-18 15:38
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #-------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
11
+
9
12
 
10
13
  import pandas as pd
11
14
  import numpy as np
@@ -1,11 +1,13 @@
1
- # -------------------------------------------------------------------------------
2
- # Name: didtools (diffindiff)
3
- # Purpose: Creating data for Difference-in-Differences Analysis
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.2
6
- # Last update: 2025-04-16 17:10
1
+ #-----------------------------------------------------------------------
2
+ # Name: didtools (diffindiff package)
3
+ # Purpose: Additional tools for Difference-in-Differences Analysis
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.4
8
+ # Last update: 2025-04-18 15:38
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #-------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
9
11
 
10
12
 
11
13
  import pandas as pd
@@ -34,11 +36,11 @@ def check_columns(
34
36
  raise ValueError(f"Data do not contain column(s): {', '.join(missing_columns)}")
35
37
 
36
38
  def is_balanced(
37
- data,
38
- unit_col,
39
- time_col,
40
- outcome_col,
41
- other_cols = None
39
+ data: pd.DataFrame,
40
+ unit_col: str,
41
+ time_col: str,
42
+ outcome_col: str,
43
+ other_cols: list = None
42
44
  ):
43
45
 
44
46
  unit_freq = data[unit_col].nunique()
@@ -58,8 +60,8 @@ def is_balanced(
58
60
  return True
59
61
 
60
62
  def is_binary(
61
- data,
62
- treatment_col
63
+ data: pd.DataFrame,
64
+ treatment_col: str
63
65
  ):
64
66
 
65
67
  unique_values = set(data[treatment_col].dropna().unique())
@@ -76,7 +78,7 @@ def is_binary(
76
78
  return [False, "Unknown"]
77
79
 
78
80
  def is_missing(
79
- data,
81
+ data: pd.DataFrame,
80
82
  drop_missing: bool = True,
81
83
  missing_replace_by_zero: bool = False
82
84
  ):
@@ -104,10 +106,10 @@ def is_missing(
104
106
  ]
105
107
 
106
108
  def is_simultaneous(
107
- data,
108
- unit_col,
109
- time_col,
110
- treatment_col,
109
+ data: pd.DataFrame,
110
+ unit_col: str,
111
+ time_col: str,
112
+ treatment_col: str,
111
113
  pre_post = False
112
114
  ):
113
115
 
@@ -125,9 +127,9 @@ def is_simultaneous(
125
127
  return col_identical
126
128
 
127
129
  def is_notreatment(
128
- data,
129
- unit_col,
130
- treatment_col
130
+ data: pd.DataFrame,
131
+ unit_col: str,
132
+ treatment_col: str
131
133
  ):
132
134
 
133
135
  data_relevant = data[[unit_col, treatment_col]]
@@ -150,12 +152,63 @@ def is_notreatment(
150
152
  control_group
151
153
  ]
152
154
 
155
+ def treatment_group_col(
156
+ data: pd.DataFrame,
157
+ unit_col: str,
158
+ treatment_col: str,
159
+ create_TG_col: str = "TG"
160
+ ):
161
+
162
+ isnotreatment = is_notreatment(
163
+ data = data,
164
+ unit_col = unit_col,
165
+ treatment_col = treatment_col
166
+ )
167
+
168
+ if not isnotreatment[0]:
169
+ print ("Model data does not contain a no-treatment control group. Treatment group column is constant = 1.")
170
+
171
+ if create_TG_col in data.columns:
172
+ create_TG_col = "TG_"+treatment_col
173
+ print ("Column " + create_TG_col + " already exists. Saving treatment group in column TG_" + treatment_col)
174
+
175
+ treatment_group = isnotreatment[1]
176
+
177
+ data[create_TG_col] = 0
178
+ data.loc[data[unit_col].astype(str).isin(treatment_group), create_TG_col] = 1
179
+
180
+ return [
181
+ data,
182
+ isnotreatment[0],
183
+ create_TG_col
184
+ ]
185
+
186
+ def untreated_units(
187
+ data: pd.DataFrame,
188
+ unit_col: str,
189
+ treatment_col: list
190
+ ):
191
+
192
+ unit_sum = data.groupby(unit_col)[treatment_col].sum().sum(axis=1).reset_index(name="sum")
193
+
194
+ units_treated = unit_sum.loc[unit_sum["sum"] > 0, unit_col]
195
+ units_nontreated = unit_sum.loc[unit_sum["sum"] == 0, unit_col]
196
+ no_units_treated = len(units_treated)
197
+ no_units_nontreated = len(units_nontreated)
198
+
199
+ return [
200
+ no_units_treated,
201
+ no_units_nontreated,
202
+ units_treated,
203
+ units_nontreated
204
+ ]
205
+
153
206
  def is_parallel(
154
- data,
155
- unit_col,
156
- time_col,
157
- treatment_col,
158
- outcome_col,
207
+ data: pd.DataFrame,
208
+ unit_col: str,
209
+ time_col: str,
210
+ treatment_col: str,
211
+ outcome_col: str,
159
212
  pre_post = False,
160
213
  alpha = 0.05
161
214
  ):
@@ -206,10 +259,10 @@ def is_parallel(
206
259
  ]
207
260
 
208
261
  def date_counter(
209
- df,
210
- date_col,
211
- new_col = "date_counter"
212
- ):
262
+ df: pd.DataFrame,
263
+ date_col: str,
264
+ new_col: str = "date_counter"
265
+ ):
213
266
 
214
267
  dates = df[date_col].unique()
215
268
 
@@ -226,6 +279,7 @@ def date_counter(
226
279
  return df
227
280
 
228
281
  def unique(data):
282
+
229
283
  if data is None or (isinstance(data, (list, np.ndarray, pd.Series, pd.DataFrame)) and len(data) == 0):
230
284
  return []
231
285
 
@@ -269,8 +323,9 @@ def model_wrapper(
269
323
  lgbm_learning_rate = 0.1,
270
324
  random_state = 71
271
325
  ):
272
- if model_type not in ["ols", "olsbg", "dtbg", "rf", "gb", "knn", "svr", "xgb", "lgbm", "catboost"]:
273
- raise ValueError("Please enter a valid model type")
326
+
327
+ if model_type not in ["ols", "olsbg", "dtbg", "rf", "gb", "knn", "svr", "xgb", "lgbm"]:
328
+ raise ValueError("Please enter a valid model type ('ols', 'olsbg', 'dtbg', 'rf', 'gb', 'knn', 'svr', 'xgb', 'lgbm')")
274
329
 
275
330
  X_train, X_test, y_train, y_test = train_test_split(
276
331
  X,
@@ -348,10 +403,10 @@ def model_wrapper(
348
403
  ]
349
404
 
350
405
  def treatment_times(
351
- data,
352
- unit_col,
353
- time_col,
354
- treatment_col
406
+ data: pd.DataFrame,
407
+ unit_col: str,
408
+ time_col: str,
409
+ treatment_col: str
355
410
  ):
356
411
 
357
412
  check_columns(
@@ -389,10 +444,10 @@ def clean_column_name(value):
389
444
  return value.strip('_')
390
445
 
391
446
  def to_dummies(
392
- data,
393
- col,
394
- drop_first = False,
395
- prefix = "DUMMY"
447
+ data: pd.DataFrame,
448
+ col: str,
449
+ drop_first: bool = False,
450
+ prefix: str = "DUMMY"
396
451
  ):
397
452
 
398
453
  unique_values = data[col].astype(str).unique()
@@ -1,16 +1,20 @@
1
- #------------------------------------------------------------------------------------------
2
- # Name: tests_diffindiff
1
+ #-----------------------------------------------------------------------
2
+ # Name: tests_diffindiff (diffindiff package)
3
3
  # Purpose: Tests and examples for the diffindiff package
4
- # Author: Thomas Wieland (mail: geowieland@googlemail.com, ORCID: 0000-0001-5168-9846)
5
- # Version: 2.0.2
6
- # Last update: 2025-04-16 17:10
4
+ # Author: Thomas Wieland
5
+ # ORCID: 0000-0001-5168-9846
6
+ # mail: geowieland@googlemail.com
7
+ # Version: 2.0.4
8
+ # Last update: 2025-04-18 15:38
7
9
  # Copyright (c) 2025 Thomas Wieland
8
- #------------------------------------------------------------------------------------------
10
+ #-----------------------------------------------------------------------
9
11
 
10
12
 
11
13
  import pandas as pd
12
14
  from diffindiff.didanalysis import DiffModel, did_analysis
13
15
  from diffindiff.diddata import DiffGroups, create_groups, DiffTreatment, create_treatment, DiffData, merge_data, create_data
16
+ from diffindiff.didtools import untreated_units
17
+
14
18
 
15
19
  # Example 1: Effect of a curfew in German counties in the first
16
20
  # wave of the COVID-19 pandemic (DiD pre-post analysis)
@@ -1,10 +1,24 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: diffindiff
3
- Version: 2.0.2
3
+ Version: 2.0.4
4
4
  Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
7
7
  Description-Content-Type: text/markdown
8
+ Requires-Dist: numpy
9
+ Requires-Dist: pandas
10
+ Requires-Dist: statsmodels
11
+ Requires-Dist: matplotlib
12
+ Requires-Dist: datetime
13
+ Requires-Dist: scikit-learn
14
+ Requires-Dist: xgboost
15
+ Requires-Dist: lightgbm
16
+ Dynamic: author
17
+ Dynamic: author-email
18
+ Dynamic: description
19
+ Dynamic: description-content-type
20
+ Dynamic: requires-dist
21
+ Dynamic: summary
8
22
 
9
23
  # diffindiff: Difference-in-Differences (DiD) Analysis Python Library
10
24
 
@@ -24,17 +38,18 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
24
38
  - Create predictive counterfactuals
25
39
  - **DiD analysis**:
26
40
  - Perform standard DiD analysis
27
- - Model Extensions:
41
+ - Model extensions:
28
42
  - Staggered adoption
29
43
  - Multiple treatments
30
44
  - Two-way fixed effects models
31
45
  - Group- or individual-specific treatment effects
32
46
  - Group- or individual-specific time trends
33
47
  - Including covariates
34
- - After-treatment period
48
+ - Including after-treatment period
35
49
  - Triple Difference (DDD)
36
50
  - Own counterfactuals
37
- - Bonferroni correction
51
+ - Bonferroni correction for treatment effects
52
+ - Placebo test
38
53
  - **Visualization**:
39
54
  - Plot observed and expected time course of treatment and control group
40
55
  - Plot expected time course of treatment group and counterfactual
@@ -46,7 +61,6 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
46
61
  - Test for type of adoption
47
62
  - Test whether the panel dataset is balanced
48
63
  - Test for parallel trend assumption
49
- - Placebo test
50
64
 
51
65
 
52
66
  ## Literature
@@ -7,7 +7,7 @@ def read_README():
7
7
 
8
8
  setup(
9
9
  name='diffindiff',
10
- version='2.0.2',
10
+ version='2.0.4',
11
11
  description='diffindiff: Python library for convenient Difference-in-Differences Analyses',
12
12
  packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
13
13
  include_package_data=True,
File without changes
File without changes