diffindiff 2.2.6__tar.gz → 2.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffindiff-2.2.6 → diffindiff-2.2.7}/PKG-INFO +7 -9
- {diffindiff-2.2.6 → diffindiff-2.2.7}/README.md +6 -8
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/config.py +6 -4
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/didanalysis.py +12 -6
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/didanalysis_helper.py +8 -4
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/diddata.py +158 -23
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/didtools.py +48 -14
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff.egg-info/PKG-INFO +7 -9
- {diffindiff-2.2.6 → diffindiff-2.2.7}/setup.py +1 -1
- {diffindiff-2.2.6 → diffindiff-2.2.7}/MANIFEST.in +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/__init__.py +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/tests/__init__.py +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/tests/data/Corona_Hesse.xlsx +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/tests/data/counties_DE.csv +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/tests/data/curfew_DE.csv +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/tests/tests_diffindiff.py +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff.egg-info/SOURCES.txt +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff.egg-info/dependency_links.txt +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff.egg-info/requires.txt +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff.egg-info/top_level.txt +0 -0
- {diffindiff-2.2.6 → diffindiff-2.2.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.2.6
|
|
3
|
+
Version: 2.2.7
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -27,7 +27,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
27
27
|
|
|
28
28
|
If you use this software, please cite:
|
|
29
29
|
|
|
30
|
-
Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.
|
|
30
|
+
Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.7) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
## Installation
|
|
@@ -167,11 +167,9 @@ See the /tests directory for usage examples of most of the included functions.
|
|
|
167
167
|
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
168
168
|
|
|
169
169
|
|
|
170
|
-
## What's new (v2.2.6)
|
|
170
|
+
## What's new (v2.2.7)
|
|
171
|
+
- Functions
|
|
172
|
+
- diddata.DiffData.define_treatment() for constructing a new treatment from a column in the dataframe
|
|
171
173
|
- Bugfixes:
|
|
172
|
-
-
|
|
173
|
-
-
|
|
174
|
-
- Removed unnecessary old dependencies and imports
|
|
175
|
-
- Other:
|
|
176
|
-
- Changed diddata.DiffGroups.add_segmentation() to return a message rather than raising an exception when the DiffGroups object already includes a benefit group
|
|
177
|
-
|
|
174
|
+
- didtools.treatment_times() and didtools.is_multiple_treatment_period() now also identify continuous treatments correctly
|
|
175
|
+
- Fixed problematic type conversion in didtools.fit_metrics()
|
|
@@ -19,7 +19,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
19
19
|
|
|
20
20
|
If you use this software, please cite:
|
|
21
21
|
|
|
22
|
-
Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.
|
|
22
|
+
Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.7) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
## Installation
|
|
@@ -159,11 +159,9 @@ See the /tests directory for usage examples of most of the included functions.
|
|
|
159
159
|
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
160
160
|
|
|
161
161
|
|
|
162
|
-
## What's new (v2.2.6)
|
|
162
|
+
## What's new (v2.2.7)
|
|
163
|
+
- Functions
|
|
164
|
+
- diddata.DiffData.define_treatment() for constructing a new treatment from a column in the dataframe
|
|
163
165
|
- Bugfixes:
|
|
164
|
-
-
|
|
165
|
-
-
|
|
166
|
-
- Removed unnecessary old dependencies and imports
|
|
167
|
-
- Other:
|
|
168
|
-
- Changed diddata.DiffGroups.add_segmentation() to return a message rather than raising an exception when the DiffGroups object already includes a benefit group
|
|
169
|
-
|
|
166
|
+
- didtools.treatment_times() and didtools.is_multiple_treatment_period() now also identify continuous treatments correctly
|
|
167
|
+
- Fixed problematic type conversion in didtools.fit_metrics()
|
|
@@ -4,23 +4,25 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 1.0.
|
|
8
|
-
# Last update: 2026-02-
|
|
7
|
+
# Version: 1.0.6
|
|
8
|
+
# Last update: 2026-02-26 18:04
|
|
9
9
|
# Copyright (c) 2025-2026 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
12
12
|
# Basic config:
|
|
13
13
|
|
|
14
14
|
PACKAGE_NAME = "diffindiff"
|
|
15
|
-
PACKAGE_VERSION = "2.2.
|
|
15
|
+
PACKAGE_VERSION = "2.2.7"
|
|
16
16
|
|
|
17
|
-
VERBOSE =
|
|
17
|
+
VERBOSE = True
|
|
18
18
|
|
|
19
19
|
ROUND_STATISTIC = 3
|
|
20
20
|
ROUND_PERCENT = 2
|
|
21
21
|
|
|
22
22
|
AUTO_SWITCH_TO_PREPOST = True
|
|
23
23
|
|
|
24
|
+
ACCEPT_CONTINUOUS_TREATMENTS = True
|
|
25
|
+
|
|
24
26
|
# Description texts:
|
|
25
27
|
|
|
26
28
|
DID_DESCRIPTION = "Difference-in-Differences Analysis"
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 2.2.
|
|
8
|
-
# Last update: 2026-02-
|
|
7
|
+
# Version: 2.2.4
|
|
8
|
+
# Last update: 2026-02-26 18:04
|
|
9
9
|
# Copyright (c) 2024-2026 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -1356,8 +1356,8 @@ def did_analysis(
|
|
|
1356
1356
|
missing_replace_by_zero: bool = False,
|
|
1357
1357
|
fit_by = "ols_fit",
|
|
1358
1358
|
verbose: bool = config.VERBOSE
|
|
1359
|
-
):
|
|
1360
|
-
|
|
1359
|
+
):
|
|
1360
|
+
|
|
1361
1361
|
tools.check_columns(
|
|
1362
1362
|
df = data,
|
|
1363
1363
|
columns = [
|
|
@@ -1384,6 +1384,12 @@ def did_analysis(
|
|
|
1384
1384
|
verbose = verbose
|
|
1385
1385
|
)
|
|
1386
1386
|
|
|
1387
|
+
tools.is_numeric(
|
|
1388
|
+
df = data,
|
|
1389
|
+
columns = treatment_col,
|
|
1390
|
+
verbose = verbose
|
|
1391
|
+
)
|
|
1392
|
+
|
|
1387
1393
|
cols_relevant = [
|
|
1388
1394
|
unit_col,
|
|
1389
1395
|
time_col,
|
|
@@ -1807,7 +1813,7 @@ def did_analysis(
|
|
|
1807
1813
|
}
|
|
1808
1814
|
|
|
1809
1815
|
if bonferroni:
|
|
1810
|
-
confint_alpha = confint_alpha/no_treatments
|
|
1816
|
+
confint_alpha = confint_alpha/no_treatments
|
|
1811
1817
|
|
|
1812
1818
|
if fit_by == "ml":
|
|
1813
1819
|
fit_result = helper.ml_fit(
|
|
@@ -1824,7 +1830,7 @@ def did_analysis(
|
|
|
1824
1830
|
cluster_SE_by = cluster_SE_by,
|
|
1825
1831
|
verbose = verbose
|
|
1826
1832
|
)
|
|
1827
|
-
|
|
1833
|
+
|
|
1828
1834
|
model_results = helper.extract_model_results(
|
|
1829
1835
|
fit_result = fit_result,
|
|
1830
1836
|
TG_col = TG_col,
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 1.0.
|
|
8
|
-
# Last update: 2025-02-
|
|
7
|
+
# Version: 1.0.7
|
|
8
|
+
# Last update: 2025-02-26 18:02
|
|
9
9
|
# Copyright (c) 2025-2026 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -203,7 +203,7 @@ def create_spillover(
|
|
|
203
203
|
time_col = time_col,
|
|
204
204
|
treatment_col = treatment,
|
|
205
205
|
create_TT_col = TT_col,
|
|
206
|
-
verbose =
|
|
206
|
+
verbose = False
|
|
207
207
|
)[0]
|
|
208
208
|
|
|
209
209
|
sp_unit_col = f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"
|
|
@@ -396,7 +396,11 @@ def treatment_diagnostics(
|
|
|
396
396
|
)
|
|
397
397
|
|
|
398
398
|
if verbose:
|
|
399
|
-
|
|
399
|
+
|
|
400
|
+
if no_treatments > 1:
|
|
401
|
+
print(f"There are {no_treatments} treatments (simultaneous: {no_treatments-staggered_count}, staggered: {staggered_count}) with {untreated[0]} treated and {untreated[1]} untreated units.")
|
|
402
|
+
else:
|
|
403
|
+
print(f"There is {no_treatments} treatment (staggered: {staggered_count}) with {untreated[0]} treated and {untreated[1]} untreated units.")
|
|
400
404
|
|
|
401
405
|
return [
|
|
402
406
|
treatment_diagnostics_results,
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 2.1.
|
|
8
|
-
# Last update: 2026-02-
|
|
7
|
+
# Version: 2.1.8
|
|
8
|
+
# Last update: 2026-02-26 18:30
|
|
9
9
|
# Copyright (c) 2024-2026 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -486,7 +486,7 @@ class DiffData:
|
|
|
486
486
|
variables: list = None,
|
|
487
487
|
unit_col: str = None,
|
|
488
488
|
time_col: str = None,
|
|
489
|
-
verbose: bool =
|
|
489
|
+
verbose: bool = False
|
|
490
490
|
):
|
|
491
491
|
|
|
492
492
|
if unit_col is None and time_col is None:
|
|
@@ -579,6 +579,7 @@ class DiffData:
|
|
|
579
579
|
|
|
580
580
|
self.data[0] = did_modeldata
|
|
581
581
|
self.data[5] = variables
|
|
582
|
+
self.data[7][len(self.data[7])] = helper.create_timestamp(function="add_covariates")
|
|
582
583
|
|
|
583
584
|
if verbose:
|
|
584
585
|
print("OK")
|
|
@@ -622,7 +623,6 @@ class DiffData:
|
|
|
622
623
|
groups_data_old = did_groups_old.get_data()
|
|
623
624
|
|
|
624
625
|
did_modeldata_old = self.get_did_modeldata_df()
|
|
625
|
-
unit_id_col, time_col = self.get_unit_time_cols()
|
|
626
626
|
outcome_col_original = self.data[3]
|
|
627
627
|
unit_time_col_original = self.get_unit_time_cols()
|
|
628
628
|
covariates = self.get_covariates()
|
|
@@ -728,21 +728,157 @@ class DiffData:
|
|
|
728
728
|
timestamp = helper.create_timestamp(function="add_treatment")
|
|
729
729
|
)
|
|
730
730
|
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
731
|
+
if verbose:
|
|
732
|
+
print("OK")
|
|
733
|
+
|
|
734
|
+
self.data[0] = did_modeldata_new
|
|
735
|
+
self.data[1] = groups_new
|
|
736
|
+
self.data[2] = treatment_new
|
|
737
|
+
self.data[3] = outcome_col_original
|
|
738
|
+
self.data[4] = unit_time_col_original
|
|
739
|
+
self.data[5] = covariates
|
|
740
|
+
self.data[6] = treatment_cols_new
|
|
741
|
+
self.data[7][len(self.data[7])] = helper.create_timestamp(function="add_treatment")
|
|
742
|
+
|
|
743
|
+
return self
|
|
744
|
+
|
|
745
|
+
def define_treatment(
|
|
746
|
+
self,
|
|
747
|
+
treatment_name,
|
|
748
|
+
after_treatment_period: bool = False,
|
|
749
|
+
after_treatment_name = None,
|
|
750
|
+
verbose: bool = config.VERBOSE
|
|
751
|
+
):
|
|
752
|
+
|
|
753
|
+
if not treatment_name:
|
|
754
|
+
raise ValueError("When adding a treatment from the data, you need to specify a treatment column with parameter treament_name = [your_treatment].")
|
|
755
|
+
|
|
756
|
+
if treatment_name not in self.get_did_modeldata_df().columns:
|
|
757
|
+
raise KeyError(f"Column '{treatment_name}' not in data frame")
|
|
758
|
+
|
|
759
|
+
did_treatment_old = self.get_did_treatment()
|
|
760
|
+
treatment_config_old = did_treatment_old.get_config()
|
|
761
|
+
treatment_meta_old = did_treatment_old.get_metadata()
|
|
762
|
+
no_treatments_old = treatment_meta_old["no_treatments"]
|
|
763
|
+
|
|
764
|
+
did_groups_old = self.get_did_groups()
|
|
765
|
+
groups_config_old = did_groups_old.get_config()
|
|
766
|
+
groups_data_old = did_groups_old.get_data()
|
|
767
|
+
|
|
768
|
+
did_modeldata_old = self.get_did_modeldata_df()
|
|
769
|
+
outcome_col_original = self.data[3]
|
|
770
|
+
unit_time_col_original = self.get_unit_time_cols()
|
|
771
|
+
covariates = self.get_covariates()
|
|
772
|
+
|
|
773
|
+
treatment_cols = self.get_treatment_cols()
|
|
774
|
+
treatment_cols_new = treatment_cols
|
|
775
|
+
|
|
776
|
+
no_treatments = no_treatments_old+1
|
|
777
|
+
key_counter = no_treatments-1
|
|
778
|
+
|
|
779
|
+
tt = tools.treatment_times(
|
|
780
|
+
data = did_modeldata_old,
|
|
781
|
+
unit_col=config.UNIT_COL,
|
|
782
|
+
time_col=config.TIME_COL,
|
|
783
|
+
treatment_col=treatment_name,
|
|
784
|
+
verbose=verbose
|
|
785
|
+
)
|
|
786
|
+
|
|
787
|
+
tt_date = [datetime.strptime(t, treatment_meta_old["date_format"]) for t in tt[1]]
|
|
788
|
+
treatment_period_start = min(tt_date)
|
|
789
|
+
treatment_period_end = max(tt_date)
|
|
790
|
+
treatment_period_start = treatment_period_start.strftime("%Y-%m-%d")
|
|
791
|
+
treatment_period_end = treatment_period_end.strftime("%Y-%m-%d")
|
|
792
|
+
|
|
793
|
+
is_notreatment_result = tools.is_notreatment(
|
|
794
|
+
data = did_modeldata_old,
|
|
795
|
+
unit_col=config.UNIT_COL,
|
|
796
|
+
treatment_col=treatment_name,
|
|
797
|
+
verbose = verbose
|
|
798
|
+
)
|
|
799
|
+
|
|
800
|
+
treatment_group = is_notreatment_result[1]
|
|
801
|
+
control_group = is_notreatment_result[2]
|
|
802
|
+
|
|
803
|
+
if verbose:
|
|
804
|
+
print(f"Constructing treatment from column '{treatment_name}'", end = " ... ")
|
|
805
|
+
|
|
806
|
+
new_groups = create_groups(
|
|
807
|
+
treatment_group = treatment_group,
|
|
808
|
+
control_group = control_group,
|
|
809
|
+
treatment_name = treatment_name,
|
|
810
|
+
verbose=False
|
|
740
811
|
)
|
|
812
|
+
new_groups_data_df = new_groups.get_data()[0]
|
|
813
|
+
new_groups_config = new_groups.get_config()
|
|
814
|
+
TG_col = new_groups_config[0]["TG_col"]
|
|
815
|
+
|
|
816
|
+
new_treatment = create_treatment(
|
|
817
|
+
study_period = [treatment_meta_old["study_period_start"], treatment_meta_old["study_period_end"]],
|
|
818
|
+
treatment_period = [treatment_period_start, treatment_period_end],
|
|
819
|
+
freq = treatment_meta_old["frequency"],
|
|
820
|
+
date_format = treatment_meta_old["date_format"],
|
|
821
|
+
treatment_name = treatment_name,
|
|
822
|
+
pre_post = treatment_meta_old["pre_post"],
|
|
823
|
+
after_treatment_period = after_treatment_period,
|
|
824
|
+
verbose=False
|
|
825
|
+
)
|
|
826
|
+
|
|
827
|
+
new_treatment_data_df = new_treatment.get_data()
|
|
828
|
+
|
|
829
|
+
new_treatment_config = new_treatment.get_config()
|
|
830
|
+
TT_col = new_treatment_config[0]["TT_col"]
|
|
831
|
+
ATT_col = new_treatment_config[0]["ATT_col"]
|
|
832
|
+
|
|
833
|
+
treatment_cols_new[key_counter] = {
|
|
834
|
+
"TT_col": TT_col,
|
|
835
|
+
"ATT_col": ATT_col,
|
|
836
|
+
"treatment_name": treatment_name,
|
|
837
|
+
"after_treatment_name": after_treatment_name
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
groups_config_new = groups_config_old
|
|
841
|
+
groups_config_new[key_counter] = new_groups_config[0]
|
|
842
|
+
groups_data_new = groups_data_old
|
|
843
|
+
groups_data_old.append(new_groups_data_df)
|
|
844
|
+
groups_new = DiffGroups(
|
|
845
|
+
groups_data_new,
|
|
846
|
+
groups_config_new,
|
|
847
|
+
timestamp = helper.create_timestamp(function="define_treatment")
|
|
848
|
+
)
|
|
849
|
+
|
|
850
|
+
treatment_meta_new = treatment_meta_old
|
|
851
|
+
treatment_meta_new["no_treatments"] = no_treatments
|
|
852
|
+
treatment_config_new = treatment_config_old
|
|
853
|
+
treatment_config_new[key_counter] = new_treatment_config[0]
|
|
854
|
+
|
|
855
|
+
treatment_new = DiffTreatment(
|
|
856
|
+
new_treatment_data_df,
|
|
857
|
+
treatment_config_new,
|
|
858
|
+
treatment_meta_new,
|
|
859
|
+
timestamp = helper.create_timestamp(function="define_treatment")
|
|
860
|
+
)
|
|
741
861
|
|
|
742
862
|
if verbose:
|
|
743
863
|
print("OK")
|
|
744
|
-
|
|
745
|
-
|
|
864
|
+
|
|
865
|
+
if treatment_name in covariates:
|
|
866
|
+
|
|
867
|
+
if verbose:
|
|
868
|
+
print(f"NOTE: Column '{treatment_name}' was defined as covariate before and is now removed from covariates list.")
|
|
869
|
+
|
|
870
|
+
covariates.remove(treatment_name)
|
|
871
|
+
|
|
872
|
+
self.data[0] = did_modeldata_old
|
|
873
|
+
self.data[1] = groups_new
|
|
874
|
+
self.data[2] = treatment_new
|
|
875
|
+
self.data[3] = outcome_col_original
|
|
876
|
+
self.data[4] = unit_time_col_original
|
|
877
|
+
self.data[5] = covariates
|
|
878
|
+
self.data[6] = treatment_cols_new
|
|
879
|
+
self.data[7][len(self.data[7])] = helper.create_timestamp(function="define_treatment")
|
|
880
|
+
|
|
881
|
+
return self
|
|
746
882
|
|
|
747
883
|
def add_segmentation(
|
|
748
884
|
self,
|
|
@@ -979,8 +1115,8 @@ class DiffData:
|
|
|
979
1115
|
if value["after_treatment_name"] is not None:
|
|
980
1116
|
after_treatment_col[key] = value["after_treatment_name"]
|
|
981
1117
|
if value["ATT_col"] is not None:
|
|
982
|
-
ATT_col[key] = value["ATT_col"]
|
|
983
|
-
|
|
1118
|
+
ATT_col[key] = value["ATT_col"]
|
|
1119
|
+
|
|
984
1120
|
did_results = didanalysis.did_analysis(
|
|
985
1121
|
data = did_modeldata,
|
|
986
1122
|
TG_col = TG_col,
|
|
@@ -1038,9 +1174,6 @@ def merge_data(
|
|
|
1038
1174
|
]
|
|
1039
1175
|
)
|
|
1040
1176
|
|
|
1041
|
-
if verbose:
|
|
1042
|
-
print("Merging groups and treatment data", end = " ... ")
|
|
1043
|
-
|
|
1044
1177
|
groups_data_df = diff_groups.get_data()
|
|
1045
1178
|
groups_data_df = groups_data_df[0]
|
|
1046
1179
|
|
|
@@ -1096,6 +1229,9 @@ def merge_data(
|
|
|
1096
1229
|
verbose=verbose
|
|
1097
1230
|
)
|
|
1098
1231
|
|
|
1232
|
+
if verbose:
|
|
1233
|
+
print("Merging groups and treatment data", end = " ... ")
|
|
1234
|
+
|
|
1099
1235
|
if keep_columns:
|
|
1100
1236
|
outcome_data_short = outcome_data
|
|
1101
1237
|
else:
|
|
@@ -1129,7 +1265,8 @@ def merge_data(
|
|
|
1129
1265
|
}
|
|
1130
1266
|
}
|
|
1131
1267
|
|
|
1132
|
-
timestamp =
|
|
1268
|
+
timestamp = {}
|
|
1269
|
+
timestamp[0] = helper.create_timestamp(function="merge_data")
|
|
1133
1270
|
|
|
1134
1271
|
did_data_all = DiffData(
|
|
1135
1272
|
did_modeldata,
|
|
@@ -1196,8 +1333,6 @@ def create_data(
|
|
|
1196
1333
|
verbose = verbose
|
|
1197
1334
|
)
|
|
1198
1335
|
|
|
1199
|
-
did_data_all.timestamp = helper.create_timestamp(function="create_data")
|
|
1200
|
-
|
|
1201
1336
|
return did_data_all
|
|
1202
1337
|
|
|
1203
1338
|
def create_counterfactual(
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
# Author: Thomas Wieland
|
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
|
6
6
|
# mail: geowieland@googlemail.com
|
|
7
|
-
# Version: 2.1.
|
|
8
|
-
# Last update: 2026-02-
|
|
7
|
+
# Version: 2.1.6
|
|
8
|
+
# Last update: 2026-02-26 18:33
|
|
9
9
|
# Copyright (c) 2025-2026 Thomas Wieland
|
|
10
10
|
#-----------------------------------------------------------------------
|
|
11
11
|
|
|
@@ -46,6 +46,30 @@ def check_columns(
|
|
|
46
46
|
if missing_columns:
|
|
47
47
|
raise KeyError(f"Data do not contain column(s): {', '.join(missing_columns)}")
|
|
48
48
|
|
|
49
|
+
def is_numeric(
|
|
50
|
+
df: pd.DataFrame,
|
|
51
|
+
columns: list,
|
|
52
|
+
verbose: bool = config.VERBOSE
|
|
53
|
+
):
|
|
54
|
+
|
|
55
|
+
if len(columns) > 0:
|
|
56
|
+
|
|
57
|
+
if verbose:
|
|
58
|
+
print(f"Checking if column(s) {', '.join(columns)} are numeric", end=" ... ")
|
|
59
|
+
|
|
60
|
+
non_numeric_columns = []
|
|
61
|
+
|
|
62
|
+
for col in columns:
|
|
63
|
+
|
|
64
|
+
if not pd.api.types.is_numeric_dtype(df[col]):
|
|
65
|
+
non_numeric_columns.append(col)
|
|
66
|
+
|
|
67
|
+
if verbose:
|
|
68
|
+
print("OK")
|
|
69
|
+
|
|
70
|
+
if non_numeric_columns:
|
|
71
|
+
raise KeyError(f"Data contain non-numeric column(s): {', '.join(non_numeric_columns)}")
|
|
72
|
+
|
|
49
73
|
def panel_index(
|
|
50
74
|
data: pd.DataFrame,
|
|
51
75
|
unit_col: str,
|
|
@@ -527,8 +551,11 @@ def is_multiple_treatment_period(
|
|
|
527
551
|
unit_treatment = data_sub[treatment_col]
|
|
528
552
|
|
|
529
553
|
groups = (unit_treatment != unit_treatment.shift()).cumsum()
|
|
530
|
-
|
|
531
|
-
|
|
554
|
+
|
|
555
|
+
if config.ACCEPT_CONTINUOUS_TREATMENTS:
|
|
556
|
+
periods_count = (unit_treatment > 0).groupby(groups).any().sum()
|
|
557
|
+
else:
|
|
558
|
+
periods_count = (unit_treatment == 1).groupby(groups).any().sum()
|
|
532
559
|
|
|
533
560
|
unit_treatment_periods[unit] = int(periods_count)
|
|
534
561
|
|
|
@@ -636,25 +663,31 @@ def treatment_times(
|
|
|
636
663
|
verbose=verbose
|
|
637
664
|
)
|
|
638
665
|
|
|
639
|
-
is_multiple_treatment_period(
|
|
666
|
+
is_multiple_treatment_period_result = is_multiple_treatment_period(
|
|
640
667
|
data = data,
|
|
641
668
|
unit_col = unit_col,
|
|
642
669
|
treatment_col = treatment_col,
|
|
643
670
|
verbose = verbose
|
|
644
|
-
)
|
|
671
|
+
)
|
|
645
672
|
|
|
646
673
|
if verbose:
|
|
647
674
|
print(f"Identifying treatment times for treatment '{treatment_col}'", end = " ... ")
|
|
648
675
|
|
|
649
|
-
|
|
650
|
-
|
|
676
|
+
if config.ACCEPT_CONTINUOUS_TREATMENTS:
|
|
677
|
+
tt = list(unique(data.loc[data[treatment_col] > 0, time_col]))
|
|
678
|
+
else:
|
|
679
|
+
tt = list(unique(data.loc[data[treatment_col] == 1, time_col]))
|
|
680
|
+
|
|
651
681
|
units = unique(data[unit_col])
|
|
652
682
|
|
|
653
683
|
units_tt = pd.DataFrame(columns = [unit_col, "treatment_min", "treatment_max"])
|
|
654
684
|
|
|
655
685
|
for unit in units:
|
|
656
686
|
|
|
657
|
-
|
|
687
|
+
if config.ACCEPT_CONTINUOUS_TREATMENTS:
|
|
688
|
+
data_unit_tt = data[(data[unit_col] == unit) & (data[treatment_col] > 0)]
|
|
689
|
+
else:
|
|
690
|
+
data_unit_tt = data[(data[unit_col] == unit) & (data[treatment_col] == 1)]
|
|
658
691
|
|
|
659
692
|
if data_unit_tt.empty:
|
|
660
693
|
continue
|
|
@@ -678,7 +711,7 @@ def treatment_times(
|
|
|
678
711
|
|
|
679
712
|
if verbose:
|
|
680
713
|
print("OK")
|
|
681
|
-
|
|
714
|
+
|
|
682
715
|
return [
|
|
683
716
|
units_tt,
|
|
684
717
|
tt
|
|
@@ -796,9 +829,9 @@ def fit_metrics(
|
|
|
796
829
|
|
|
797
830
|
assert observed_no == expected_no, "Error while calculating fit metrics: Observed and expected differ in length"
|
|
798
831
|
|
|
799
|
-
if not pd.api.types.is_numeric_dtype(observed):
|
|
832
|
+
if not pd.api.types.is_numeric_dtype(observed) or not np.issubdtype(observed.dtype, np.number):
|
|
800
833
|
raise ValueError("Error while calculating fit metrics: Observed column is not numeric")
|
|
801
|
-
if not pd.api.types.is_numeric_dtype(expected):
|
|
834
|
+
if not pd.api.types.is_numeric_dtype(expected) or not np.issubdtype(expected.dtype, np.number):
|
|
802
835
|
raise ValueError("Error while calculating fit metrics: Expected column is not numeric")
|
|
803
836
|
|
|
804
837
|
if outcome_col is not None:
|
|
@@ -810,8 +843,8 @@ def fit_metrics(
|
|
|
810
843
|
|
|
811
844
|
if remove_nan:
|
|
812
845
|
|
|
813
|
-
observed =
|
|
814
|
-
expected =
|
|
846
|
+
observed = np.array(observed)
|
|
847
|
+
expected = np.array(expected)
|
|
815
848
|
|
|
816
849
|
obs_exp = pd.DataFrame(
|
|
817
850
|
{
|
|
@@ -968,6 +1001,7 @@ def check_date_format(
|
|
|
968
1001
|
|
|
969
1002
|
if len(invalid_dates) > 0:
|
|
970
1003
|
invalid_dates_included = True
|
|
1004
|
+
invalid_dates = [str(d) for d in invalid_dates]
|
|
971
1005
|
|
|
972
1006
|
return [
|
|
973
1007
|
invalid_dates_included,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: diffindiff
|
|
3
|
-
Version: 2.2.6
|
|
3
|
+
Version: 2.2.7
|
|
4
4
|
Summary: diffindiff: Python library for convenient Difference-in-Differences analyses
|
|
5
5
|
Author: Thomas Wieland
|
|
6
6
|
Author-email: geowieland@googlemail.com
|
|
@@ -27,7 +27,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
|
|
|
27
27
|
|
|
28
28
|
If you use this software, please cite:
|
|
29
29
|
|
|
30
|
-
Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.
|
|
30
|
+
Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.7) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
## Installation
|
|
@@ -167,11 +167,9 @@ See the /tests directory for usage examples of most of the included functions.
|
|
|
167
167
|
- Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
|
|
168
168
|
|
|
169
169
|
|
|
170
|
-
## What's new (v2.2.6)
|
|
170
|
+
## What's new (v2.2.7)
|
|
171
|
+
- Functions
|
|
172
|
+
- diddata.DiffData.define_treatment() for constructing a new treatment from a column in the dataframe
|
|
171
173
|
- Bugfixes:
|
|
172
|
-
-
|
|
173
|
-
-
|
|
174
|
-
- Removed unnecessary old dependencies and imports
|
|
175
|
-
- Other:
|
|
176
|
-
- Changed diddata.DiffGroups.add_segmentation() to return a message rather than raising an exception when the DiffGroups object already includes a benefit group
|
|
177
|
-
|
|
174
|
+
- didtools.treatment_times() and didtools.is_multiple_treatment_period() now also identify continuous treatments correctly
|
|
175
|
+
- Fixed problematic type conversion in didtools.fit_metrics()
|
|
@@ -7,7 +7,7 @@ def read_README():
|
|
|
7
7
|
|
|
8
8
|
setup(
|
|
9
9
|
name='diffindiff',
|
|
10
|
-
version='2.2.
|
|
10
|
+
version='2.2.7',
|
|
11
11
|
description='diffindiff: Python library for convenient Difference-in-Differences analyses',
|
|
12
12
|
packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
|
|
13
13
|
include_package_data=True,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|