PyPI - diffindiff - Versions diffs - 2.2.6__tar.gz → 2.2.7__tar.gz - Mend

diffindiff 2.2.6.tar.gz → 2.2.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

{diffindiff-2.2.6 → diffindiff-2.2.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: diffindiff
-Version: 2.2.6
+Version: 2.2.7
 Summary: diffindiff: Python library for convenient Difference-in-Differences analyses
 Author: Thomas Wieland
 Author-email: geowieland@googlemail.com
@@ -27,7 +27,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
 If you use this software, please cite:
-Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.6) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
+Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.7) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
 ## Installation
@@ -167,11 +167,9 @@ See the /tests directory for usage examples of most of the included functions.
   - Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
-## What's new (v2.2.6)
+## What's new (v2.2.7)
+- Functions
+  - diddata.DiffData.define_treatment() for constructing a new treatment from a column in the dataframe
 - Bugfixes:
-  - Check for correct dates in diddata.create_treatment()
-  - Check for valid columns in diddata.merge_data()
-  - Removed unnecessary old dependencies and imports
-- Other:
-  - Changed diddata.DiffGroups.add_segmentation() to return a message rather than raising an exception when the DiffGroups object already includes a benefit group
+  - didtools.treatment_times() and didtools.is_multiple_treatment_period() now also identify continuous treatments correctly
+  - Fixed problematic type conversion in didtools.fit_metrics()

{diffindiff-2.2.6 → diffindiff-2.2.7}/README.md RENAMED Viewed

@@ -19,7 +19,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
 If you use this software, please cite:
-Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.6) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
+Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.7) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
 ## Installation
@@ -159,11 +159,9 @@ See the /tests directory for usage examples of most of the included functions.
   - Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
-## What's new (v2.2.6)
+## What's new (v2.2.7)
+- Functions
+  - diddata.DiffData.define_treatment() for constructing a new treatment from a column in the dataframe
 - Bugfixes:
-  - Check for correct dates in diddata.create_treatment()
-  - Check for valid columns in diddata.merge_data()
-  - Removed unnecessary old dependencies and imports
-- Other:
-  - Changed diddata.DiffGroups.add_segmentation() to return a message rather than raising an exception when the DiffGroups object already includes a benefit group
+  - didtools.treatment_times() and didtools.is_multiple_treatment_period() now also identify continuous treatments correctly
+  - Fixed problematic type conversion in didtools.fit_metrics()

{diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/config.py RENAMED Viewed

@@ -4,23 +4,25 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     1.0.5
-# Last update: 2026-02-20 17:30
+# Version:     1.0.6
+# Last update: 2026-02-26 18:04
 # Copyright (c) 2025-2026 Thomas Wieland
 #-----------------------------------------------------------------------
 # Basic config:
 PACKAGE_NAME = "diffindiff"
-PACKAGE_VERSION = "2.2.6"
+PACKAGE_VERSION = "2.2.7"
-VERBOSE = False
+VERBOSE = True
 ROUND_STATISTIC = 3
 ROUND_PERCENT = 2
 AUTO_SWITCH_TO_PREPOST = True
+ACCEPT_CONTINUOUS_TREATMENTS = True
 # Description texts:
 DID_DESCRIPTION = "Difference-in-Differences Analysis"

{diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/didanalysis.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     2.2.3
-# Last update: 2026-02-20 17:42
+# Version:     2.2.4
+# Last update: 2026-02-26 18:04
 # Copyright (c) 2024-2026 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -1356,8 +1356,8 @@ def did_analysis(
     missing_replace_by_zero: bool = False,
     fit_by = "ols_fit",
     verbose: bool = config.VERBOSE
-    ):
+    ):
     tools.check_columns(
         df = data,
         columns = [
@@ -1384,6 +1384,12 @@ def did_analysis(
         verbose = verbose
         )
+    tools.is_numeric(
+        df = data,
+        columns = treatment_col,
+        verbose = verbose
+        )
     cols_relevant = [
         unit_col,
         time_col,
@@ -1807,7 +1813,7 @@ def did_analysis(
         }
     if bonferroni:
-        confint_alpha = confint_alpha/no_treatments
+        confint_alpha = confint_alpha/no_treatments
     if fit_by == "ml":
         fit_result = helper.ml_fit(
@@ -1824,7 +1830,7 @@ def did_analysis(
             cluster_SE_by = cluster_SE_by,
             verbose = verbose
         )
     model_results = helper.extract_model_results(
         fit_result = fit_result,
         TG_col = TG_col,

{diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/didanalysis_helper.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     1.0.6
-# Last update: 2025-02-20 17:38
+# Version:     1.0.7
+# Last update: 2025-02-26 18:02
 # Copyright (c) 2025-2026 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -203,7 +203,7 @@ def create_spillover(
                 time_col = time_col,
                 treatment_col = treatment,
                 create_TT_col = TT_col,
-                verbose = verbose
+                verbose = False
                 )[0]
         sp_unit_col = f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"
@@ -396,7 +396,11 @@ def treatment_diagnostics(
         )
     if verbose:
-        print(f"There are {no_treatments} treatments (simultaneous: {no_treatments-staggered_count}, staggered: {staggered_count}) with {untreated[0]} treated and {untreated[1]} untreated units.")
+        if no_treatments > 1:
+            print(f"There are {no_treatments} treatments (simultaneous: {no_treatments-staggered_count}, staggered: {staggered_count}) with {untreated[0]} treated and {untreated[1]} untreated units.")
+        else:
+            print(f"There is {no_treatments} treatment (staggered: {staggered_count}) with {untreated[0]} treated and {untreated[1]} untreated units.")
     return [
         treatment_diagnostics_results,

{diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/diddata.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     2.1.6
-# Last update: 2026-02-20 18:28
+# Version:     2.1.8
+# Last update: 2026-02-26 18:30
 # Copyright (c) 2024-2026 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -486,7 +486,7 @@ class DiffData:
         variables: list = None,
         unit_col: str = None,
         time_col: str = None,
-        verbose: bool = config.VERBOSE
+        verbose: bool = False
         ):
         if unit_col is None and time_col is None:
@@ -579,6 +579,7 @@ class DiffData:
         self.data[0] = did_modeldata
         self.data[5] = variables
+        self.data[7][len(self.data[7])] = helper.create_timestamp(function="add_covariates")
         if verbose:
             print("OK")
@@ -622,7 +623,6 @@ class DiffData:
         groups_data_old = did_groups_old.get_data()
         did_modeldata_old = self.get_did_modeldata_df()
-        unit_id_col, time_col = self.get_unit_time_cols()
         outcome_col_original = self.data[3]
         unit_time_col_original = self.get_unit_time_cols()
         covariates = self.get_covariates()
@@ -728,21 +728,157 @@ class DiffData:
             timestamp = helper.create_timestamp(function="add_treatment")
             )
-        did_data_new = DiffData(
-            did_modeldata = did_modeldata_new,
-            diff_groups = groups_new,
-            diff_treatment = treatment_new,
-            outcome_col_original = outcome_col_original,
-            unit_time_col_original = unit_time_col_original,
-            covariates = covariates,
-            treatment_cols = treatment_cols_new,
-            timestamp = helper.create_timestamp(function="add_segmentation")
+        if verbose:
+            print("OK")
+        self.data[0] = did_modeldata_new
+        self.data[1] = groups_new
+        self.data[2] = treatment_new
+        self.data[3] = outcome_col_original
+        self.data[4] = unit_time_col_original
+        self.data[5] = covariates
+        self.data[6] = treatment_cols_new
+        self.data[7][len(self.data[7])] = helper.create_timestamp(function="add_treatment")
+        return self
+    def define_treatment(
+        self,
+        treatment_name,
+        after_treatment_period: bool = False,
+        after_treatment_name = None,
+        verbose: bool = config.VERBOSE
+        ):
+        if not treatment_name:
+            raise ValueError("When adding a treatment from the data, you need to specify a treatment column with parameter treament_name = [your_treatment].")
+        if treatment_name not in self.get_did_modeldata_df().columns:
+            raise KeyError(f"Column '{treatment_name}' not in data frame")
+        did_treatment_old = self.get_did_treatment()
+        treatment_config_old = did_treatment_old.get_config()
+        treatment_meta_old = did_treatment_old.get_metadata()
+        no_treatments_old = treatment_meta_old["no_treatments"]
+        did_groups_old = self.get_did_groups()
+        groups_config_old = did_groups_old.get_config()
+        groups_data_old = did_groups_old.get_data()
+        did_modeldata_old = self.get_did_modeldata_df()
+        outcome_col_original = self.data[3]
+        unit_time_col_original = self.get_unit_time_cols()
+        covariates = self.get_covariates()
+        treatment_cols = self.get_treatment_cols()
+        treatment_cols_new = treatment_cols
+        no_treatments = no_treatments_old+1
+        key_counter = no_treatments-1
+        tt = tools.treatment_times(
+            data = did_modeldata_old,
+            unit_col=config.UNIT_COL,
+            time_col=config.TIME_COL,
+            treatment_col=treatment_name,
+            verbose=verbose
+        )
+        tt_date = [datetime.strptime(t, treatment_meta_old["date_format"]) for t in tt[1]]
+        treatment_period_start = min(tt_date)
+        treatment_period_end = max(tt_date)
+        treatment_period_start = treatment_period_start.strftime("%Y-%m-%d")
+        treatment_period_end = treatment_period_end.strftime("%Y-%m-%d")
+        is_notreatment_result = tools.is_notreatment(
+            data = did_modeldata_old,
+            unit_col=config.UNIT_COL,
+            treatment_col=treatment_name,
+            verbose = verbose
+            )
+        treatment_group = is_notreatment_result[1]
+        control_group = is_notreatment_result[2]
+        if verbose:
+            print(f"Constructing treatment from column '{treatment_name}'", end = " ... ")
+        new_groups = create_groups(
+            treatment_group = treatment_group,
+            control_group = control_group,
+            treatment_name = treatment_name,
+            verbose=False
             )
+        new_groups_data_df = new_groups.get_data()[0]
+        new_groups_config = new_groups.get_config()
+        TG_col = new_groups_config[0]["TG_col"]
+        new_treatment = create_treatment(
+            study_period = [treatment_meta_old["study_period_start"], treatment_meta_old["study_period_end"]],
+            treatment_period = [treatment_period_start, treatment_period_end],
+            freq = treatment_meta_old["frequency"],
+            date_format = treatment_meta_old["date_format"],
+            treatment_name = treatment_name,
+            pre_post = treatment_meta_old["pre_post"],
+            after_treatment_period = after_treatment_period,
+            verbose=False
+            )
+        new_treatment_data_df = new_treatment.get_data()
+        new_treatment_config = new_treatment.get_config()
+        TT_col = new_treatment_config[0]["TT_col"]
+        ATT_col = new_treatment_config[0]["ATT_col"]
+        treatment_cols_new[key_counter] = {
+            "TT_col": TT_col,
+            "ATT_col": ATT_col,
+            "treatment_name": treatment_name,
+            "after_treatment_name": after_treatment_name
+            }
+        groups_config_new = groups_config_old
+        groups_config_new[key_counter] = new_groups_config[0]
+        groups_data_new = groups_data_old
+        groups_data_old.append(new_groups_data_df)
+        groups_new = DiffGroups(
+            groups_data_new,
+            groups_config_new,
+            timestamp = helper.create_timestamp(function="define_treatment")
+            )
+        treatment_meta_new = treatment_meta_old
+        treatment_meta_new["no_treatments"] = no_treatments
+        treatment_config_new = treatment_config_old
+        treatment_config_new[key_counter] = new_treatment_config[0]
+        treatment_new = DiffTreatment(
+            new_treatment_data_df,
+            treatment_config_new,
+            treatment_meta_new,
+            timestamp = helper.create_timestamp(function="define_treatment")
+            )
         if verbose:
             print("OK")
-        return did_data_new
+        if treatment_name in covariates:
+            if verbose:
+                print(f"NOTE: Column '{treatment_name}' was defined as covariate before and is now removed from covariates list.")
+            covariates.remove(treatment_name)
+        self.data[0] = did_modeldata_old
+        self.data[1] = groups_new
+        self.data[2] = treatment_new
+        self.data[3] = outcome_col_original
+        self.data[4] = unit_time_col_original
+        self.data[5] = covariates
+        self.data[6] = treatment_cols_new
+        self.data[7][len(self.data[7])] = helper.create_timestamp(function="define_treatment")
+        return self
     def add_segmentation(
         self,
@@ -979,8 +1115,8 @@ class DiffData:
                 if value["after_treatment_name"] is not None:
                     after_treatment_col[key] = value["after_treatment_name"]
                 if value["ATT_col"] is not None:
-                    ATT_col[key] = value["ATT_col"]
+                    ATT_col[key] = value["ATT_col"]
             did_results = didanalysis.did_analysis(
                 data = did_modeldata,
                 TG_col = TG_col,
@@ -1038,9 +1174,6 @@ def merge_data(
             ]
         )
-    if verbose:
-        print("Merging groups and treatment data", end = " ... ")
     groups_data_df = diff_groups.get_data()
     groups_data_df = groups_data_df[0]
@@ -1096,6 +1229,9 @@ def merge_data(
         verbose=verbose
         )
+    if verbose:
+        print("Merging groups and treatment data", end = " ... ")
     if keep_columns:
         outcome_data_short = outcome_data
     else:
@@ -1129,7 +1265,8 @@ def merge_data(
             }
         }
-    timestamp = helper.create_timestamp(function="merge_data")
+    timestamp = {}
+    timestamp[0] = helper.create_timestamp(function="merge_data")
     did_data_all = DiffData(
         did_modeldata,
@@ -1196,8 +1333,6 @@ def create_data(
         verbose = verbose
         )
-    did_data_all.timestamp = helper.create_timestamp(function="create_data")
     return did_data_all
 def create_counterfactual(

{diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff/didtools.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     2.1.5
-# Last update: 2026-02-20 17:43
+# Version:     2.1.6
+# Last update: 2026-02-26 18:33
 # Copyright (c) 2025-2026 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -46,6 +46,30 @@ def check_columns(
         if missing_columns:
             raise KeyError(f"Data do not contain column(s): {', '.join(missing_columns)}")
+def is_numeric(
+    df: pd.DataFrame,
+    columns: list,
+    verbose: bool = config.VERBOSE
+    ):
+    if len(columns) > 0:
+        if verbose:
+            print(f"Checking if column(s) {', '.join(columns)} are numeric", end=" ... ")
+        non_numeric_columns = []
+        for col in columns:
+            if not pd.api.types.is_numeric_dtype(df[col]):
+                non_numeric_columns.append(col)
+        if verbose:
+            print("OK")
+        if non_numeric_columns:
+            raise KeyError(f"Data contain non-numeric column(s): {', '.join(non_numeric_columns)}")
 def panel_index(
     data: pd.DataFrame,
     unit_col: str,
@@ -527,8 +551,11 @@ def is_multiple_treatment_period(
         unit_treatment = data_sub[treatment_col]
         groups = (unit_treatment != unit_treatment.shift()).cumsum()
-        periods_count = (unit_treatment == 1).groupby(groups).any().sum()
+        if config.ACCEPT_CONTINUOUS_TREATMENTS:
+            periods_count = (unit_treatment > 0).groupby(groups).any().sum()
+        else:
+            periods_count = (unit_treatment == 1).groupby(groups).any().sum()
         unit_treatment_periods[unit] = int(periods_count)
@@ -636,25 +663,31 @@ def treatment_times(
         verbose=verbose
         )
-    is_multiple_treatment_period(
+    is_multiple_treatment_period_result = is_multiple_treatment_period(
         data = data,
         unit_col = unit_col,
         treatment_col = treatment_col,
         verbose = verbose
-        )[0]
+        )
     if verbose:
         print(f"Identifying treatment times for treatment '{treatment_col}'", end = " ... ")
-    tt = list(unique(data.loc[data[treatment_col] == 1, time_col]))
+    if config.ACCEPT_CONTINUOUS_TREATMENTS:
+        tt = list(unique(data.loc[data[treatment_col] > 0, time_col]))
+    else:
+        tt = list(unique(data.loc[data[treatment_col] == 1, time_col]))
     units = unique(data[unit_col])
     units_tt = pd.DataFrame(columns = [unit_col, "treatment_min", "treatment_max"])
     for unit in units:
-        data_unit_tt = data[(data[unit_col] == unit) & (data[treatment_col] == 1)]
+        if config.ACCEPT_CONTINUOUS_TREATMENTS:
+            data_unit_tt = data[(data[unit_col] == unit) & (data[treatment_col] > 0)]
+        else:
+            data_unit_tt = data[(data[unit_col] == unit) & (data[treatment_col] == 1)]
         if data_unit_tt.empty:
             continue
@@ -678,7 +711,7 @@ def treatment_times(
     if verbose:
         print("OK")
     return [
         units_tt,
         tt
@@ -796,9 +829,9 @@ def fit_metrics(
     assert observed_no == expected_no, "Error while calculating fit metrics: Observed and expected differ in length"
-    if not pd.api.types.is_numeric_dtype(observed):
+    if not pd.api.types.is_numeric_dtype(observed) or not np.issubdtype(observed.dtype, np.number):
         raise ValueError("Error while calculating fit metrics: Observed column is not numeric")
-    if not pd.api.types.is_numeric_dtype(expected):
+    if not pd.api.types.is_numeric_dtype(expected) or not np.issubdtype(expected.dtype, np.number):
         raise ValueError("Error while calculating fit metrics: Expected column is not numeric")
     if outcome_col is not None:
@@ -810,8 +843,8 @@ def fit_metrics(
     if remove_nan:
-        observed = observed.reset_index(drop=True)
-        expected = expected.reset_index(drop=True)
+        observed = np.array(observed)
+        expected = np.array(expected)
         obs_exp = pd.DataFrame(
             {
@@ -968,6 +1001,7 @@ def check_date_format(
     if len(invalid_dates) > 0:
         invalid_dates_included = True
+        invalid_dates = [str(d) for d in invalid_dates]
     return [
         invalid_dates_included,

{diffindiff-2.2.6 → diffindiff-2.2.7}/diffindiff.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: diffindiff
-Version: 2.2.6
+Version: 2.2.7
 Summary: diffindiff: Python library for convenient Difference-in-Differences analyses
 Author: Thomas Wieland
 Author-email: geowieland@googlemail.com
@@ -27,7 +27,7 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
 If you use this software, please cite:
-Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.6) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
+Wieland, T. (2026). diffindiff: A Python library for convenient difference-in-differences analyses (Version 2.2.7) [Computer software]. Zenodo. https://doi.org/10.5281/zenodo.18656820
 ## Installation
@@ -167,11 +167,9 @@ See the /tests directory for usage examples of most of the included functions.
   - Wooldridge JM (2012) *Introductory Econometrics. A Modern Approach*.
-## What's new (v2.2.6)
+## What's new (v2.2.7)
+- Functions
+  - diddata.DiffData.define_treatment() for constructing a new treatment from a column in the dataframe
 - Bugfixes:
-  - Check for correct dates in diddata.create_treatment()
-  - Check for valid columns in diddata.merge_data()
-  - Removed unnecessary old dependencies and imports
-- Other:
-  - Changed diddata.DiffGroups.add_segmentation() to return a message rather than raising an exception when the DiffGroups object already includes a benefit group
+  - didtools.treatment_times() and didtools.is_multiple_treatment_period() now also identify continuous treatments correctly
+  - Fixed problematic type conversion in didtools.fit_metrics()

{diffindiff-2.2.6 → diffindiff-2.2.7}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ def read_README():
 setup(
     name='diffindiff',
-    version='2.2.6',
+    version='2.2.7',
     description='diffindiff: Python library for convenient Difference-in-Differences analyses',
     packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
     include_package_data=True,