PyPI - diffindiff - Versions diffs - 2.2.2__tar.gz → 2.2.4__tar.gz - Mend

diffindiff 2.2.2 (tar.gz) → 2.2.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

{diffindiff-2.2.2 → diffindiff-2.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: diffindiff
-Version: 2.2.2
+Version: 2.2.4
 Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
 Author: Thomas Wieland
 Author-email: geowieland@googlemail.com
@@ -16,10 +16,11 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
 Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
-## Updates v2.2.2
+## Updates v2.2.4
 - Bugfixes:
-  - Spillover treatment works now
-  - Verbose per default to False
+  - Spillover treatment really really works now (only relevant in rare cases)
+  - Fixed merging in diddata.DiffData.add_covariates() (only relevant in rare cases)
+  - Dropping missing values consequently (only relevant in rare cases)
 ## Features

{diffindiff-2.2.2 → diffindiff-2.2.4}/README.md RENAMED Viewed

@@ -8,10 +8,11 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
 Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
-## Updates v2.2.2
+## Updates v2.2.4
 - Bugfixes:
-  - Spillover treatment works now
-  - Verbose per default to False
+  - Spillover treatment really really works now (only relevant in rare cases)
+  - Fixed merging in diddata.DiffData.add_covariates() (only relevant in rare cases)
+  - Dropping missing values consequently (only relevant in rare cases)
 ## Features

{diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/didanalysis.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     2.2.0
-# Last update: 2025-12-05 17:38
+# Version:     2.2.2
+# Last update: 2025-12-07 10:27
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -40,6 +40,9 @@ class DiffModel:
             timestamp
             ]
+    def get_did_modeldata_df (self):
+        return pd.DataFrame(self.data[2])
     def treatment_statistics(
         self,
         treatment: str = None,
@@ -890,7 +893,7 @@ class DiffModel:
         treatment_diagnostics = model_config["treatment_diagnostics"]
         no_treatments = model_config["no_treatments"]
         outcome_col = model_config["outcome_col"]
-        outcome_col_predicted = outcome_col+"_predicted"
+        outcome_col_predicted = f"{outcome_col}{config.PREDICTED_SUFFIX}"
         if TG_col is None and treatment is None:
             if no_treatments == 1:
@@ -925,8 +928,7 @@ class DiffModel:
         if ("TG" in plot_intervals_groups and "CG" not in plot_intervals_groups) or ("CG" in plot_intervals_groups and "TG" not in plot_intervals_groups):
             lines_labels_required = lines_labels_required+1
         if "TG" in plot_intervals_groups and "CG" in plot_intervals_groups:
-            lines_labels_required = lines_labels_required+2
+            lines_labels_required = lines_labels_required+2
         assert len(lines_col) == lines_col_required, f"Parameter 'lines_col' must be a list with {lines_col_required} entries"
         assert len(lines_style) == lines_style_required, f"Parameter 'lines_style' must be a list with {lines_col_required} entries"
         assert len(lines_labels) == lines_labels_required, f"Parameter 'lines_labels' must be a list with {lines_labels_required} entries"
@@ -1389,6 +1391,13 @@ def did_analysis(
         *treatment_col
         ]
+    data = tools.panel_index(
+        data = data,
+        unit_col = unit_col,
+        time_col = time_col,
+        verbose = verbose
+        )
     treatment_diagnostics_results = helper.treatment_diagnostics(
         data = data,
         unit_col=unit_col,
@@ -1728,6 +1737,7 @@ def did_analysis(
         spillover = helper.create_spillover(
             data=data,
             unit_col=unit_col,
+            time_col=time_col,
             treatment_col=treatment_col,
             spillover_treatment=spillover_treatment,
             spillover_units=spillover_units
@@ -1925,7 +1935,14 @@ def ddd_analysis(
             )
         cols_relevant = cols_relevant + covariates
+    data = tools.panel_index(
+        data = data,
+        unit_col = unit_col,
+        time_col = time_col,
+        verbose = verbose
+        )
     treatment_diagnostics_results = helper.treatment_diagnostics(
         data = data,
         unit_col=unit_col,

{diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/didanalysis_helper.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     1.0.3
-# Last update: 2025-12-06 11:52
+# Version:     1.0.5
+# Last update: 2025-12-07 10:27
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -172,7 +172,9 @@ def create_specific_treatment_effects(
 def create_spillover(
     data: pd.DataFrame,
     unit_col: str,
+    time_col: str,
     treatment_col: list,
+    TT_col: str = None,
     spillover_treatment: list = [],
     spillover_units: list = [],
     verbose: bool = config.VERBOSE
@@ -189,16 +191,36 @@ def create_spillover(
     spillover_unit_vars = []
     spillover_treatment_vars = []
-    for i, treatment in enumerate(treatment_col):
+    for treatment in treatment_col:
+        if TT_col is None:
+            TT_col = config.TT_COL
-        data[f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"] = 0
-        data[f"{config.SPILLOVER_PREFIX}{config.DELIMITER}{treatment}"] = 0
+            data = tools.treatment_time_col(
+                data = data,
+                unit_col = unit_col,
+                time_col = time_col,
+                treatment_col = treatment,
+                create_TT_col = TT_col,
+                verbose = verbose
+                )[0]
-        spillover_unit_vars.append(f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}")
-        spillover_treatment_vars.append(f"{config.SPILLOVER_PREFIX}{config.DELIMITER}{treatment}")
+        sp_unit_col = f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"
+        sp_treatment_col = f"{config.SPILLOVER_PREFIX}{config.DELIMITER}{treatment}"
-        data.loc[data[unit_col].astype(str).isin(spillover_units), f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"] = 1
-        data.loc[data[unit_col].astype(str).isin(spillover_units), f"{config.SPILLOVER_PREFIX}{config.DELIMITER}{treatment}"] = data.loc[data[unit_col].astype(str).isin(spillover_units), f"{config.SPILLOVER_UNIT_PREFIX}{config.DELIMITER}{treatment}"]*data.loc[data[unit_col].astype(str).isin(spillover_units), treatment]
+        data[sp_unit_col] = 0
+        data[sp_treatment_col] = 0
+        spillover_unit_vars.append(sp_unit_col)
+        spillover_treatment_vars.append(sp_treatment_col)
+        data.loc[
+            data[unit_col].astype(str).isin(spillover_units),
+            sp_unit_col
+            ] = 1
+        data[sp_treatment_col] = data[sp_unit_col]*data[TT_col]
     spillover_treatment_vars_join = ' + '.join(spillover_treatment_vars)

{diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/diddata.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     2.1.3
-# Last update: 2025-12-06 10:49
+# Version:     2.1.5
+# Last update: 2025-12-07 10:27
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -478,7 +478,7 @@ class DiffData:
         ):
         if unit_col is None and time_col is None:
-            raise ValueError("unit_col and/or time_col must be stated")
+            raise ValueError("Parameter 'unit_col' and/or 'time_col' must be stated")
         if verbose:
             if len(variables) > 0:
@@ -488,16 +488,16 @@ class DiffData:
         did_modeldata = self.get_did_modeldata_df()
+        additional_df = tools.panel_index(
+            data=additional_df,
+            unit_col=unit_col,
+            time_col=time_col,
+            verbose=verbose
+            )
         existing_variables = []
-        if unit_col is not None and time_col is not None:
-            additional_df = tools.panel_index(
-                data=additional_df,
-                unit_col=unit_col,
-                time_col=time_col,
-                verbose=verbose
-                )
+        if unit_col is not None and time_col is not None:
             if variables is None:
@@ -659,15 +659,15 @@ class DiffData:
         new_merge = tools.panel_index(
             data=new_merge,
-            unit_col=unit_id_col,
-            time_col=time_col,
+            unit_col=config.UNIT_COL,
+            time_col=config.TIME_COL,
             verbose=verbose
             )
         did_modeldata_old = tools.panel_index(
             data=did_modeldata_old,
-            unit_col=unit_id_col,
-            time_col=time_col,
+            unit_col=config.UNIT_COL,
+            time_col=config.TIME_COL,
             verbose=verbose
             )
@@ -1055,24 +1055,12 @@ def merge_data(
         treatment_data_df,
         how = "cross"
         )
-    if drop_missing or missing_replace_by_zero:
-        modeldata_ismissing = tools.is_missing(
-            data = did_modeldata,
-            drop_missing = drop_missing,
-            missing_replace_by_zero = missing_replace_by_zero,
-            verbose = False
-            )
-        did_modeldata = modeldata_ismissing[2]
     did_modeldata[treatment_name] = did_modeldata[TG_col] * did_modeldata[TT_col]
     if treatment_config["after_treatment_period"]:
         did_modeldata[after_treatment_name] = did_modeldata[TG_col] * did_modeldata[ATT_col]
-    if np.dtype(did_modeldata[config.TIME_COL]) != np.dtype(outcome_data[time_col]):
-        print(f"WARNING: Time columns of treatment data and outcome data differ: {str(np.dtype(did_modeldata[config.TIME_COL]))}, {str(np.dtype(outcome_data[time_col]))}. This might induce an error while building the model dataset.")
     did_modeldata = tools.panel_index(
         data=did_modeldata,
         unit_col=config.UNIT_COL,
@@ -1086,7 +1074,7 @@ def merge_data(
         time_col=time_col,
         verbose=verbose
         )
     if keep_columns:
         outcome_data_short = outcome_data
     else:
@@ -1097,6 +1085,15 @@ def merge_data(
         on=config.UNIT_TIME_COL,
         how="left"
         )
+    if drop_missing or missing_replace_by_zero:
+        modeldata_ismissing = tools.is_missing(
+            data = did_modeldata,
+            drop_missing = drop_missing,
+            missing_replace_by_zero = missing_replace_by_zero,
+            verbose = False
+            )
+        did_modeldata = modeldata_ismissing[2]
     outcome_col_original = outcome_col
     unit_time_col_original = unit_id_col, time_col
@@ -1230,7 +1227,7 @@ def create_counterfactual(
         unit_col = unit_col,
         time_col = time_col,
         treatment_col = treatment_col
-        )
+        )[0]
     units = tools.unique(units_tt[unit_col])
     if not isnotreatment[0]:

{diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff/didtools.py RENAMED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     2.1.1
-# Last update: 2025-12-06 10:48
+# Version:     2.1.4
+# Last update: 2025-12-07 10:27
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -54,25 +54,39 @@ def panel_index(
     ):
     to_str = []
-    unit_x_time = True
-    if data[unit_col].dtype != 'object':
-        data[unit_col] = data[unit_col].astype(str)
-        to_str.append(unit_col)
+    if unit_col is not None:
+        if data[unit_col].dtype != 'object':
+            data[unit_col] = data[unit_col].astype(str)
+            to_str.append(unit_col)
+    else:
+        if verbose:
+            print("NOTE: No unit column was stated")
+    if time_col is not None:
+        if data[time_col].dtype != 'object':
+            data[time_col] = data[time_col].astype(str)
+            to_str.append(time_col)
+    else:
+        if verbose:
+            print("NOTE: No time column was stated")
-    if data[time_col].dtype != 'object':
-        data[time_col] = data[time_col].astype(str)
-        to_str.append(time_col)
+    if verbose and len(to_str) > 0:
+        print(f"NOTE: The following columns were converted to str: {', '.join(to_str)}.")
     if config.UNIT_TIME_COL not in data.columns:
-        unit_x_time = False
-        data[config.UNIT_TIME_COL] = data[unit_col]+config.DELIMITER+data[time_col]
+        if unit_col is not None and time_col is not None:
-    if verbose:
-        if len(to_str) > 0:
-            print(f"NOTE: The following columns were converted to str: {', '.join(to_str)}.")
-        if not unit_x_time:
-            print(f"NOTE: The following unit-time-index column was included: {config.UNIT_TIME_COL}.")
+            data[config.UNIT_TIME_COL] = data[unit_col]+config.DELIMITER+data[time_col]
+            if verbose:
+                print(f"NOTE: The following unit-time-index column was created: {config.UNIT_TIME_COL}.")
+        else:
+            if verbose:
+                print("No unit-time-index column was created.")
     return data
@@ -170,8 +184,8 @@ def is_binary(
     if verbose:
         print("OK")
-    if not binary:
-        print(f"NOTE: treatment column '{treatment_col}' is not binary. Likely treatment format is: {treatment_format}.")
+        if not binary:
+            print(f"NOTE: treatment column '{treatment_col}' is not binary. Likely treatment format is: {treatment_format}.")
     return [
         binary,
@@ -268,8 +282,8 @@ def is_simultaneous(
     if verbose:
         print("OK")
-    if not simultaneous and data_isnotreatment[0]:
-        print(f"NOTE: treatment '{treatment_col}' is not simultaneous.")
+        if not simultaneous and data_isnotreatment[0]:
+            print(f"NOTE: treatment '{treatment_col}' is not simultaneous.")
     if simultaneous and not data_isnotreatment[0]:
         print(f"WARNING: treatment '{treatment_col}' is simultaneous and does not include a {config.NO_TREATMENT_CG_DESCRIPTION}")
@@ -303,8 +317,8 @@ def is_notreatment(
     if verbose:
         print("OK")
-    if not no_treatment:
-        print(f"NOTE: treatment '{treatment_col}' does not include a {config.NO_TREATMENT_CG_DESCRIPTION}.")
+        if not no_treatment:
+            print(f"NOTE: treatment '{treatment_col}' does not include a {config.NO_TREATMENT_CG_DESCRIPTION}.")
     return [
         no_treatment,
@@ -342,8 +356,8 @@ def treatment_group_col(
     if verbose:
         print("OK")
-    if create_TG_col_exists:
-        print(f"NOTE: Column {create_TG_col} already exists. Saved treatment group in column {config.TG_COL}{config.DELIMITER}{treatment_col}.")
+        if create_TG_col_exists:
+            print(f"NOTE: Column {create_TG_col} already exists. Saved treatment group in column {config.TG_COL}{config.DELIMITER}{treatment_col}.")
     return [
         data,
@@ -351,6 +365,32 @@ def treatment_group_col(
         create_TG_col
         ]
+def treatment_time_col(
+    data: pd.DataFrame,
+    unit_col: str,
+    time_col: str,
+    treatment_col: str,
+    create_TT_col: str = "TT",
+    verbose: bool = config.VERBOSE
+    ):
+    tt = treatment_times(
+        data = data,
+        unit_col = unit_col,
+        time_col = time_col,
+        treatment_col = treatment_col,
+        verbose = verbose
+        )[1]
+    data[create_TT_col] = 0
+    data.loc[data[time_col].isin(tt), create_TT_col] = 1
+    return [
+        data,
+        tt,
+        create_TT_col
+    ]
 def untreated_units(
     data: pd.DataFrame,
     unit_col: str,
@@ -460,10 +500,11 @@ def is_prepost(
     if verbose:
         print("OK")
-    if prepost:
-        print("NOTE: Panel data is pre-post.")
-    else:
-        print("NOTE: Panel data is multi-period panel data.")
+    if verbose:
+        if prepost:
+            print("NOTE: Panel data is pre-post.")
+        else:
+            print("NOTE: Panel data is multi-period panel data.")
     return prepost
@@ -502,8 +543,8 @@ def is_multiple_treatment_period(
     if verbose:
         print("OK")
-    if units_multiple > 0:
-        print(f"NOTE: There are {units_multiple} observational units with multiple treatment periods with respect to treatment '{treatment_col}'.")
+        if units_multiple > 0:
+            print(f"NOTE: There are {units_multiple} observational units with multiple treatment periods with respect to treatment '{treatment_col}'.")
     return [
         multiple_treatment_period,
@@ -591,12 +632,22 @@ def treatment_times(
             unit_col,
             time_col,
             treatment_col
-            ]
+            ],
+        verbose=verbose
         )
+    is_multiple_treatment_period(
+        data = data,
+        unit_col = unit_col,
+        treatment_col = treatment_col,
+        verbose = verbose
+        )[0]
     if verbose:
         print(f"Identifying treatment times for treatment '{treatment_col}'", end = " ... ")
+    tt = list(unique(data.loc[data[treatment_col] == 1, time_col]))
     units = unique(data[unit_col])
     units_tt = pd.DataFrame(columns = [unit_col, "treatment_min", "treatment_max"])
@@ -628,7 +679,10 @@ def treatment_times(
     if verbose:
         print("OK")
-    return units_tt
+    return [
+        units_tt,
+        tt
+    ]
 def model_wrapper(
     y,
@@ -833,8 +887,6 @@ def fit_metrics(
         RSQ_ADJ = (1-(1-RSQ)*((observations-1)/(observations-indep_vars_no-1)))
     else:
-        print("NOTE: As no number of independent vars was stated, no Adj. R-Squared is calculated.")
         RSQ_ADJ = np.nan
@@ -854,8 +906,13 @@ def fit_metrics(
     if verbose:
         print("OK")
-    if len(obs_exp_clean) < len(observed) or len(obs_exp_clean) < len(expected):
-        print("NOTE: Vectors 'observed' and/or 'expected' contain NaNs which were dropped.")
+    if verbose:
+        if RSQ_ADJ == np.nan:
+            print("NOTE: As no number of independent vars was stated, no Adj. R-Squared is calculated.")
+        if len(obs_exp_clean) < len(observed) or len(obs_exp_clean) < len(expected):
+            print("NOTE: Vectors 'observed' and/or 'expected' contain NaNs which were dropped.")
     modelfit_results = [
         model_residuals,

{diffindiff-2.2.2 → diffindiff-2.2.4}/diffindiff.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: diffindiff
-Version: 2.2.2
+Version: 2.2.4
 Summary: diffindiff: Python library for convenient Difference-in-Differences Analyses
 Author: Thomas Wieland
 Author-email: geowieland@googlemail.com
@@ -16,10 +16,11 @@ This Python library is designed for performing Difference-in-Differences (DiD) a
 Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geowieland@googlemail.com)
-## Updates v2.2.2
+## Updates v2.2.4
 - Bugfixes:
-  - Spillover treatment works now
-  - Verbose per default to False
+  - Spillover treatment really really works now (only relevant in rare cases)
+  - Fixed merging in diddata.DiffData.add_covariates() (only relevant in rare cases)
+  - Dropping missing values consequently (only relevant in rare cases)
 ## Features

{diffindiff-2.2.2 → diffindiff-2.2.4}/setup.py RENAMED Viewed

@@ -8,7 +8,7 @@ def read_README():
 setup(
     name='diffindiff',
-    version='2.2.2',
+    version='2.2.4',
     description='diffindiff: Python library for convenient Difference-in-Differences Analyses',
     packages=find_packages(include=["diffindiff", "diffindiff.tests"]),
     include_package_data=True,