PyPI - diff-diff - Versions diffs - 2.8.3__tar.gz → 2.8.4__tar.gz - Mend

diff-diff 2.8.3__tar.gz → 2.8.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

{diff_diff-2.8.3 → diff_diff-2.8.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diff-diff
-Version: 2.8.3
+Version: 2.8.4
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: Operating System :: OS Independent

{diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/__init__.py RENAMED Viewed

@@ -94,6 +94,7 @@ from diff_diff.prep import (
     make_treatment_indicator,
     rank_control_units,
     summarize_did_data,
+    trim_weights,
     validate_did_data,
     wide_to_long,
 )
@@ -210,7 +211,7 @@ Stacked = StackedDiD
 Bacon = BaconDecomposition
 EDiD = EfficientDiD
-__version__ = "2.8.3"
+__version__ = "2.8.4"
 __all__ = [
     # Estimators
     "DifferenceInDifferences",
@@ -307,6 +308,7 @@ __all__ = [
     "make_post_indicator",
     "wide_to_long",
     "balance_panel",
+    "trim_weights",
     "validate_did_data",
     "summarize_did_data",
     "generate_did_data",

{diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/bootstrap_utils.py RENAMED Viewed

@@ -433,6 +433,10 @@ def generate_survey_multiplier_weights_batch(
     is present, weights are scaled by ``sqrt(1 - f_h)`` per stratum so
     the bootstrap variance matches the TSL variance.
+    For ``lonely_psu="adjust"``, singleton PSUs from different strata are
+    pooled into a combined pseudo-stratum and weights are generated for
+    the pooled group (no FPC scaling on pooled singletons).
     Parameters
     ----------
     n_bootstrap : int
@@ -454,11 +458,7 @@ def generate_survey_multiplier_weights_batch(
     psu = resolved_survey.psu
     strata = resolved_survey.strata
-    if resolved_survey.lonely_psu == "adjust":
-        raise NotImplementedError(
-            "lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
-            "Use lonely_psu='remove' or 'certainty', or use analytical inference."
-        )
+    _lonely_psu = resolved_survey.lonely_psu
     if psu is None:
         # Each observation is its own PSU
@@ -499,6 +499,7 @@ def generate_survey_multiplier_weights_batch(
         psu_to_col = {int(p): i for i, p in enumerate(psu_ids)}
         unique_strata = np.unique(strata)
+        _singleton_cols = []  # For lonely_psu="adjust" pooling
         for h in unique_strata:
             mask_h = strata == h
@@ -511,8 +512,12 @@ def generate_survey_multiplier_weights_batch(
             cols = np.array([psu_to_col[int(p)] for p in psus_in_h])
             if n_h < 2:
-                # Lonely PSU — zero weight (matches remove/certainty behavior)
-                weights[:, cols] = 0.0
+                if _lonely_psu == "adjust":
+                    # Collect for pooled pseudo-stratum processing
+                    _singleton_cols.extend(cols.tolist())
+                else:
+                    # remove / certainty — zero weight
+                    weights[:, cols] = 0.0
                 continue
             # Generate weights for this stratum
@@ -536,6 +541,31 @@ def generate_survey_multiplier_weights_batch(
             weights[:, cols] = stratum_weights
+        # Pool singleton PSUs into a pseudo-stratum for "adjust"
+        if _singleton_cols:
+            n_pooled = len(_singleton_cols)
+            if n_pooled >= 2:
+                pooled_weights = generate_bootstrap_weights_batch_numpy(
+                    n_bootstrap, n_pooled, weight_type, rng
+                )
+                # No FPC scaling for pooled singletons (conservative)
+                pooled_cols = np.array(_singleton_cols)
+                weights[:, pooled_cols] = pooled_weights
+            else:
+                # Single singleton — cannot pool, zero weight (library-specific
+                # fallback; bootstrap adjust with one singleton = remove).
+                import warnings
+                warnings.warn(
+                    "lonely_psu='adjust' with only 1 singleton stratum in "
+                    "bootstrap: singleton PSU contributes zero variance "
+                    "(same as 'remove'). At least 2 singleton strata are "
+                    "needed for pooled pseudo-stratum bootstrap.",
+                    UserWarning,
+                    stacklevel=3,
+                )
+                weights[:, _singleton_cols[0]] = 0.0
     return weights, psu_ids
@@ -553,6 +583,9 @@ def generate_rao_wu_weights(
     With FPC: ``m_h = max(1, round((1 - f_h) * (n_h - 1)))``
     (Rao, Wu & Yue 1992, Section 3).
+    For ``lonely_psu="adjust"``, singleton PSUs are pooled into a combined
+    pseudo-stratum and resampled together (no FPC scaling on pooled group).
     Parameters
     ----------
     resolved_survey : ResolvedSurveyDesign
@@ -570,11 +603,7 @@ def generate_rao_wu_weights(
     psu = resolved_survey.psu
     strata = resolved_survey.strata
-    if resolved_survey.lonely_psu == "adjust":
-        raise NotImplementedError(
-            "lonely_psu='adjust' is not yet supported for survey-aware bootstrap. "
-            "Use lonely_psu='remove' or 'certainty', or use analytical inference."
-        )
+    _lonely_psu_rw = resolved_survey.lonely_psu
     rescaled = np.zeros(n_obs, dtype=np.float64)
@@ -589,14 +618,20 @@ def generate_rao_wu_weights(
         unique_strata = np.unique(strata)
         strata_masks = [strata == h for h in unique_strata]
+    # Collect singleton PSUs for "adjust" pooling
+    _singleton_info = []  # list of (mask_h, unique_psu_h) tuples
     for mask_h in strata_masks:
         psu_h = obs_psu[mask_h]
         unique_psu_h = np.unique(psu_h)
         n_h = len(unique_psu_h)
         if n_h < 2:
-            # Census / lonely PSU — keep original weights (zero variance)
-            rescaled[mask_h] = base_weights[mask_h]
+            if _lonely_psu_rw == "adjust":
+                _singleton_info.append((mask_h, unique_psu_h))
+            else:
+                # remove / certainty — keep original weights (zero variance)
+                rescaled[mask_h] = base_weights[mask_h]
             continue
         # Compute resample size
@@ -629,6 +664,41 @@ def generate_rao_wu_weights(
         local_indices = np.array([psu_to_local[int(obs_psu[idx])] for idx in obs_in_h])
         rescaled[obs_in_h] = base_weights[obs_in_h] * scale_per_psu[local_indices]
+    # Pool singleton PSUs into a pseudo-stratum for "adjust"
+    if _singleton_info:
+        # Combine all singleton PSUs into one group
+        pooled_psus = np.concatenate([p for _, p in _singleton_info])
+        n_pooled = len(pooled_psus)
+        if n_pooled >= 2:
+            m_pooled = n_pooled - 1  # No FPC for pooled singletons
+            drawn = rng.choice(n_pooled, size=m_pooled, replace=True)
+            counts = np.bincount(drawn, minlength=n_pooled)
+            scale_per_psu = (n_pooled / m_pooled) * counts.astype(np.float64)
+            # Build PSU → scale mapping and apply
+            psu_scale_map = {int(pooled_psus[i]): scale_per_psu[i] for i in range(n_pooled)}
+            for mask_h, _ in _singleton_info:
+                obs_in_h = np.where(mask_h)[0]
+                for idx in obs_in_h:
+                    p = int(obs_psu[idx])
+                    rescaled[idx] = base_weights[idx] * psu_scale_map.get(p, 1.0)
+        else:
+            # Single singleton — cannot pool, keep base weights (library-specific
+            # fallback; bootstrap adjust with one singleton = remove).
+            import warnings
+            warnings.warn(
+                "lonely_psu='adjust' with only 1 singleton stratum in "
+                "bootstrap: singleton PSU contributes zero variance "
+                "(same as 'remove'). At least 2 singleton strata are "
+                "needed for pooled pseudo-stratum bootstrap.",
+                UserWarning,
+                stacklevel=2,
+            )
+            for mask_h, _ in _singleton_info:
+                rescaled[mask_h] = base_weights[mask_h]
     return rescaled

{diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/continuous_did_results.py RENAMED Viewed

@@ -154,6 +154,15 @@ class ContinuousDiDResults:
             f"n_periods={len(self.time_periods)})"
         )
+    @property
+    def coef_var(self) -> float:
+        """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
+        if not (np.isfinite(self.overall_att_se) and self.overall_att_se >= 0):
+            return np.nan
+        if not np.isfinite(self.overall_att) or self.overall_att == 0:
+            return np.nan
+        return self.overall_att_se / abs(self.overall_att)
     def summary(self, alpha: Optional[float] = None) -> str:
         """Generate formatted summary."""
         alpha = alpha or self.alpha
@@ -223,10 +232,15 @@ class ContinuousDiDResults:
                 f"[{self.overall_att_conf_int[0]:.4f}, {self.overall_att_conf_int[1]:.4f}]",
                 f"{conf_level}% CI for ACRT_glob: "
                 f"[{self.overall_acrt_conf_int[0]:.4f}, {self.overall_acrt_conf_int[1]:.4f}]",
-                "",
             ]
         )
+        cv = self.coef_var
+        if np.isfinite(cv):
+            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")
+        lines.append("")
         # Dose-response curve summary (first/mid/last points)
         if len(self.dose_grid) > 0:
             lines.extend(

{diff_diff-2.8.3 → diff_diff-2.8.4}/diff_diff/efficient_did_results.py RENAMED Viewed

@@ -172,6 +172,15 @@ class EfficientDiDResults:
             f"n_periods={len(self.time_periods)})"
         )
+    @property
+    def coef_var(self) -> float:
+        """Coefficient of variation: SE / |overall ATT|. NaN when ATT is 0 or SE non-finite."""
+        if not (np.isfinite(self.overall_se) and self.overall_se >= 0):
+            return np.nan
+        if not np.isfinite(self.overall_att) or self.overall_att == 0:
+            return np.nan
+        return self.overall_se / abs(self.overall_att)
     def summary(self, alpha: Optional[float] = None) -> str:
         """Generate formatted summary of estimation results."""
         alpha = alpha or self.alpha
@@ -219,10 +228,15 @@ class EfficientDiDResults:
                 "",
                 f"{conf_level}% Confidence Interval: "
                 f"[{self.overall_conf_int[0]:.4f}, {self.overall_conf_int[1]:.4f}]",
-                "",
             ]
         )
+        cv = self.coef_var
+        if np.isfinite(cv):
+            lines.append(f"{'CV (SE/|ATT|):':<25} {cv:>10.4f}")
+        lines.append("")
         # Event study effects
         if self.event_study_effects:
             lines.extend(

diff-diff 2.8.3__tar.gz → 2.8.4__tar.gz

diff-diff 2.8.3__tar.gz → 2.8.4__tar.gz