PyPI - diff-diff - Versions diffs - 2.1.5__tar.gz → 2.1.6__tar.gz - Mend

diff-diff 2.1.5.tar.gz → 2.1.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

{diff_diff-2.1.5 → diff_diff-2.1.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diff-diff
-Version: 2.1.5
+Version: 2.1.6
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Science/Research
 Classifier: Operating System :: OS Independent

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/__init__.py RENAMED Viewed

@@ -136,7 +136,7 @@ from diff_diff.datasets import (
     load_mpdta,
 )
-__version__ = "2.1.5"
+__version__ = "2.1.6"
 __all__ = [
     # Estimators
     "DifferenceInDifferences",

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/honest_did.py RENAMED Viewed

@@ -584,7 +584,12 @@ def _extract_event_study_params(
                     )
                 # Extract event study effects by relative time
-                event_effects = results.event_study_effects
+                # Filter out normalization constraints (n_groups=0) and non-finite SEs
+                event_effects = {
+                    t: data for t, data in results.event_study_effects.items()
+                    if data.get('n_groups', 1) > 0
+                    and np.isfinite(data.get('se', np.nan))
+                }
                 rel_times = sorted(event_effects.keys())
                 # Split into pre and post
@@ -1261,10 +1266,12 @@ class HonestDiD:
             from diff_diff.staggered import CallawaySantAnnaResults
             if isinstance(results, CallawaySantAnnaResults):
                 if results.event_study_effects:
+                    # Filter out normalization constraints (n_groups=0, e.g. reference period)
                     pre_effects = [
                         abs(results.event_study_effects[t]['effect'])
                         for t in results.event_study_effects
                         if t < 0
+                        and results.event_study_effects[t].get('n_groups', 1) > 0
                     ]
                     if pre_effects:
                         return max(pre_effects)

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/pretrends.py RENAMED Viewed

@@ -656,9 +656,12 @@ class PreTrendsPower:
                     )
                 # Get pre-period effects (negative relative times)
+                # Filter out normalization constraints (n_groups=0) and non-finite SEs
                 pre_effects = {
                     t: data for t, data in results.event_study_effects.items()
                     if t < 0
+                    and data.get('n_groups', 1) > 0
+                    and np.isfinite(data.get('se', np.nan))
                 }
                 if not pre_effects:
@@ -680,9 +683,12 @@ class PreTrendsPower:
             from diff_diff.sun_abraham import SunAbrahamResults
             if isinstance(results, SunAbrahamResults):
                 # Get pre-period effects (negative relative times)
+                # Filter out normalization constraints (n_groups=0) and non-finite SEs
                 pre_effects = {
                     t: data for t, data in results.event_study_effects.items()
                     if t < 0
+                    and data.get('n_groups', 1) > 0
+                    and np.isfinite(data.get('se', np.nan))
                 }
                 if not pre_effects:

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/staggered_aggregation.py RENAMED Viewed

@@ -34,6 +34,9 @@ class CallawaySantAnnaAggregationMixin:
     # Type hint for anticipation attribute accessed from main class
     anticipation: int
+    # Type hint for base_period attribute accessed from main class
+    base_period: str
     def _aggregate_simple(
         self,
         group_time_effects: Dict,
@@ -414,6 +417,22 @@ class CallawaySantAnnaAggregationMixin:
                 'n_groups': len(effect_list),
             }
+        # Add reference period for universal base period mode (matches R did package)
+        # The reference period e = -1 - anticipation has effect = 0 by construction
+        # Only add if there are actual computed effects (guard against empty data)
+        if getattr(self, 'base_period', 'varying') == "universal":
+            ref_period = -1 - self.anticipation
+            # Only inject reference if we have at least one real effect
+            if event_study_effects and ref_period not in event_study_effects:
+                event_study_effects[ref_period] = {
+                    'effect': 0.0,
+                    'se': np.nan,  # Undefined - no data, normalization constraint
+                    't_stat': np.nan,  # Undefined - normalization constraint
+                    'p_value': np.nan,
+                    'conf_int': (np.nan, np.nan),  # NaN propagation for undefined inference
+                    'n_groups': 0,  # No groups contribute - fixed by construction
+                }
         return event_study_effects
     def _aggregate_by_group(

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/staggered_bootstrap.py RENAMED Viewed

@@ -60,12 +60,13 @@ def _generate_bootstrap_weights(
     elif weight_type == "webb":
         # Webb's 6-point distribution (recommended for few clusters)
+        # Values: ±√(3/2), ±1, ±√(1/2) with equal probabilities (1/6 each)
+        # This matches R's did package: E[w]=0, Var(w)=1.0
         values = np.array([
             -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
             np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
         ])
-        probs = np.array([1, 2, 3, 3, 2, 1]) / 12
-        return rng.choice(values, size=n_units, p=probs)
+        return rng.choice(values, size=n_units)  # Equal probs (1/6 each)
     else:
         raise ValueError(
@@ -152,12 +153,13 @@ def _generate_bootstrap_weights_batch_numpy(
     elif weight_type == "webb":
         # Webb's 6-point distribution
+        # Values: ±√(3/2), ±1, ±√(1/2) with equal probabilities (1/6 each)
+        # This matches R's did package: E[w]=0, Var(w)=1.0
         values = np.array([
             -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
             np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
         ])
-        probs = np.array([1, 2, 3, 3, 2, 1]) / 12
-        return rng.choice(values, size=(n_bootstrap, n_units), p=probs)
+        return rng.choice(values, size=(n_bootstrap, n_units))  # Equal probs (1/6 each)
     else:
         raise ValueError(

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/utils.py RENAMED Viewed

@@ -238,7 +238,7 @@ def _generate_webb_weights(n_clusters: int, rng: np.random.Generator) -> np.ndar
     Generate Webb's 6-point distribution weights.
     Values: {-sqrt(3/2), -sqrt(2/2), -sqrt(1/2), sqrt(1/2), sqrt(2/2), sqrt(3/2)}
-    with probabilities proportional to {1, 2, 3, 3, 2, 1}.
+    with equal probabilities (1/6 each), giving E[w]=0 and Var(w)=1.0.
     This distribution is recommended for very few clusters (G < 10) as it
     provides better finite-sample properties than Rademacher weights.
@@ -259,13 +259,16 @@ def _generate_webb_weights(n_clusters: int, rng: np.random.Generator) -> np.ndar
     ----------
     Webb, M. D. (2014). Reworking wild bootstrap based inference for
     clustered errors. Queen's Economics Department Working Paper No. 1315.
+    Note: Uses equal probabilities (1/6 each) matching R's `did` package,
+    which gives unit variance for consistency with other weight distributions.
     """
     values = np.array([
         -np.sqrt(3 / 2), -np.sqrt(2 / 2), -np.sqrt(1 / 2),
         np.sqrt(1 / 2), np.sqrt(2 / 2), np.sqrt(3 / 2)
     ])
-    probs = np.array([1, 2, 3, 3, 2, 1]) / 12
-    return np.asarray(rng.choice(values, size=n_clusters, p=probs))
+    # Equal probabilities (1/6 each) matching R's did package, giving Var(w) = 1.0
+    return np.asarray(rng.choice(values, size=n_clusters))
 def _generate_mammen_weights(n_clusters: int, rng: np.random.Generator) -> np.ndarray:

{diff_diff-2.1.5 → diff_diff-2.1.6}/diff_diff/visualization.py RENAMED Viewed

@@ -197,11 +197,17 @@ def plot_event_study(
         effect = effects.get(period, np.nan)
         std_err = se.get(period, np.nan)
-        if np.isnan(effect) or np.isnan(std_err):
+        # Skip entries with NaN effect, but allow NaN SE (will plot without error bars)
+        if np.isnan(effect):
             continue
-        ci_lower = effect - critical_value * std_err
-        ci_upper = effect + critical_value * std_err
+        # Compute CI only if SE is finite
+        if np.isfinite(std_err):
+            ci_lower = effect - critical_value * std_err
+            ci_upper = effect + critical_value * std_err
+        else:
+            ci_lower = np.nan
+            ci_upper = np.nan
         plot_data.append({
             'period': period,
@@ -244,13 +250,20 @@ def plot_event_study(
             ref_x = period_to_x[reference_period]
             ax.axvline(x=ref_x, color='gray', linestyle=':', linewidth=1, zorder=1)
-    # Plot error bars
-    yerr = [df['effect'] - df['ci_lower'], df['ci_upper'] - df['effect']]
-    ax.errorbar(
-        x_vals, df['effect'], yerr=yerr,
-        fmt='none', color=color, capsize=capsize, linewidth=linewidth,
-        capthick=linewidth, zorder=2
-    )
+    # Plot error bars (only for entries with finite CI)
+    has_ci = df['ci_lower'].notna() & df['ci_upper'].notna()
+    if has_ci.any():
+        df_with_ci = df[has_ci]
+        x_with_ci = [period_to_x[p] for p in df_with_ci['period']]
+        yerr = [
+            df_with_ci['effect'] - df_with_ci['ci_lower'],
+            df_with_ci['ci_upper'] - df_with_ci['effect']
+        ]
+        ax.errorbar(
+            x_with_ci, df_with_ci['effect'], yerr=yerr,
+            fmt='none', color=color, capsize=capsize, linewidth=linewidth,
+            capthick=linewidth, zorder=2
+        )
     # Plot point estimates
     for i, row in df.iterrows():
@@ -351,7 +364,15 @@ def _extract_plot_data(
         # Reference period is typically -1 for event study
         if reference_period is None:
-            reference_period = -1
+            # Detect reference period from n_groups=0 marker (normalization constraint)
+            # This handles anticipation > 0 where reference is at e = -1 - anticipation
+            for period, effect_data in results.event_study_effects.items():
+                if effect_data.get('n_groups', 1) == 0:
+                    reference_period = period
+                    break
+            # Fallback to -1 if no marker found (backward compatibility)
+            if reference_period is None:
+                reference_period = -1
         if pre_periods is None:
             pre_periods = [p for p in periods if p < 0]

{diff_diff-2.1.5 → diff_diff-2.1.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "maturin"
 [project]
 name = "diff-diff"
-version = "2.1.5"
+version = "2.1.6"
 description = "A library for Difference-in-Differences causal inference analysis"
 readme = "README.md"
 license = "MIT"
@@ -70,7 +70,11 @@ python-packages = ["diff_diff"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = "test_*.py"
+# Run all tests including slow ones by default; use `pytest -m 'not slow'` for faster local runs
 addopts = "-v --tb=short"
+markers = [
+    "slow: marks tests as slow (run `pytest -m 'not slow'` to exclude, or `pytest -m slow` to run only slow tests)",
+]
 [tool.black]
 line-length = 100

{diff_diff-2.1.5 → diff_diff-2.1.6}/rust/Cargo.lock RENAMED Viewed

@@ -115,9 +115,9 @@ dependencies = [
 [[package]]
 name = "cc"
-version = "1.2.53"
+version = "1.2.54"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932"
+checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583"
 dependencies = [
  "find-msvc-tools",
  "shlex",
@@ -289,7 +289,7 @@ dependencies = [
 [[package]]
 name = "diff_diff_rust"
-version = "2.1.5"
+version = "2.1.6"
 dependencies = [
  "ndarray",
  "ndarray-linalg",
@@ -1220,9 +1220,9 @@ dependencies = [
 [[package]]
 name = "quote"
-version = "1.0.43"
+version = "1.0.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
+checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
 dependencies = [
  "proc-macro2",
 ]

{diff_diff-2.1.5 → diff_diff-2.1.6}/rust/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "diff_diff_rust"
-version = "2.1.5"
+version = "2.1.6"
 edition = "2021"
 description = "Rust backend for diff-diff DiD library"
 license = "MIT"

{diff_diff-2.1.5 → diff_diff-2.1.6}/rust/src/bootstrap.rs RENAMED Viewed

@@ -115,24 +115,24 @@ fn generate_mammen_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array
 /// Generate Webb 6-point distribution weights.
 ///
-/// Six-point distribution that matches additional moments:
-/// E[w] = 0, E[w²] = 1, E[w³] = 0, E[w⁴] = 1
+/// Six-point distribution with equal probabilities (1/6 each) matching R's `did` package:
+/// E[w] = 0, Var[w] = 1
 ///
-/// Values: ±√(3/2), ±√(1/2), ±√(1/6) with specific probabilities
+/// Values: ±√(3/2), ±√(2/2)=±1, ±√(1/2)
 fn generate_webb_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array2<f64> {
     // Webb 6-point values
-    let val1 = (3.0_f64 / 2.0).sqrt(); // √(3/2) ≈ 1.225
-    let val2 = (1.0_f64 / 2.0).sqrt(); // √(1/2) ≈ 0.707
-    let val3 = (1.0_f64 / 6.0).sqrt(); // √(1/6) ≈ 0.408
+    let val1 = (3.0_f64 / 2.0).sqrt(); // √(3/2) ≈ 1.2247
+    let val2 = 1.0_f64; // √(2/2) = 1.0
+    let val3 = (1.0_f64 / 2.0).sqrt(); // √(1/2) ≈ 0.7071
-    // Lookup table for direct index computation (replaces 6-way if-else)
-    // Equal probability: u in [0, 1/6) -> -val1, [1/6, 2/6) -> -val2, etc.
+    // Values in order: -val1, -val2, -val3, val3, val2, val1
     let weights_table = [-val1, -val2, -val3, val3, val2, val1];
     // Pre-allocate output array - eliminates double allocation
     let mut weights = Array2::<f64>::zeros((n_bootstrap, n_units));
     // Fill rows in parallel with chunk size tuning
+    // Use uniform selection (1/6 probability each) matching R's did package
     weights
         .axis_iter_mut(Axis(0))
         .into_par_iter()
@@ -141,10 +141,8 @@ fn generate_webb_batch(n_bootstrap: usize, n_units: usize, seed: u64) -> Array2<
         .for_each(|(i, mut row)| {
             let mut rng = Xoshiro256PlusPlus::seed_from_u64(seed.wrapping_add(i as u64));
             for elem in row.iter_mut() {
-                let u = rng.gen::<f64>();
-                // Direct bucket computation: multiply by 6 and floor to get index 0-5
-                // Clamp to 5 to handle edge case where u == 1.0
-                let bucket = ((u * 6.0).floor() as usize).min(5);
+                // Uniform selection: generate integer 0-5, index into weights_table
+                let bucket = rng.gen_range(0..6);
                 *elem = weights_table[bucket];
             }
         });
@@ -225,4 +223,60 @@ mod tests {
         // Different seeds should produce different results
         assert_ne!(weights1, weights2);
     }
+    #[test]
+    fn test_webb_mean_approx_zero() {
+        let weights = generate_webb_batch(10000, 1, 42);
+        let mean: f64 = weights.iter().sum::<f64>() / weights.len() as f64;
+        // With 10000 samples, mean should be close to 0
+        assert!(
+            mean.abs() < 0.1,
+            "Webb mean should be close to 0, got {}",
+            mean
+        );
+    }
+    #[test]
+    fn test_webb_variance_approx_correct() {
+        // Webb's 6-point distribution with values ±√(3/2), ±1, ±√(1/2)
+        // and equal probabilities (1/6 each) should have variance = 1.0
+        // This matches R's did package behavior.
+        // Theoretical: Var = (1/6) * (3/2 + 1 + 1/2 + 1/2 + 1 + 3/2) = (1/6) * 6 = 1.0
+        let weights = generate_webb_batch(10000, 100, 42);
+        let n = weights.len() as f64;
+        let mean: f64 = weights.iter().sum::<f64>() / n;
+        let variance: f64 = weights.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
+        // Theoretical variance = 1.0 with equal probabilities
+        // Allow some statistical variance in the estimate
+        assert!(
+            (variance - 1.0).abs() < 0.05,
+            "Webb variance should be ~1.0 (matching R's did package), got {}",
+            variance
+        );
+    }
+    #[test]
+    fn test_webb_values_correct() {
+        // Verify that Webb weights only take the expected 6 values
+        let weights = generate_webb_batch(100, 1000, 42);
+        let val1 = (3.0_f64 / 2.0).sqrt(); // ≈ 1.2247
+        let val2 = 1.0_f64;
+        let val3 = (1.0_f64 / 2.0).sqrt(); // ≈ 0.7071
+        let expected_values = [-val1, -val2, -val3, val3, val2, val1];
+        for w in weights.iter() {
+            let matches_expected = expected_values
+                .iter()
+                .any(|&expected| (*w - expected).abs() < 1e-10);
+            assert!(
+                matches_expected,
+                "Webb weight {} is not one of the expected values",
+                w
+            );
+        }
+    }
 }