panelkit 0.2.5__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {panelkit-0.2.5 → panelkit-0.2.7}/Cargo.lock +5 -5
  2. {panelkit-0.2.5 → panelkit-0.2.7}/Cargo.toml +1 -1
  3. {panelkit-0.2.5 → panelkit-0.2.7}/GUIDE.md +8 -4
  4. {panelkit-0.2.5 → panelkit-0.2.7}/PKG-INFO +1 -1
  5. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/src/bootstrap.rs +2 -3
  6. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/tests/inference.rs +2 -2
  7. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/qr.rs +11 -1
  8. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/opt/simplex.rs +6 -1
  9. {panelkit-0.2.5 → panelkit-0.2.7}/pyproject.toml +1 -1
  10. {panelkit-0.2.5 → panelkit-0.2.7}/python/panelkit/design.py +120 -72
  11. {panelkit-0.2.5 → panelkit-0.2.7}/BENCHMARKS.md +0 -0
  12. {panelkit-0.2.5 → panelkit-0.2.7}/LICENSE-APACHE +0 -0
  13. {panelkit-0.2.5 → panelkit-0.2.7}/LICENSE-MIT +0 -0
  14. {panelkit-0.2.5 → panelkit-0.2.7}/README.md +0 -0
  15. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/Cargo.toml +0 -0
  16. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/benches/estimators.rs +0 -0
  17. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/did/bacon.rs +0 -0
  18. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/did/callaway.rs +0 -0
  19. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/did/mod.rs +0 -0
  20. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/did/sunab.rs +0 -0
  21. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/did/twfe.rs +0 -0
  22. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/fe/mod.rs +0 -0
  23. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/fe/within.rs +0 -0
  24. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/lib.rs +0 -0
  25. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/mcnnm/mod.rs +0 -0
  26. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
  27. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/panel.rs +0 -0
  28. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/result.rs +0 -0
  29. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/sc/augmented.rs +0 -0
  30. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/sc/cpasc.rs +0 -0
  31. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/sc/mod.rs +0 -0
  32. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/sc/sdid.rs +0 -0
  33. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/src/sc/synthetic.rs +0 -0
  34. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/tests/cpasc.rs +0 -0
  35. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/tests/did.rs +0 -0
  36. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/tests/sc.rs +0 -0
  37. {panelkit-0.2.5 → panelkit-0.2.7}/crates/estimators/tests/sc_family.rs +0 -0
  38. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/Cargo.toml +0 -0
  39. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/src/diagnostics.rs +0 -0
  40. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/src/lib.rs +0 -0
  41. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/src/power.rs +0 -0
  42. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/src/selection.rs +0 -0
  43. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/src/types.rs +0 -0
  44. {panelkit-0.2.5 → panelkit-0.2.7}/crates/geo/tests/geo.rs +0 -0
  45. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/Cargo.toml +0 -0
  46. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/src/batch.rs +0 -0
  47. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/src/ci.rs +0 -0
  48. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/src/lib.rs +0 -0
  49. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/src/parallel.rs +0 -0
  50. {panelkit-0.2.5 → panelkit-0.2.7}/crates/inference/src/placebo.rs +0 -0
  51. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/Cargo.toml +0 -0
  52. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/error.rs +0 -0
  53. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/cholesky.rs +0 -0
  54. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/eig_sym.rs +0 -0
  55. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/mod.rs +0 -0
  56. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/randomized.rs +0 -0
  57. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/svd.rs +0 -0
  58. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/factor/svd_gram.rs +0 -0
  59. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/lib.rs +0 -0
  60. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/matrix.rs +0 -0
  61. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/ops/matmul.rs +0 -0
  62. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/ops/mod.rs +0 -0
  63. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/ops/norms.rs +0 -0
  64. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/ops/transform.rs +0 -0
  65. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/opt/mod.rs +0 -0
  66. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/opt/softthresh.rs +0 -0
  67. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/rng.rs +0 -0
  68. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/solve/lstsq.rs +0 -0
  69. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/solve/mod.rs +0 -0
  70. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/src/solve/spd.rs +0 -0
  71. {panelkit-0.2.5 → panelkit-0.2.7}/crates/linalg/tests/numerics.rs +0 -0
  72. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/Cargo.toml +0 -0
  73. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/src/api_did.rs +0 -0
  74. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/src/api_geo.rs +0 -0
  75. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/src/api_sc.rs +0 -0
  76. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/src/convert.rs +0 -0
  77. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/src/lib.rs +0 -0
  78. {panelkit-0.2.5 → panelkit-0.2.7}/crates/pypanelkit/src/results.rs +0 -0
  79. {panelkit-0.2.5 → panelkit-0.2.7}/python/panelkit/__init__.py +0 -0
  80. {panelkit-0.2.5 → panelkit-0.2.7}/python/panelkit/_panelkit.pyi +0 -0
  81. {panelkit-0.2.5 → panelkit-0.2.7}/python/panelkit/estimators.py +0 -0
  82. {panelkit-0.2.5 → panelkit-0.2.7}/python/panelkit/py.typed +0 -0
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
462
462
 
463
463
  [[package]]
464
464
  name = "panelkit-estimators"
465
- version = "0.2.5"
465
+ version = "0.2.7"
466
466
  dependencies = [
467
467
  "criterion",
468
468
  "panelkit-linalg",
@@ -471,7 +471,7 @@ dependencies = [
471
471
 
472
472
  [[package]]
473
473
  name = "panelkit-geo"
474
- version = "0.2.5"
474
+ version = "0.2.7"
475
475
  dependencies = [
476
476
  "panelkit-estimators",
477
477
  "panelkit-inference",
@@ -482,7 +482,7 @@ dependencies = [
482
482
 
483
483
  [[package]]
484
484
  name = "panelkit-inference"
485
- version = "0.2.5"
485
+ version = "0.2.7"
486
486
  dependencies = [
487
487
  "panelkit-estimators",
488
488
  "panelkit-linalg",
@@ -491,7 +491,7 @@ dependencies = [
491
491
 
492
492
  [[package]]
493
493
  name = "panelkit-linalg"
494
- version = "0.2.5"
494
+ version = "0.2.7"
495
495
  dependencies = [
496
496
  "proptest",
497
497
  "rayon",
@@ -623,7 +623,7 @@ dependencies = [
623
623
 
624
624
  [[package]]
625
625
  name = "pypanelkit"
626
- version = "0.2.5"
626
+ version = "0.2.7"
627
627
  dependencies = [
628
628
  "numpy",
629
629
  "panelkit-estimators",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/linalg", "crates/estimators", "crates/inference", "crates/geo", "crates/pypanelkit"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.2.5"
6
+ version = "0.2.7"
7
7
  edition = "2021"
8
8
  rust-version = "1.74"
9
9
  license = "MIT OR Apache-2.0"
@@ -300,10 +300,14 @@ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ C
300
300
  ev.lift, ev.cumulative, ev.significant
301
301
  ```
302
302
 
303
- Inference is **in-space placebo** (Abadie): every donor market is refit as if it
304
- were the treated one, and the spread of *their* post-period effects is the null
305
- reference — capturing out-of-sample extrapolation error, the real source of
306
- uncertainty. (A bootstrap of the treated unit's own post-period only sees
303
+ Inference defaults to **in-space placebo** (Abadie, `inference="placebo"`): every
304
+ donor market is refit as if it were the treated one, and the spread of *their*
305
+ post-period effects is the null reference — capturing out-of-sample extrapolation
306
+ error, the real source of uncertainty. A second engine, `inference="bootstrap"`,
307
+ uses a moving-block bootstrap of the pre-period residuals; it's serial-correlation
308
+ aware and works as a **fallback when the donor pool is too small for placebo**, but
309
+ it only sees in-sample noise, so it is *optimistic* (the report is flagged
310
+ `optimistic` and you shouldn't lean on it for significance). (A bootstrap of the treated unit's own post-period only sees
307
311
  in-sample noise and is wildly anti-conservative — on null data its 90% interval
308
312
  falsely flags an effect ~50% of the time; the placebo version sits at/below the
309
313
  nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelkit
3
- Version: 0.2.5
3
+ Version: 0.2.7
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Topic :: Scientific/Engineering
@@ -53,14 +53,13 @@ pub fn multiplier_bootstrap(
53
53
  }
54
54
 
55
55
  /// Jackknife (leave-one-out) standard error from a set of leave-one-out
56
- /// estimates and the full-sample estimate.
57
- pub fn jackknife_se(loo_estimates: &[f64], full: f64) -> f64 {
56
+ /// estimates: `sqrt((n-1)/n · Σ(θ_i − θ̄)²)`, centered on the LOO mean.
57
+ pub fn jackknife_se(loo_estimates: &[f64]) -> f64 {
58
58
  let n = loo_estimates.len();
59
59
  if n < 2 {
60
60
  return 0.0;
61
61
  }
62
62
  let mean = loo_estimates.iter().sum::<f64>() / n as f64;
63
- let _ = full;
64
63
  let ss: f64 = loo_estimates.iter().map(|x| (x - mean).powi(2)).sum();
65
64
  ((n as f64 - 1.0) / n as f64 * ss).sqrt()
66
65
  }
@@ -107,11 +107,11 @@ fn bootstrap_engines_thread_count_invariant() {
107
107
  #[test]
108
108
  fn jackknife_se_of_constant_is_zero() {
109
109
  let est = vec![2.0; 10];
110
- assert!(jackknife_se(&est, 2.0) < 1e-12);
110
+ assert!(jackknife_se(&est) < 1e-12);
111
111
  }
112
112
 
113
113
  #[test]
114
114
  fn jackknife_se_positive_for_varying() {
115
115
  let est = vec![1.0, 2.0, 3.0, 4.0, 5.0];
116
- assert!(jackknife_se(&est, 3.0) > 0.0);
116
+ assert!(jackknife_se(&est) > 0.0);
117
117
  }
@@ -92,8 +92,18 @@ impl Qr {
92
92
  }
93
93
 
94
94
  /// Back-substitute `R x = rhs[0..n]`, returning `x` (length `n`).
95
+ ///
96
+ /// Householder QR does not rank-reveal, so a rank-deficient design can leave a
97
+ /// (near-)zero pivot on the diagonal. Rather than emit `inf`/`NaN` (which would
98
+ /// silently poison downstream OLS coefficients), we zero that component — a
99
+ /// minimum-norm-style choice — using a pivot threshold relative to the largest diagonal magnitude (with an absolute floor of 1e-12).
95
100
  fn back_solve(&self, rhs: &[f64]) -> Vec<f64> {
96
101
  let n = self.n;
102
+ let mut max_diag = 0.0_f64;
103
+ for i in 0..n {
104
+ max_diag = max_diag.max(self.packed.get(i, i).abs());
105
+ }
106
+ let eps = 1e-12 * max_diag.max(1.0);
97
107
  let mut x = vec![0.0; n];
98
108
  for i in (0..n).rev() {
99
109
  let mut s = rhs[i];
@@ -101,7 +111,7 @@ impl Qr {
101
111
  s -= self.packed.get(i, k) * x[k];
102
112
  }
103
113
  let rii = self.packed.get(i, i);
104
- x[i] = s / rii;
114
+ x[i] = if rii.abs() > eps { s / rii } else { 0.0 };
105
115
  }
106
116
  x
107
117
  }
@@ -145,8 +145,13 @@ pub fn solve_fw(gram: &Mat, b: &[f64], eta: f64, max_iter: usize, tol: f64) -> S
145
145
  let dgd = dot(&d, &gd_vec);
146
146
  let gamma = if dgd > 0.0 {
147
147
  (-gd / dgd).clamp(0.0, gamma_max)
148
- } else {
148
+ } else if gd < 0.0 {
149
+ // Non-positive curvature along a descent direction → go to the
150
+ // feasible cap (bounded so the step never leaves the simplex).
149
151
  gamma_max.min(1.0)
152
+ } else {
153
+ // Not a descent direction → don't move.
154
+ 0.0
150
155
  };
151
156
  for i in 0..j {
152
157
  w[i] += gamma * d[i];
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "panelkit"
7
- version = "0.2.5"
7
+ version = "0.2.7"
8
8
  description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -53,6 +53,24 @@ def _ensemble_weight_arg(spec):
53
53
  return w
54
54
 
55
55
 
56
+ def _block_bootstrap_paths(pre_gaps, length, block_len, n_reps, seed):
57
+ """Moving-block bootstrap of the (centered) pre-period residuals into placebo
58
+ paths of ``length`` periods. Resampling whole blocks preserves the residual
59
+ autocorrelation. Returns an ``(n_reps, length)`` array (zero rows if there is no
60
+ pre-period, or if ``length`` or ``n_reps`` is not positive)."""
61
+ g = np.asarray(pre_gaps, dtype=float)
62
+ m = len(g)
63
+ if m == 0 or length <= 0 or n_reps <= 0:
64
+ return np.empty((0, max(length, 0)))
65
+ g = g - g.mean() # null is "no effect" → center residuals
66
+ rng = np.random.default_rng(int(seed))
67
+ bl = max(1, min(int(block_len), m))
68
+ n_blocks = int(np.ceil(length / bl))
69
+ starts = rng.integers(0, m, size=(n_reps, n_blocks))
70
+ idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
71
+ return g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
72
+
73
+
56
74
  class _PowerReport:
57
75
  """Result of a power analysis across methods, with a report and plots."""
58
76
 
@@ -682,7 +700,10 @@ class GeoDesign:
682
700
  methods: Sequence[str] = _METHODS,
683
701
  weights="auto",
684
702
  level: float = 0.90,
703
+ inference: str = "placebo",
685
704
  max_placebo: int = 200,
705
+ n_boot: int = 2000,
706
+ block_len: int = 4,
686
707
  seed: int = 0,
687
708
  exclude=None,
688
709
  ) -> "_EvalReport":
@@ -693,13 +714,20 @@ class GeoDesign:
693
714
  post-period column), it fits SC / ASC / SDID, reports each one's effect,
694
715
  and combines them into a weighted-average **ensemble** estimate.
695
716
 
696
- Inference is **in-space placebo** (Abadie): every donor market is refit as
697
- if it were the treated one, and the spread of *their* post-period effects
698
- is the null reference. This captures out-of-sample extrapolation error —
699
- the dominant source of uncertainty so the intervals are calibrated
700
- (unlike a bootstrap of the treated unit's own post-period, which only sees
701
- in-sample noise and is far too narrow). Poorly-fit placebos (pre-period
702
- RMSPE > 2× the treated unit's) are dropped, per Abadie.
717
+ Two inference engines (``inference=``):
718
+
719
+ - ``"placebo"`` (default) — **in-space placebo** (Abadie): every donor
720
+ market is refit as if it were treated, and the spread of *their*
721
+ post-period effects is the null. This captures out-of-sample
722
+ extrapolation error (the dominant uncertainty), so it is calibrated.
723
+ Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
724
+ dropped. Needs a reasonable donor pool to have power.
725
+ - ``"bootstrap"`` — a **moving-block bootstrap of the pre-period
726
+ residuals** (serial-correlation-aware). Useful as a within-sample noise
727
+ band and as a fallback when the donor pool is too small for placebo
728
+ inference, **but it is optimistic**: it only sees in-sample noise, not
729
+ extrapolation error, so do not rely on it for significance. The report
730
+ is flagged ``optimistic`` in this mode.
703
731
 
704
732
  Parameters
705
733
  ----------
@@ -733,7 +761,8 @@ class GeoDesign:
733
761
  if bad:
734
762
  raise ValueError(f"treated markets were also excluded: {bad}")
735
763
  return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
736
- level=level, max_placebo=max_placebo, seed=seed)
764
+ level=level, inference=inference, max_placebo=max_placebo,
765
+ n_boot=n_boot, block_len=block_len, seed=seed)
737
766
  idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
738
767
  names = [self.names[i] for i in idx]
739
768
  t0 = int(treat_start)
@@ -782,29 +811,55 @@ class GeoDesign:
782
811
  "pre_rmspe": float(fit.pre_rmspe),
783
812
  }
784
813
 
785
- # --- in-space placebo: refit each donor as if it were treated ---
786
- treated_set = set(idx)
787
- donors = [u for u in range(self.n) if u not in treated_set]
788
- if len(donors) > int(max_placebo):
789
- rng = np.random.default_rng(int(seed))
790
- donors = sorted(int(j) for j in rng.choice(donors, int(max_placebo), replace=False))
791
- pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
792
- for j in donors:
793
- for m in methods:
794
- fj = _fit(m, [j])
795
- pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
796
-
797
- # --- ensemble weights ---
798
- def _placebo_att_sd(m):
799
- if not pb[m]:
800
- return 1.0
801
- vals = np.array([p.mean() for (p, _) in pb[m]])
802
- return float(np.std(vals)) if len(vals) > 1 else 1.0
814
+ inference = str(inference).lower()
815
+ if inference not in ("placebo", "bootstrap"):
816
+ raise ValueError("inference must be 'placebo' or 'bootstrap'")
817
+ a = (1.0 - float(level)) / 2.0
818
+
819
+ def _ci(point, null_samples):
820
+ """Pivot CI: point estimate ± the null spread (null ≈ 0). Returns NaN
821
+ when there are too few null samples — never a fake zero-width CI."""
822
+ if len(null_samples) >= 2:
823
+ return point + float(np.quantile(null_samples, a)), \
824
+ point + float(np.quantile(null_samples, 1.0 - a))
825
+ return float("nan"), float("nan")
826
+
827
+ # --- engine: per-method null att-samples (+ donor placebo paths if used) ---
828
+ if inference == "placebo":
829
+ treated_set = set(idx)
830
+ donors = [u for u in range(self.n) if u not in treated_set]
831
+ if len(donors) > int(max_placebo):
832
+ rng = np.random.default_rng(int(seed))
833
+ donors = sorted(int(j) for j in
834
+ rng.choice(donors, int(max_placebo), replace=False))
835
+ pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
836
+ for j in donors:
837
+ for m in methods:
838
+ fj = _fit(m, [j])
839
+ pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
840
+
841
+ def _kept_att(samples, treated_pre_m):
842
+ keep = [p.mean() for (p, pre) in samples
843
+ if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
844
+ if len(keep) < 5 and samples:
845
+ keep = [p.mean() for (p, _) in samples]
846
+ return np.array(keep)
847
+ null_att = {m: _kept_att(pb[m], per[m]["pre_rmspe"]) for m in order}
848
+ else: # bootstrap of pre-period residuals
849
+ null_att = {}
850
+ for m in order:
851
+ pre_resid = treated_series[:t0] - per[m]["full_cf"][:t0]
852
+ Bm = _block_bootstrap_paths(pre_resid, post_len, block_len, n_boot, seed)
853
+ null_att[m] = Bm.mean(axis=1) if Bm.size else np.array([])
854
+
855
+ # --- ensemble weights (auto = inverse null-att variance per method) ---
856
+ def _null_sd(m):
857
+ v = null_att[m]
858
+ return float(np.std(v)) if len(v) > 1 else 1.0
803
859
  if isinstance(weights, str) and weights.lower() == "equal":
804
860
  wv = [1.0 / len(order)] * len(order)
805
861
  elif isinstance(weights, str) and weights.lower() == "auto":
806
- # inverse-variance from each method's placebo-null spread (precision)
807
- prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
862
+ prec = [1.0 / max(_null_sd(m) ** 2, 1e-300) for m in order]
808
863
  s = sum(prec)
809
864
  wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
810
865
  elif isinstance(weights, dict):
@@ -821,54 +876,41 @@ class GeoDesign:
821
876
  s = sum(raw)
822
877
  wv = [r / s for r in raw]
823
878
  wmap = dict(zip(order, wv))
824
- a = (1.0 - float(level)) / 2.0
825
879
 
826
- def _ci(point, null_samples):
827
- """Pivot CI: point estimate ± the placebo null spread (null ≈ 0).
828
- Returns NaN when there are too few placebos to form an interval —
829
- never a fake zero-width CI."""
830
- if len(null_samples) >= 2:
831
- return point + float(np.quantile(null_samples, a)), \
832
- point + float(np.quantile(null_samples, 1.0 - a))
833
- return float("nan"), float("nan")
834
-
835
- def _kept_att(samples, treated_pre_m):
836
- """Placebo att-means after the Abadie 2x pre-fit filter (fallback to
837
- all placebos if too few comparable ones survive)."""
838
- keep = [p.mean() for (p, pre) in samples
839
- if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
840
- if len(keep) < 5 and samples:
841
- keep = [p.mean() for (p, _) in samples]
842
- return np.array(keep)
843
-
844
- # --- per-method point CIs from each method's placebo att spread (same
845
- # 2x pre-fit filter as the ensemble, for internal consistency) ---
880
+ # --- per-method point CIs from each method's null att spread ---
846
881
  for m in order:
847
- mp = _kept_att(pb[m], per[m]["pre_rmspe"])
848
- lo, hi = _ci(per[m]["att"], mp)
882
+ lo, hi = _ci(per[m]["att"], null_att[m])
849
883
  cfm = per[m]["cf_mean"]
850
884
  per[m]["att_lo"], per[m]["att_hi"] = lo, hi
851
885
  per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
852
886
  per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
853
887
 
854
- # --- ensemble estimate + ensemble placebo paths (Abadie pre-fit filter) ---
888
+ # --- ensemble estimate ---
855
889
  ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
856
890
  ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
857
891
  ens_att = float(ens_path.mean())
858
- treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
859
-
860
- ens_pb = [] # (path, pre_rmspe)
861
- for di in range(len(donors)):
862
- path = sum(wmap[m] * pb[m][di][0] for m in order)
863
- pre = sum(wmap[m] * pb[m][di][1] for m in order)
864
- ens_pb.append((path, pre))
865
- kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
866
- if len(kept) < 5: # too few comparable placebos → use all
867
- kept = [p for (p, _) in ens_pb]
868
- pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
892
+ ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
893
+
894
+ # --- ensemble null-path matrix (engine-specific) ---
895
+ if inference == "placebo":
896
+ treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
897
+ ens_pb = []
898
+ for di in range(len(donors)):
899
+ path = sum(wmap[m] * pb[m][di][0] for m in order)
900
+ pre = sum(wmap[m] * pb[m][di][1] for m in order)
901
+ ens_pb.append((path, pre))
902
+ kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
903
+ if len(kept) < 5: # too few comparable placebos → use all
904
+ kept = [p for (p, _) in ens_pb]
905
+ pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
906
+ label = "in-space placebo"
907
+ else:
908
+ ens_pre = treated_series[:t0] - ens_full_cf[:t0]
909
+ pb_mat = _block_bootstrap_paths(ens_pre, post_len, block_len, n_boot, seed)
910
+ label = "block bootstrap"
869
911
  n_pb = pb_mat.shape[0]
870
912
 
871
- # pointwise + cumulative + mean CIs, all from the placebo null
913
+ # --- shared: pointwise / cumulative / mean CIs + p-value from the null ---
872
914
  if n_pb >= 2:
873
915
  point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
874
916
  point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
@@ -898,12 +940,14 @@ class GeoDesign:
898
940
  "lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
899
941
  "lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
900
942
  "cumulative": float(ens_path.sum()) * n_treated,
901
- "weights": wmap, "n_placebo": n_pb,
902
- "low_power": n_pb < 8, # too few placebos for reliable inference
943
+ "weights": wmap, "n_placebo": n_pb, "inference": label,
944
+ # placebo with too few donors is undefined/low-power; bootstrap is
945
+ # serial-correlation-aware but optimistic (in-sample noise only).
946
+ "low_power": (inference == "placebo" and n_pb < 8),
947
+ "optimistic": (inference == "bootstrap"),
903
948
  }
904
949
 
905
950
  # full-timeline counterfactual + gap path (pre shows fit; post = effect)
906
- ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
907
951
  full_gap = treated_series - ens_full_cf
908
952
  full_gap[t0:] = ens_path
909
953
  counterfactual = treated_series - full_gap
@@ -1114,12 +1158,17 @@ class _EvalReport:
1114
1158
  wstr = ", ".join(f"{m} {100*w:.0f}%" for m, w in e["weights"].items())
1115
1159
  lines.append(f" ensemble weights: {wstr}")
1116
1160
  lines.append("")
1161
+ engine = e.get("inference", "in-space placebo")
1162
+ unit = "draws" if engine == "block bootstrap" else "donors"
1117
1163
  if self.p_value is not None:
1118
- lines.append(f"In-space placebo p-value : {self.p_value:.3f} "
1119
- f"(ensemble, {e.get('n_placebo', 0)} donors)")
1164
+ lines.append(f"Placebo/bootstrap p-value : {self.p_value:.3f} "
1165
+ f"({engine}, {e.get('n_placebo', 0)} {unit})")
1120
1166
  if e.get("low_power"):
1121
1167
  lines.append("⚠ Few comparable donors — inference is low-powered; treat "
1122
1168
  "intervals/p-value with caution.")
1169
+ if e.get("optimistic"):
1170
+ lines.append("⚠ Bootstrap CIs see in-sample noise only (optimistic) — use "
1171
+ "inference='placebo' for significance when donors allow.")
1123
1172
  if self.significant:
1124
1173
  verdict = "✓ Significant lift — the ensemble interval excludes zero."
1125
1174
  elif not (np.isfinite(e["att_lo"]) and np.isfinite(e["att_hi"])):
@@ -1132,8 +1181,7 @@ class _EvalReport:
1132
1181
  f"{e['cumulative']:,.0f} cumulative incremental")
1133
1182
  if "cum_lo" in e:
1134
1183
  lines.append(f"Cumulative {cl}% CI : "
1135
- f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
1136
- f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
1184
+ f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] ({engine})")
1137
1185
  lines.append(verdict)
1138
1186
  lines.append("=" * 66)
1139
1187
  return "\n".join(lines)
@@ -1682,7 +1730,7 @@ def _plot_eval_timeline(rep: "_EvalReport", path):
1682
1730
  cum = e["cum_curve"]
1683
1731
  axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
1684
1732
  axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
1685
- alpha=0.15, label=f"{cl}% band (in-space placebo)")
1733
+ alpha=0.15, label=f"{cl}% band ({e.get('inference', 'in-space placebo')})")
1686
1734
  axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
1687
1735
  axc.axhline(0, color="#111827", lw=1.0)
1688
1736
  axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes