panelkit 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {panelkit-0.2.4 → panelkit-0.2.6}/Cargo.lock +5 -5
  2. {panelkit-0.2.4 → panelkit-0.2.6}/Cargo.toml +1 -1
  3. {panelkit-0.2.4 → panelkit-0.2.6}/GUIDE.md +8 -4
  4. {panelkit-0.2.4 → panelkit-0.2.6}/PKG-INFO +3 -3
  5. {panelkit-0.2.4 → panelkit-0.2.6}/README.md +2 -2
  6. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/sdid.rs +4 -0
  7. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/qr.rs +11 -1
  8. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/opt/simplex.rs +6 -4
  9. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/api_sc.rs +5 -3
  10. {panelkit-0.2.4 → panelkit-0.2.6}/pyproject.toml +1 -1
  11. {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/_panelkit.pyi +1 -1
  12. {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/design.py +152 -73
  13. {panelkit-0.2.4 → panelkit-0.2.6}/BENCHMARKS.md +0 -0
  14. {panelkit-0.2.4 → panelkit-0.2.6}/LICENSE-APACHE +0 -0
  15. {panelkit-0.2.4 → panelkit-0.2.6}/LICENSE-MIT +0 -0
  16. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/Cargo.toml +0 -0
  17. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/benches/estimators.rs +0 -0
  18. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/bacon.rs +0 -0
  19. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/callaway.rs +0 -0
  20. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/mod.rs +0 -0
  21. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/sunab.rs +0 -0
  22. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/twfe.rs +0 -0
  23. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/fe/mod.rs +0 -0
  24. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/fe/within.rs +0 -0
  25. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/lib.rs +0 -0
  26. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/mcnnm/mod.rs +0 -0
  27. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
  28. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/panel.rs +0 -0
  29. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/result.rs +0 -0
  30. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/augmented.rs +0 -0
  31. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/cpasc.rs +0 -0
  32. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/mod.rs +0 -0
  33. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/synthetic.rs +0 -0
  34. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/cpasc.rs +0 -0
  35. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/did.rs +0 -0
  36. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/sc.rs +0 -0
  37. {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/sc_family.rs +0 -0
  38. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/Cargo.toml +0 -0
  39. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/diagnostics.rs +0 -0
  40. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/lib.rs +0 -0
  41. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/power.rs +0 -0
  42. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/selection.rs +0 -0
  43. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/types.rs +0 -0
  44. {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/tests/geo.rs +0 -0
  45. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/Cargo.toml +0 -0
  46. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/batch.rs +0 -0
  47. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/bootstrap.rs +0 -0
  48. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/ci.rs +0 -0
  49. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/lib.rs +0 -0
  50. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/parallel.rs +0 -0
  51. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/placebo.rs +0 -0
  52. {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/tests/inference.rs +0 -0
  53. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/Cargo.toml +0 -0
  54. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/error.rs +0 -0
  55. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/cholesky.rs +0 -0
  56. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/eig_sym.rs +0 -0
  57. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/mod.rs +0 -0
  58. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/randomized.rs +0 -0
  59. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/svd.rs +0 -0
  60. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/svd_gram.rs +0 -0
  61. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/lib.rs +0 -0
  62. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/matrix.rs +0 -0
  63. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/matmul.rs +0 -0
  64. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/mod.rs +0 -0
  65. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/norms.rs +0 -0
  66. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/transform.rs +0 -0
  67. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/opt/mod.rs +0 -0
  68. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/opt/softthresh.rs +0 -0
  69. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/rng.rs +0 -0
  70. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/solve/lstsq.rs +0 -0
  71. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/solve/mod.rs +0 -0
  72. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/solve/spd.rs +0 -0
  73. {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/tests/numerics.rs +0 -0
  74. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/Cargo.toml +0 -0
  75. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/api_did.rs +0 -0
  76. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/api_geo.rs +0 -0
  77. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/convert.rs +0 -0
  78. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/lib.rs +0 -0
  79. {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/results.rs +0 -0
  80. {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/__init__.py +0 -0
  81. {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/estimators.py +0 -0
  82. {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/py.typed +0 -0
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
462
462
 
463
463
  [[package]]
464
464
  name = "panelkit-estimators"
465
- version = "0.2.4"
465
+ version = "0.2.6"
466
466
  dependencies = [
467
467
  "criterion",
468
468
  "panelkit-linalg",
@@ -471,7 +471,7 @@ dependencies = [
471
471
 
472
472
  [[package]]
473
473
  name = "panelkit-geo"
474
- version = "0.2.4"
474
+ version = "0.2.6"
475
475
  dependencies = [
476
476
  "panelkit-estimators",
477
477
  "panelkit-inference",
@@ -482,7 +482,7 @@ dependencies = [
482
482
 
483
483
  [[package]]
484
484
  name = "panelkit-inference"
485
- version = "0.2.4"
485
+ version = "0.2.6"
486
486
  dependencies = [
487
487
  "panelkit-estimators",
488
488
  "panelkit-linalg",
@@ -491,7 +491,7 @@ dependencies = [
491
491
 
492
492
  [[package]]
493
493
  name = "panelkit-linalg"
494
- version = "0.2.4"
494
+ version = "0.2.6"
495
495
  dependencies = [
496
496
  "proptest",
497
497
  "rayon",
@@ -623,7 +623,7 @@ dependencies = [
623
623
 
624
624
  [[package]]
625
625
  name = "pypanelkit"
626
- version = "0.2.4"
626
+ version = "0.2.6"
627
627
  dependencies = [
628
628
  "numpy",
629
629
  "panelkit-estimators",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/linalg", "crates/estimators", "crates/inference", "crates/geo", "crates/pypanelkit"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.2.4"
6
+ version = "0.2.6"
7
7
  edition = "2021"
8
8
  rust-version = "1.74"
9
9
  license = "MIT OR Apache-2.0"
@@ -300,10 +300,14 @@ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ C
300
300
  ev.lift, ev.cumulative, ev.significant
301
301
  ```
302
302
 
303
- Inference is **in-space placebo** (Abadie): every donor market is refit as if it
304
- were the treated one, and the spread of *their* post-period effects is the null
305
- reference — capturing out-of-sample extrapolation error, the real source of
306
- uncertainty. (A bootstrap of the treated unit's own post-period only sees
303
+ Inference defaults to **in-space placebo** (Abadie, `inference="placebo"`): every
304
+ donor market is refit as if it were the treated one, and the spread of *their*
305
+ post-period effects is the null reference — capturing out-of-sample extrapolation
306
+ error, the real source of uncertainty. A second engine, `inference="bootstrap"`,
307
+ uses a moving-block bootstrap of the pre-period residuals; it's serial-correlation
308
+ aware and works as a **fallback when the donor pool is too small for placebo**, but
309
+ it only sees in-sample noise, so it is *optimistic* (the report is flagged
310
+ `optimistic` and you shouldn't lean on it for significance). (A bootstrap of the treated unit's own post-period only sees
307
311
  in-sample noise and is wildly anti-conservative — on null data its 90% interval
308
312
  falsely flags an effect ~50% of the time; the placebo version sits at/below the
309
313
  nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelkit
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Topic :: Scientific/Engineering
@@ -274,7 +274,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
274
274
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
275
275
  them into a weighted-average **ensemble** estimate, and report each one's lift,
276
276
  confidence interval (in-space placebo), and cumulative incremental —
277
- with an SC in-space placebo p-value:
277
+ with an in-space placebo p-value:
278
278
 
279
279
  ![test evaluation](assets/geo_evaluate.png)
280
280
 
@@ -316,7 +316,7 @@ What you get out of the box:
316
316
  - **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
317
317
  with auto inverse-variance weights) for a steadier estimate than any one method.
318
318
  - **Post-test evaluation** — `evaluate()` measures a test that already ran:
319
- per-method + ensemble lift, bootstrap CIs, cumulative incremental, and a p-value.
319
+ per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
320
320
 
321
321
  See [`examples/geo_demo.py`](examples/geo_demo.py).
322
322
 
@@ -244,7 +244,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
244
244
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
245
245
  them into a weighted-average **ensemble** estimate, and report each one's lift,
246
246
  confidence interval (in-space placebo), and cumulative incremental —
247
- with an SC in-space placebo p-value:
247
+ with an in-space placebo p-value:
248
248
 
249
249
  ![test evaluation](assets/geo_evaluate.png)
250
250
 
@@ -286,7 +286,7 @@ What you get out of the box:
286
286
  - **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
287
287
  with auto inverse-variance weights) for a steadier estimate than any one method.
288
288
  - **Post-test evaluation** — `evaluate()` measures a test that already ran:
289
- per-method + ensemble lift, bootstrap CIs, cumulative incremental, and a p-value.
289
+ per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
290
290
 
291
291
  See [`examples/geo_demo.py`](examples/geo_demo.py).
292
292
 
@@ -87,6 +87,10 @@ pub fn fit_at(panel: &Panel, t0: usize, cfg: SdidConfig) -> ScFit {
87
87
  let t = panel.n_periods();
88
88
  let t_pre = t0;
89
89
  let t_post = t - t0;
90
+ assert!(
91
+ t_pre >= 1 && t_post >= 1,
92
+ "SDID needs at least one pre- and one post-period (t0 in 1..n_periods)"
93
+ );
90
94
  let n_tr = treated.len();
91
95
 
92
96
  // Treated-average series.
@@ -92,8 +92,18 @@ impl Qr {
92
92
  }
93
93
 
94
94
  /// Back-substitute `R x = rhs[0..n]`, returning `x` (length `n`).
95
+ ///
96
+ /// Householder QR does not rank-reveal, so a rank-deficient design can leave a
97
+ /// (near-)zero pivot on the diagonal. Rather than emit `inf`/`NaN` (which would
98
+ /// silently poison downstream OLS coefficients), we zero that component — a
99
+ /// minimum-norm-style choice — using a relative pivot threshold.
95
100
  fn back_solve(&self, rhs: &[f64]) -> Vec<f64> {
96
101
  let n = self.n;
102
+ let mut max_diag = 0.0_f64;
103
+ for i in 0..n {
104
+ max_diag = max_diag.max(self.packed.get(i, i).abs());
105
+ }
106
+ let eps = 1e-12 * max_diag.max(1.0);
97
107
  let mut x = vec![0.0; n];
98
108
  for i in (0..n).rev() {
99
109
  let mut s = rhs[i];
@@ -101,7 +111,7 @@ impl Qr {
101
111
  s -= self.packed.get(i, k) * x[k];
102
112
  }
103
113
  let rii = self.packed.get(i, i);
104
- x[i] = s / rii;
114
+ x[i] = if rii.abs() > eps { s / rii } else { 0.0 };
105
115
  }
106
116
  x
107
117
  }
@@ -30,17 +30,14 @@ pub fn project_simplex(v: &[f64]) -> Vec<f64> {
30
30
  let mut u = v.to_vec();
31
31
  u.sort_by(|a, b| b.partial_cmp(a).unwrap()); // descending
32
32
  let mut css = 0.0;
33
- let mut rho = 0usize;
34
33
  let mut theta = 0.0;
35
34
  for (j, &uj) in u.iter().enumerate() {
36
35
  css += uj;
37
36
  let t = (css - 1.0) / (j as f64 + 1.0);
38
37
  if uj - t > 0.0 {
39
- rho = j + 1;
40
38
  theta = t;
41
39
  }
42
40
  }
43
- let _ = rho;
44
41
  v.iter().map(|&vi| (vi - theta).max(0.0)).collect()
45
42
  }
46
43
 
@@ -148,8 +145,13 @@ pub fn solve_fw(gram: &Mat, b: &[f64], eta: f64, max_iter: usize, tol: f64) -> S
148
145
  let dgd = dot(&d, &gd_vec);
149
146
  let gamma = if dgd > 0.0 {
150
147
  (-gd / dgd).clamp(0.0, gamma_max)
151
- } else {
148
+ } else if gd < 0.0 {
149
+ // Non-positive curvature along a descent direction → go to the
150
+ // feasible cap (bounded so the step never leaves the simplex).
152
151
  gamma_max.min(1.0)
152
+ } else {
153
+ // Not a descent direction → don't move.
154
+ 0.0
153
155
  };
154
156
  for i in 0..j {
155
157
  w[i] += gamma * d[i];
@@ -111,13 +111,15 @@ pub fn fit_sdid(
111
111
  /// Fit Matrix-Completion NNM (Athey et al. 2021). `max_rank`, when set, uses a
112
112
  /// fast randomized truncated SVD inside SoftImpute (big speedup, low-rank cap).
113
113
  #[pyfunction]
114
- #[pyo3(signature = (y, treated, treat_time, lambda=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
114
+ // `lambda_` (not `lambda`) so it is usable as a Python keyword argument —
115
+ // `lambda` is a reserved word in Python.
116
+ #[pyo3(signature = (y, treated, treat_time, lambda_=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
115
117
  #[allow(clippy::too_many_arguments)]
116
118
  pub fn fit_mcnnm(
117
119
  y: PyReadonlyArray2<f64>,
118
120
  treated: Vec<usize>,
119
121
  treat_time: usize,
120
- lambda: Option<f64>,
122
+ lambda_: Option<f64>,
121
123
  max_iter: usize,
122
124
  tol: f64,
123
125
  seed: u64,
@@ -125,7 +127,7 @@ pub fn fit_mcnnm(
125
127
  ) -> PyResult<PyScResult> {
126
128
  let panel = Panel::block(mat_from_numpy(&y), &treated, treat_time);
127
129
  let cfg = McnnmConfig {
128
- lambda,
130
+ lambda: lambda_,
129
131
  max_iter,
130
132
  tol,
131
133
  seed,
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "panelkit"
7
- version = "0.2.4"
7
+ version = "0.2.6"
8
8
  description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -82,7 +82,7 @@ def fit_mcnnm(
82
82
  y: npt.NDArray[np.float64],
83
83
  treated: Sequence[int],
84
84
  treat_time: int,
85
- lambda_: Optional[float] = ...,
85
+ lambda_: Optional[float] = ..., # NOTE: matches the Rust binding's `lambda_`
86
86
  max_iter: int = ...,
87
87
  tol: float = ...,
88
88
  seed: int = ...,
@@ -42,7 +42,8 @@ def _ensemble_weight_arg(spec):
42
42
  raise ValueError(f"unknown ensemble_weights {spec!r} (use 'auto', 'equal', "
43
43
  "a dict, or a 3-list)")
44
44
  if isinstance(spec, dict):
45
- w = [float(spec.get(m, spec.get(m.lower(), 0.0))) for m in _ENSEMBLE_ORDER]
45
+ norm = {str(k).upper(): v for k, v in spec.items()} # case-insensitive keys
46
+ w = [float(norm.get(m, 0.0)) for m in _ENSEMBLE_ORDER]
46
47
  else:
47
48
  w = [float(x) for x in spec]
48
49
  if len(w) != 3:
@@ -52,6 +53,24 @@ def _ensemble_weight_arg(spec):
52
53
  return w
53
54
 
54
55
 
56
+ def _block_bootstrap_paths(pre_gaps, length, block_len, n_reps, seed):
57
+ """Moving-block bootstrap of the (centered) pre-period residuals into placebo
58
+ paths of ``length`` periods. Resampling whole blocks preserves the residual
59
+ autocorrelation. Returns an ``(n_reps, length)`` array (empty if no pre-period
60
+ or zero length)."""
61
+ g = np.asarray(pre_gaps, dtype=float)
62
+ m = len(g)
63
+ if m == 0 or length <= 0 or n_reps <= 0:
64
+ return np.empty((0, max(length, 0)))
65
+ g = g - g.mean() # null is "no effect" → center residuals
66
+ rng = np.random.default_rng(int(seed))
67
+ bl = max(1, min(int(block_len), m))
68
+ n_blocks = int(np.ceil(length / bl))
69
+ starts = rng.integers(0, m, size=(n_reps, n_blocks))
70
+ idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
71
+ return g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
72
+
73
+
55
74
  class _PowerReport:
56
75
  """Result of a power analysis across methods, with a report and plots."""
57
76
 
@@ -407,7 +426,7 @@ class GeoDesign:
407
426
  target_power=target_power, recommended=recommended,
408
427
  lookback=lookback, ensemble=ensemble,
409
428
  ensemble_weights=ensemble_weights)
410
- idx = self._resolve(treated)
429
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
411
430
  names = [self.names[i] for i in idx]
412
431
  lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
413
432
  if 0.0 not in lifts:
@@ -443,7 +462,7 @@ class GeoDesign:
443
462
  if bad:
444
463
  raise ValueError(f"treated markets were also excluded: {bad}")
445
464
  return sub.diagnose(tnames, test_len)
446
- idx = self._resolve(treated)
465
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
447
466
  names = [self.names[i] for i in idx]
448
467
  t0 = self.t - int(test_len)
449
468
  diag = _panelkit.geo_diagnostics(self.Y, idx, int(test_len))
@@ -681,7 +700,10 @@ class GeoDesign:
681
700
  methods: Sequence[str] = _METHODS,
682
701
  weights="auto",
683
702
  level: float = 0.90,
703
+ inference: str = "placebo",
684
704
  max_placebo: int = 200,
705
+ n_boot: int = 2000,
706
+ block_len: int = 4,
685
707
  seed: int = 0,
686
708
  exclude=None,
687
709
  ) -> "_EvalReport":
@@ -692,13 +714,20 @@ class GeoDesign:
692
714
  post-period column), it fits SC / ASC / SDID, reports each one's effect,
693
715
  and combines them into a weighted-average **ensemble** estimate.
694
716
 
695
- Inference is **in-space placebo** (Abadie): every donor market is refit as
696
- if it were the treated one, and the spread of *their* post-period effects
697
- is the null reference. This captures out-of-sample extrapolation error —
698
- the dominant source of uncertainty — so the intervals are calibrated
699
- (unlike a bootstrap of the treated unit's own post-period, which only sees
700
- in-sample noise and is far too narrow). Poorly-fit placebos (pre-period
701
- RMSPE > 2× the treated unit's) are dropped, per Abadie.
717
+ Two inference engines (``inference=``):
718
+
719
+ - ``"placebo"`` (default) — **in-space placebo** (Abadie): every donor
720
+ market is refit as if it were treated, and the spread of *their*
721
+ post-period effects is the null. This captures out-of-sample
722
+ extrapolation error (the dominant uncertainty), so it is calibrated.
723
+ Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
724
+ dropped. Needs a reasonable donor pool to have power.
725
+ - ``"bootstrap"`` — a **moving-block bootstrap of the pre-period
726
+ residuals** (serial-correlation-aware). Useful as a within-sample noise
727
+ band and as a fallback when the donor pool is too small for placebo
728
+ inference, **but it is optimistic**: it only sees in-sample noise, not
729
+ extrapolation error, so do not rely on it for significance. The report
730
+ is flagged ``optimistic`` in this mode.
702
731
 
703
732
  Parameters
704
733
  ----------
@@ -732,8 +761,9 @@ class GeoDesign:
732
761
  if bad:
733
762
  raise ValueError(f"treated markets were also excluded: {bad}")
734
763
  return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
735
- level=level, max_placebo=max_placebo, seed=seed)
736
- idx = self._resolve(treated)
764
+ level=level, inference=inference, max_placebo=max_placebo,
765
+ n_boot=n_boot, block_len=block_len, seed=seed)
766
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
737
767
  names = [self.names[i] for i in idx]
738
768
  t0 = int(treat_start)
739
769
  if not (1 <= t0 < self.t):
@@ -781,33 +811,60 @@ class GeoDesign:
781
811
  "pre_rmspe": float(fit.pre_rmspe),
782
812
  }
783
813
 
784
- # --- in-space placebo: refit each donor as if it were treated ---
785
- treated_set = set(idx)
786
- donors = [u for u in range(self.n) if u not in treated_set]
787
- if len(donors) > int(max_placebo):
788
- rng = np.random.default_rng(int(seed))
789
- donors = sorted(int(j) for j in rng.choice(donors, int(max_placebo), replace=False))
790
- pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
791
- for j in donors:
792
- for m in methods:
793
- fj = _fit(m, [j])
794
- pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
795
-
796
- # --- ensemble weights ---
797
- def _placebo_att_sd(m):
798
- if not pb[m]:
799
- return 1.0
800
- vals = np.array([p.mean() for (p, _) in pb[m]])
801
- return float(np.std(vals)) if len(vals) > 1 else 1.0
814
+ inference = str(inference).lower()
815
+ if inference not in ("placebo", "bootstrap"):
816
+ raise ValueError("inference must be 'placebo' or 'bootstrap'")
817
+ a = (1.0 - float(level)) / 2.0
818
+
819
+ def _ci(point, null_samples):
820
+ """Pivot CI: point estimate ± the null spread (null ≈ 0). Returns NaN
821
+ when there are too few null samples — never a fake zero-width CI."""
822
+ if len(null_samples) >= 2:
823
+ return point + float(np.quantile(null_samples, a)), \
824
+ point + float(np.quantile(null_samples, 1.0 - a))
825
+ return float("nan"), float("nan")
826
+
827
+ # --- engine: per-method null att-samples (+ donor placebo paths if used) ---
828
+ if inference == "placebo":
829
+ treated_set = set(idx)
830
+ donors = [u for u in range(self.n) if u not in treated_set]
831
+ if len(donors) > int(max_placebo):
832
+ rng = np.random.default_rng(int(seed))
833
+ donors = sorted(int(j) for j in
834
+ rng.choice(donors, int(max_placebo), replace=False))
835
+ pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
836
+ for j in donors:
837
+ for m in methods:
838
+ fj = _fit(m, [j])
839
+ pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
840
+
841
+ def _kept_att(samples, treated_pre_m):
842
+ keep = [p.mean() for (p, pre) in samples
843
+ if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
844
+ if len(keep) < 5 and samples:
845
+ keep = [p.mean() for (p, _) in samples]
846
+ return np.array(keep)
847
+ null_att = {m: _kept_att(pb[m], per[m]["pre_rmspe"]) for m in order}
848
+ else: # bootstrap of pre-period residuals
849
+ null_att = {}
850
+ for m in order:
851
+ pre_resid = treated_series[:t0] - per[m]["full_cf"][:t0]
852
+ Bm = _block_bootstrap_paths(pre_resid, post_len, block_len, n_boot, seed)
853
+ null_att[m] = Bm.mean(axis=1) if Bm.size else np.array([])
854
+
855
+ # --- ensemble weights (auto = inverse null-att variance per method) ---
856
+ def _null_sd(m):
857
+ v = null_att[m]
858
+ return float(np.std(v)) if len(v) > 1 else 1.0
802
859
  if isinstance(weights, str) and weights.lower() == "equal":
803
860
  wv = [1.0 / len(order)] * len(order)
804
861
  elif isinstance(weights, str) and weights.lower() == "auto":
805
- # inverse-variance from each method's placebo-null spread (precision)
806
- prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
862
+ prec = [1.0 / max(_null_sd(m) ** 2, 1e-300) for m in order]
807
863
  s = sum(prec)
808
864
  wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
809
865
  elif isinstance(weights, dict):
810
- raw = [float(weights.get(m, weights.get(m.lower(), 0.0))) for m in order]
866
+ norm = {str(k).upper(): v for k, v in weights.items()} # case-insensitive
867
+ raw = [float(norm.get(m, 0.0)) for m in order]
811
868
  s = sum(raw)
812
869
  if s <= 0:
813
870
  raise ValueError("ensemble weights must sum to > 0")
@@ -819,42 +876,41 @@ class GeoDesign:
819
876
  s = sum(raw)
820
877
  wv = [r / s for r in raw]
821
878
  wmap = dict(zip(order, wv))
822
- a = (1.0 - float(level)) / 2.0
823
879
 
824
- def _ci(point, null_samples):
825
- """Pivot CI: point estimate ± the placebo null spread (null ≈ 0)."""
826
- if len(null_samples) >= 2:
827
- return point + float(np.quantile(null_samples, a)), \
828
- point + float(np.quantile(null_samples, 1.0 - a))
829
- return point, point
830
-
831
- # --- per-method point CIs from each method's placebo att spread ---
880
+ # --- per-method point CIs from each method's null att spread ---
832
881
  for m in order:
833
- mp = np.array([p.mean() for (p, _) in pb[m]]) if pb[m] else np.array([])
834
- lo, hi = _ci(per[m]["att"], mp)
882
+ lo, hi = _ci(per[m]["att"], null_att[m])
835
883
  cfm = per[m]["cf_mean"]
836
884
  per[m]["att_lo"], per[m]["att_hi"] = lo, hi
837
885
  per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
838
886
  per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
839
887
 
840
- # --- ensemble estimate + ensemble placebo paths (Abadie pre-fit filter) ---
888
+ # --- ensemble estimate ---
841
889
  ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
842
890
  ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
843
891
  ens_att = float(ens_path.mean())
844
- treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
845
-
846
- ens_pb = [] # (path, pre_rmspe)
847
- for di in range(len(donors)):
848
- path = sum(wmap[m] * pb[m][di][0] for m in order)
849
- pre = sum(wmap[m] * pb[m][di][1] for m in order)
850
- ens_pb.append((path, pre))
851
- kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
852
- if len(kept) < 5: # too few comparable placebos → use all
853
- kept = [p for (p, _) in ens_pb]
854
- pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
892
+ ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
893
+
894
+ # --- ensemble null-path matrix (engine-specific) ---
895
+ if inference == "placebo":
896
+ treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
897
+ ens_pb = []
898
+ for di in range(len(donors)):
899
+ path = sum(wmap[m] * pb[m][di][0] for m in order)
900
+ pre = sum(wmap[m] * pb[m][di][1] for m in order)
901
+ ens_pb.append((path, pre))
902
+ kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
903
+ if len(kept) < 5: # too few comparable placebos → use all
904
+ kept = [p for (p, _) in ens_pb]
905
+ pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
906
+ label = "in-space placebo"
907
+ else:
908
+ ens_pre = treated_series[:t0] - ens_full_cf[:t0]
909
+ pb_mat = _block_bootstrap_paths(ens_pre, post_len, block_len, n_boot, seed)
910
+ label = "block bootstrap"
855
911
  n_pb = pb_mat.shape[0]
856
912
 
857
- # pointwise + cumulative + mean CIs, all from the placebo null
913
+ # --- shared: pointwise / cumulative / mean CIs + p-value from the null ---
858
914
  if n_pb >= 2:
859
915
  point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
860
916
  point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
@@ -866,10 +922,12 @@ class GeoDesign:
866
922
  pb_att = pb_mat.mean(axis=1)
867
923
  p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
868
924
  else:
869
- point_lo = point_hi = ens_path.copy()
870
- point_hw = 0.0
925
+ # too few comparable placebos → inference undefined (no fake band)
871
926
  run = np.cumsum(ens_path)
872
- cum_lo_band = cum_hi_band = np.zeros(post_len)
927
+ point_lo = np.full(post_len, np.nan)
928
+ point_hi = np.full(post_len, np.nan)
929
+ point_hw = 0.0
930
+ cum_lo_band = cum_hi_band = np.full(post_len, np.nan)
873
931
  pb_att = np.array([])
874
932
  p_value = None
875
933
  att_lo, att_hi = _ci(ens_att, pb_att)
@@ -882,11 +940,14 @@ class GeoDesign:
882
940
  "lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
883
941
  "lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
884
942
  "cumulative": float(ens_path.sum()) * n_treated,
885
- "weights": wmap, "n_placebo": n_pb,
943
+ "weights": wmap, "n_placebo": n_pb, "inference": label,
944
+ # placebo with too few donors is undefined/low-power; bootstrap is
945
+ # serial-correlation-aware but optimistic (in-sample noise only).
946
+ "low_power": (inference == "placebo" and n_pb < 8),
947
+ "optimistic": (inference == "bootstrap"),
886
948
  }
887
949
 
888
950
  # full-timeline counterfactual + gap path (pre shows fit; post = effect)
889
- ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
890
951
  full_gap = treated_series - ens_full_cf
891
952
  full_gap[t0:] = ens_path
892
953
  counterfactual = treated_series - full_gap
@@ -1000,11 +1061,14 @@ class _MultiCellReport:
1000
1061
  f"({', '.join(map(str, self.cells))})")
1001
1062
  lines.append(f"Test duration : {self.test_len} periods")
1002
1063
  lines.append(f"Shared donor pool : {len(self.donor_names)} markets")
1003
- lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume")
1064
+ lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume "
1065
+ f"(all cells together)")
1004
1066
  lines.append(f"Powered at {int(100*self.target_power)}% power, "
1005
1067
  f"{int(100*(1-self.alpha))}% confidence "
1006
1068
  f"(each cell vs. the shared pool).")
1007
1069
  lines.append("")
1070
+ # Per-cell 'Holdout' is that cell's share of its OWN sub-panel (cell +
1071
+ # shared donors); the Combined holdout above is over the full panel.
1008
1072
  lines.append(f"{'Cell':<14}{'Markets':<28}{'MDE':>8}{'Conf':>7}{'Holdout':>9}")
1009
1073
  lines.append("-" * 64)
1010
1074
  for label, rep in self.cells.items():
@@ -1069,8 +1133,11 @@ class _EvalReport:
1069
1133
 
1070
1134
  @property
1071
1135
  def significant(self):
1072
- """True if the ensemble CI excludes zero (effect detected)."""
1136
+ """True if the ensemble CI is well-defined and excludes zero. Returns
1137
+ False when inference is undefined (too few placebos → NaN interval)."""
1073
1138
  lo, hi = self.ensemble["att_lo"], self.ensemble["att_hi"]
1139
+ if not (np.isfinite(lo) and np.isfinite(hi)):
1140
+ return False
1074
1141
  return (lo > 0) or (hi < 0)
1075
1142
 
1076
1143
  def summary(self) -> str:
@@ -1091,18 +1158,30 @@ class _EvalReport:
1091
1158
  wstr = ", ".join(f"{m} {100*w:.0f}%" for m, w in e["weights"].items())
1092
1159
  lines.append(f" ensemble weights: {wstr}")
1093
1160
  lines.append("")
1161
+ engine = e.get("inference", "in-space placebo")
1162
+ unit = "draws" if engine == "block bootstrap" else "donors"
1094
1163
  if self.p_value is not None:
1095
- lines.append(f"SC in-space placebo p-value : {self.p_value:.3f}")
1096
- verdict = ("✓ Significant lift — the ensemble interval excludes zero."
1097
- if self.significant else
1098
- "~ Not distinguishable from zero at this level — the ensemble "
1099
- "interval includes zero.")
1164
+ lines.append(f"Placebo/bootstrap p-value : {self.p_value:.3f} "
1165
+ f"({engine}, {e.get('n_placebo', 0)} {unit})")
1166
+ if e.get("low_power"):
1167
+ lines.append("⚠ Few comparable donors — inference is low-powered; treat "
1168
+ "intervals/p-value with caution.")
1169
+ if e.get("optimistic"):
1170
+ lines.append("⚠ Bootstrap CIs see in-sample noise only (optimistic) — use "
1171
+ "inference='placebo' for significance when donors allow.")
1172
+ if self.significant:
1173
+ verdict = "✓ Significant lift — the ensemble interval excludes zero."
1174
+ elif not (np.isfinite(e["att_lo"]) and np.isfinite(e["att_hi"])):
1175
+ verdict = ("? Inference undefined — too few comparable donor placebos "
1176
+ "to form an interval.")
1177
+ else:
1178
+ verdict = ("~ Not distinguishable from zero at this level — the ensemble "
1179
+ "interval includes zero.")
1100
1180
  lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
1101
1181
  f"{e['cumulative']:,.0f} cumulative incremental")
1102
1182
  if "cum_lo" in e:
1103
1183
  lines.append(f"Cumulative {cl}% CI : "
1104
- f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
1105
- f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
1184
+ f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] ({engine})")
1106
1185
  lines.append(verdict)
1107
1186
  lines.append("=" * 66)
1108
1187
  return "\n".join(lines)
@@ -1588,7 +1667,7 @@ def _plot_eval(rep: "_EvalReport", path):
1588
1667
  axc.set_title("Lift by method", fontweight="bold")
1589
1668
  axc.grid(True, axis="x", alpha=0.25)
1590
1669
 
1591
- pv = f" · SC placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
1670
+ pv = f" · placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
1592
1671
  verdict = "significant" if rep.significant else "not significant"
1593
1672
  fig.suptitle(f"panelkit · test evaluation — ensemble lift "
1594
1673
  f"{100*rep.ensemble['lift']:+.2f}% ({verdict}){pv}",
@@ -1651,7 +1730,7 @@ def _plot_eval_timeline(rep: "_EvalReport", path):
1651
1730
  cum = e["cum_curve"]
1652
1731
  axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
1653
1732
  axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
1654
- alpha=0.15, label=f"{cl}% band (in-space placebo)")
1733
+ alpha=0.15, label=f"{cl}% band ({e.get('inference', 'in-space placebo')})")
1655
1734
  axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
1656
1735
  axc.axhline(0, color="#111827", lw=1.0)
1657
1736
  axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
File without changes
File without changes
File without changes
File without changes
File without changes