panelkit 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {panelkit-0.2.3 → panelkit-0.2.5}/Cargo.lock +5 -5
  2. {panelkit-0.2.3 → panelkit-0.2.5}/Cargo.toml +1 -1
  3. {panelkit-0.2.3 → panelkit-0.2.5}/GUIDE.md +17 -11
  4. {panelkit-0.2.3 → panelkit-0.2.5}/PKG-INFO +4 -4
  5. {panelkit-0.2.3 → panelkit-0.2.5}/README.md +3 -3
  6. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/sdid.rs +4 -0
  7. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/opt/simplex.rs +0 -3
  8. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/api_sc.rs +5 -3
  9. {panelkit-0.2.3 → panelkit-0.2.5}/pyproject.toml +1 -1
  10. {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/_panelkit.pyi +1 -1
  11. {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/design.py +173 -123
  12. {panelkit-0.2.3 → panelkit-0.2.5}/BENCHMARKS.md +0 -0
  13. {panelkit-0.2.3 → panelkit-0.2.5}/LICENSE-APACHE +0 -0
  14. {panelkit-0.2.3 → panelkit-0.2.5}/LICENSE-MIT +0 -0
  15. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/Cargo.toml +0 -0
  16. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/benches/estimators.rs +0 -0
  17. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/bacon.rs +0 -0
  18. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/callaway.rs +0 -0
  19. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/mod.rs +0 -0
  20. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/sunab.rs +0 -0
  21. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/twfe.rs +0 -0
  22. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/fe/mod.rs +0 -0
  23. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/fe/within.rs +0 -0
  24. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/lib.rs +0 -0
  25. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/mcnnm/mod.rs +0 -0
  26. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
  27. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/panel.rs +0 -0
  28. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/result.rs +0 -0
  29. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/augmented.rs +0 -0
  30. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/cpasc.rs +0 -0
  31. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/mod.rs +0 -0
  32. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/synthetic.rs +0 -0
  33. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/cpasc.rs +0 -0
  34. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/did.rs +0 -0
  35. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/sc.rs +0 -0
  36. {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/sc_family.rs +0 -0
  37. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/Cargo.toml +0 -0
  38. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/diagnostics.rs +0 -0
  39. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/lib.rs +0 -0
  40. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/power.rs +0 -0
  41. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/selection.rs +0 -0
  42. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/types.rs +0 -0
  43. {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/tests/geo.rs +0 -0
  44. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/Cargo.toml +0 -0
  45. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/batch.rs +0 -0
  46. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/bootstrap.rs +0 -0
  47. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/ci.rs +0 -0
  48. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/lib.rs +0 -0
  49. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/parallel.rs +0 -0
  50. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/placebo.rs +0 -0
  51. {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/tests/inference.rs +0 -0
  52. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/Cargo.toml +0 -0
  53. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/error.rs +0 -0
  54. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/cholesky.rs +0 -0
  55. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/eig_sym.rs +0 -0
  56. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/mod.rs +0 -0
  57. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/qr.rs +0 -0
  58. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/randomized.rs +0 -0
  59. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/svd.rs +0 -0
  60. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/svd_gram.rs +0 -0
  61. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/lib.rs +0 -0
  62. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/matrix.rs +0 -0
  63. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/matmul.rs +0 -0
  64. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/mod.rs +0 -0
  65. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/norms.rs +0 -0
  66. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/transform.rs +0 -0
  67. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/opt/mod.rs +0 -0
  68. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/opt/softthresh.rs +0 -0
  69. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/rng.rs +0 -0
  70. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/solve/lstsq.rs +0 -0
  71. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/solve/mod.rs +0 -0
  72. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/solve/spd.rs +0 -0
  73. {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/tests/numerics.rs +0 -0
  74. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/Cargo.toml +0 -0
  75. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/api_did.rs +0 -0
  76. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/api_geo.rs +0 -0
  77. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/convert.rs +0 -0
  78. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/lib.rs +0 -0
  79. {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/results.rs +0 -0
  80. {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/__init__.py +0 -0
  81. {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/estimators.py +0 -0
  82. {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/py.typed +0 -0
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
462
462
 
463
463
  [[package]]
464
464
  name = "panelkit-estimators"
465
- version = "0.2.3"
465
+ version = "0.2.5"
466
466
  dependencies = [
467
467
  "criterion",
468
468
  "panelkit-linalg",
@@ -471,7 +471,7 @@ dependencies = [
471
471
 
472
472
  [[package]]
473
473
  name = "panelkit-geo"
474
- version = "0.2.3"
474
+ version = "0.2.5"
475
475
  dependencies = [
476
476
  "panelkit-estimators",
477
477
  "panelkit-inference",
@@ -482,7 +482,7 @@ dependencies = [
482
482
 
483
483
  [[package]]
484
484
  name = "panelkit-inference"
485
- version = "0.2.3"
485
+ version = "0.2.5"
486
486
  dependencies = [
487
487
  "panelkit-estimators",
488
488
  "panelkit-linalg",
@@ -491,7 +491,7 @@ dependencies = [
491
491
 
492
492
  [[package]]
493
493
  name = "panelkit-linalg"
494
- version = "0.2.3"
494
+ version = "0.2.5"
495
495
  dependencies = [
496
496
  "proptest",
497
497
  "rayon",
@@ -623,7 +623,7 @@ dependencies = [
623
623
 
624
624
  [[package]]
625
625
  name = "pypanelkit"
626
- version = "0.2.3"
626
+ version = "0.2.5"
627
627
  dependencies = [
628
628
  "numpy",
629
629
  "panelkit-estimators",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/linalg", "crates/estimators", "crates/inference", "crates/geo", "crates/pypanelkit"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.2.3"
6
+ version = "0.2.5"
7
7
  edition = "2021"
8
8
  rust-version = "1.74"
9
9
  license = "MIT OR Apache-2.0"
@@ -300,10 +300,16 @@ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ C
300
300
  ev.lift, ev.cumulative, ev.significant
301
301
  ```
302
302
 
303
- Each estimate gets a confidence interval from a **stationary block bootstrap** of
304
- its post-period effect path; an **SC in-space placebo** supplies a p-value. The
305
- ensemble uses the same `weights` choices as `power()` (`"auto"` = inverse-variance
306
- from each method's bootstrap SE, `"equal"`, or an explicit dict/list). `ev` exposes
303
+ Inference is **in-space placebo** (Abadie): every donor market is refit as if it
304
+ were the treated one, and the spread of *their* post-period effects is the null
305
+ reference — capturing out-of-sample extrapolation error, the real source of
306
+ uncertainty. (A bootstrap of the treated unit's own post-period only sees
307
+ in-sample noise and is wildly anti-conservative — on null data its 90% interval
308
+ falsely flags an effect ~50% of the time; the placebo version sits at/below the
309
+ nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
310
+ dropped, per Abadie. The p-value is the placebo rank of the treated effect, and
311
+ `"auto"` ensemble weights are inverse-variance from each method's placebo-null
312
+ spread. `ev` exposes
307
313
  `.lift`, `.att`, `.cumulative`, `.significant`, the per-method results in `ev.per`,
308
314
  and the ensemble in `ev.ensemble`. Reported numbers: **% lift** (effect ÷
309
315
  counterfactual), **per-period ATT**, and **cumulative incremental** over the
@@ -315,13 +321,13 @@ you can see it sits flat (centered on zero) inside the noise band before the tes
315
321
  starts (a placebo check) and breaks out after — and the running **cumulative
316
322
  incremental**, each as a point estimate with a confidence band. The counterfactual
317
323
  is centered on the pre-period, so the gap shows fit quality rather than a level
318
- offset (SDID matches trends, not levels). The bands come from a **moving-block
319
- bootstrap** of the pre-period residuals: resampling whole blocks preserves their
320
- autocorrelation, so the intervals are more conservative than an iid normal
321
- approximation — the cumulative band in particular widens faster than √k when the
322
- residuals are positively autocorrelated. Raise `block_len` to capture longer-range
323
- dependence (wider, more conservative cumulative bands). Pass `exclude=[…]` to drop
324
- markets from the control pool (e.g. ones you don't trust as donors).
324
+ offset (SDID matches trends, not levels). The bands come from the **in-space
325
+ placebo** distribution: at each horizon, the pointwise band is the spread of the
326
+ donor placebos' per-period effects, and the cumulative band is the spread of their
327
+ cumulative sums (so it fans out with horizon). Placebo inference needs a decent
328
+ donor pool to have power with only a handful of comparable donors the intervals
329
+ are necessarily wide. Pass `exclude=[…]` to drop markets from the control pool
330
+ (e.g. ones you don't trust as donors).
325
331
 
326
332
  ### Choosing a specification — `design.recommend(test_lengths, n_geos_options, target_lift, alphas=…)`
327
333
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelkit
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Topic :: Scientific/Engineering
@@ -273,8 +273,8 @@ per-cell MDE/confidence/holdout report and a combined figure:
273
273
  **Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
274
274
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
275
275
  them into a weighted-average **ensemble** estimate, and report each one's lift,
276
- confidence interval (stationary block bootstrap), and cumulative incremental —
277
- with an SC in-space placebo p-value:
276
+ confidence interval (in-space placebo), and cumulative incremental —
277
+ with an in-space placebo p-value:
278
278
 
279
279
  ![test evaluation](assets/geo_evaluate.png)
280
280
 
@@ -316,7 +316,7 @@ What you get out of the box:
316
316
  - **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
317
317
  with auto inverse-variance weights) for a steadier estimate than any one method.
318
318
  - **Post-test evaluation** — `evaluate()` measures a test that already ran:
319
- per-method + ensemble lift, bootstrap CIs, cumulative incremental, and a p-value.
319
+ per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
320
320
 
321
321
  See [`examples/geo_demo.py`](examples/geo_demo.py).
322
322
 
@@ -243,8 +243,8 @@ per-cell MDE/confidence/holdout report and a combined figure:
243
243
  **Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
244
244
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
245
245
  them into a weighted-average **ensemble** estimate, and report each one's lift,
246
- confidence interval (stationary block bootstrap), and cumulative incremental —
247
- with an SC in-space placebo p-value:
246
+ confidence interval (in-space placebo), and cumulative incremental —
247
+ with an in-space placebo p-value:
248
248
 
249
249
  ![test evaluation](assets/geo_evaluate.png)
250
250
 
@@ -286,7 +286,7 @@ What you get out of the box:
286
286
  - **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
287
287
  with auto inverse-variance weights) for a steadier estimate than any one method.
288
288
  - **Post-test evaluation** — `evaluate()` measures a test that already ran:
289
- per-method + ensemble lift, bootstrap CIs, cumulative incremental, and a p-value.
289
+ per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
290
290
 
291
291
  See [`examples/geo_demo.py`](examples/geo_demo.py).
292
292
 
@@ -87,6 +87,10 @@ pub fn fit_at(panel: &Panel, t0: usize, cfg: SdidConfig) -> ScFit {
87
87
  let t = panel.n_periods();
88
88
  let t_pre = t0;
89
89
  let t_post = t - t0;
90
+ assert!(
91
+ t_pre >= 1 && t_post >= 1,
92
+ "SDID needs at least one pre- and one post-period (t0 in 1..n_periods)"
93
+ );
90
94
  let n_tr = treated.len();
91
95
 
92
96
  // Treated-average series.
@@ -30,17 +30,14 @@ pub fn project_simplex(v: &[f64]) -> Vec<f64> {
30
30
  let mut u = v.to_vec();
31
31
  u.sort_by(|a, b| b.partial_cmp(a).unwrap()); // descending
32
32
  let mut css = 0.0;
33
- let mut rho = 0usize;
34
33
  let mut theta = 0.0;
35
34
  for (j, &uj) in u.iter().enumerate() {
36
35
  css += uj;
37
36
  let t = (css - 1.0) / (j as f64 + 1.0);
38
37
  if uj - t > 0.0 {
39
- rho = j + 1;
40
38
  theta = t;
41
39
  }
42
40
  }
43
- let _ = rho;
44
41
  v.iter().map(|&vi| (vi - theta).max(0.0)).collect()
45
42
  }
46
43
 
@@ -111,13 +111,15 @@ pub fn fit_sdid(
111
111
  /// Fit Matrix-Completion NNM (Athey et al. 2021). `max_rank`, when set, uses a
112
112
  /// fast randomized truncated SVD inside SoftImpute (big speedup, low-rank cap).
113
113
  #[pyfunction]
114
- #[pyo3(signature = (y, treated, treat_time, lambda=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
114
+ // `lambda_` (not `lambda`) so it is usable as a Python keyword argument —
115
+ // `lambda` is a reserved word in Python.
116
+ #[pyo3(signature = (y, treated, treat_time, lambda_=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
115
117
  #[allow(clippy::too_many_arguments)]
116
118
  pub fn fit_mcnnm(
117
119
  y: PyReadonlyArray2<f64>,
118
120
  treated: Vec<usize>,
119
121
  treat_time: usize,
120
- lambda: Option<f64>,
122
+ lambda_: Option<f64>,
121
123
  max_iter: usize,
122
124
  tol: f64,
123
125
  seed: u64,
@@ -125,7 +127,7 @@ pub fn fit_mcnnm(
125
127
  ) -> PyResult<PyScResult> {
126
128
  let panel = Panel::block(mat_from_numpy(&y), &treated, treat_time);
127
129
  let cfg = McnnmConfig {
128
- lambda,
130
+ lambda: lambda_,
129
131
  max_iter,
130
132
  tol,
131
133
  seed,
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "panelkit"
7
- version = "0.2.3"
7
+ version = "0.2.5"
8
8
  description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -82,7 +82,7 @@ def fit_mcnnm(
82
82
  y: npt.NDArray[np.float64],
83
83
  treated: Sequence[int],
84
84
  treat_time: int,
85
- lambda_: Optional[float] = ...,
85
+ lambda_: Optional[float] = ..., # NOTE: matches the Rust binding's `lambda_`
86
86
  max_iter: int = ...,
87
87
  tol: float = ...,
88
88
  seed: int = ...,
@@ -42,7 +42,8 @@ def _ensemble_weight_arg(spec):
42
42
  raise ValueError(f"unknown ensemble_weights {spec!r} (use 'auto', 'equal', "
43
43
  "a dict, or a 3-list)")
44
44
  if isinstance(spec, dict):
45
- w = [float(spec.get(m, spec.get(m.lower(), 0.0))) for m in _ENSEMBLE_ORDER]
45
+ norm = {str(k).upper(): v for k, v in spec.items()} # case-insensitive keys
46
+ w = [float(norm.get(m, 0.0)) for m in _ENSEMBLE_ORDER]
46
47
  else:
47
48
  w = [float(x) for x in spec]
48
49
  if len(w) != 3:
@@ -52,26 +53,6 @@ def _ensemble_weight_arg(spec):
52
53
  return w
53
54
 
54
55
 
55
- def _placebo_paths(pre_gaps, length, block_len, n_reps, seed):
56
- """Moving-block bootstrap of the (centered) pre-period residuals into placebo
57
- paths of ``length`` periods. Resampling whole blocks preserves the residual
58
- autocorrelation, so the resulting CI bands are more conservative than an iid
59
- normal approximation. Returns an ``(n_reps, length)`` array (empty if no
60
- pre-period or zero length)."""
61
- g = np.asarray(pre_gaps, dtype=float)
62
- m = len(g)
63
- if m == 0 or length <= 0 or n_reps <= 0:
64
- return np.empty((0, max(length, 0)))
65
- g = g - g.mean() # null is "no effect" → center the residuals
66
- rng = np.random.default_rng(int(seed))
67
- bl = max(1, min(int(block_len), m))
68
- n_blocks = int(np.ceil(length / bl))
69
- starts = rng.integers(0, m, size=(n_reps, n_blocks))
70
- idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
71
- paths = g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
72
- return paths
73
-
74
-
75
56
  class _PowerReport:
76
57
  """Result of a power analysis across methods, with a report and plots."""
77
58
 
@@ -427,7 +408,7 @@ class GeoDesign:
427
408
  target_power=target_power, recommended=recommended,
428
409
  lookback=lookback, ensemble=ensemble,
429
410
  ensemble_weights=ensemble_weights)
430
- idx = self._resolve(treated)
411
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
431
412
  names = [self.names[i] for i in idx]
432
413
  lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
433
414
  if 0.0 not in lifts:
@@ -463,7 +444,7 @@ class GeoDesign:
463
444
  if bad:
464
445
  raise ValueError(f"treated markets were also excluded: {bad}")
465
446
  return sub.diagnose(tnames, test_len)
466
- idx = self._resolve(treated)
447
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
467
448
  names = [self.names[i] for i in idx]
468
449
  t0 = self.t - int(test_len)
469
450
  diag = _panelkit.geo_diagnostics(self.Y, idx, int(test_len))
@@ -701,8 +682,7 @@ class GeoDesign:
701
682
  methods: Sequence[str] = _METHODS,
702
683
  weights="auto",
703
684
  level: float = 0.90,
704
- n_boot: int = 2000,
705
- block_len: int = 4,
685
+ max_placebo: int = 200,
706
686
  seed: int = 0,
707
687
  exclude=None,
708
688
  ) -> "_EvalReport":
@@ -711,9 +691,15 @@ class GeoDesign:
711
691
  This is the measurement counterpart to :meth:`power`: given the treated
712
692
  markets and the period treatment began (``treat_start``, the first
713
693
  post-period column), it fits SC / ASC / SDID, reports each one's effect,
714
- and combines them into a weighted-average **ensemble** estimate. Each
715
- estimate gets a confidence interval from a stationary block bootstrap of
716
- its post-period effect path; an SC in-space placebo supplies a p-value.
694
+ and combines them into a weighted-average **ensemble** estimate.
695
+
696
+ Inference is **in-space placebo** (Abadie): every donor market is refit as
697
+ if it were the treated one, and the spread of *their* post-period effects
698
+ is the null reference. This captures out-of-sample extrapolation error —
699
+ the dominant source of uncertainty — so the intervals are calibrated
700
+ (unlike a bootstrap of the treated unit's own post-period, which only sees
701
+ in-sample noise and is far too narrow). Poorly-fit placebos (pre-period
702
+ RMSPE > 2× the treated unit's) are dropped, per Abadie.
717
703
 
718
704
  Parameters
719
705
  ----------
@@ -725,11 +711,13 @@ class GeoDesign:
725
711
  Which estimators to fit and blend.
726
712
  weights : "auto" | "equal" | dict
727
713
  Ensemble weighting. ``"auto"`` is inverse-variance (precision)
728
- weighting from each method's bootstrap standard error.
714
+ weighting from each method's placebo-null spread.
729
715
  level : float
730
716
  Confidence level for the intervals (e.g. 0.90).
731
- n_boot, block_len, seed :
732
- Stationary-bootstrap settings for the effect-path CIs.
717
+ max_placebo : int
718
+ Cap on the number of donor placebos used (sampled if exceeded).
719
+ seed : int
720
+ Seed for placebo sampling when ``max_placebo`` is exceeded.
733
721
 
734
722
  Returns
735
723
  -------
@@ -745,8 +733,8 @@ class GeoDesign:
745
733
  if bad:
746
734
  raise ValueError(f"treated markets were also excluded: {bad}")
747
735
  return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
748
- level=level, n_boot=n_boot, block_len=block_len, seed=seed)
749
- idx = self._resolve(treated)
736
+ level=level, max_placebo=max_placebo, seed=seed)
737
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
750
738
  names = [self.names[i] for i in idx]
751
739
  t0 = int(treat_start)
752
740
  if not (1 <= t0 < self.t):
@@ -757,27 +745,28 @@ class GeoDesign:
757
745
  if unknown:
758
746
  raise ValueError(f"unknown methods {unknown}; choose from {_METHODS}")
759
747
 
760
- fitters = {
761
- "SC": lambda: _panelkit.fit_sc(self.Y, idx, t0, 0.0, False, level),
762
- "ASC": lambda: _panelkit.fit_asc(self.Y, idx, t0, 0.0, None),
763
- "SDID": lambda: _panelkit.fit_sdid(self.Y, idx, t0, 1.0),
764
- }
748
+ def _fit(method, tr):
749
+ if method == "SC":
750
+ return _panelkit.fit_sc(self.Y, tr, t0, 0.0, False, level)
751
+ if method == "ASC":
752
+ return _panelkit.fit_asc(self.Y, tr, t0, 0.0, None)
753
+ return _panelkit.fit_sdid(self.Y, tr, t0, 1.0)
754
+
765
755
  treated_series = self.Y[idx].mean(axis=0)
756
+ post_len = self.t - t0
757
+ order = methods
758
+
759
+ # --- point estimates on the treated set ---
766
760
  per = {}
767
761
  for m in methods:
768
- fit = fitters[m]()
762
+ fit = _fit(m, idx)
769
763
  att_path = np.asarray(fit.att_path, dtype=float)
770
764
  cf = np.asarray(fit.counterfactual, dtype=float)
771
765
  att = float(fit.att)
772
766
  cf_mean = float(np.mean(cf)) if cf.size else float("nan")
773
- se, lo, hi = _panelkit.bootstrap_mean(
774
- att_path.tolist(), "stationary", int(block_len), int(n_boot),
775
- int(seed), float(level))
776
- # Full-timeline counterfactual via donor weights (exact for SC; the
777
- # dominant term for ASC/SDID). Center on the pre-period so the gap
778
- # reflects FIT, not a level offset — SDID is level-agnostic (matches
779
- # trends, not levels), so its donor-weighted series sits at a constant
780
- # offset that would otherwise look like a non-zero pre-period.
767
+ # Full-timeline counterfactual via donor weights, centered on the
768
+ # pre-period so the gap reflects FIT, not a level offset (SDID matches
769
+ # trends, not levels).
781
770
  dids = np.asarray(fit.donor_ids, dtype=int)
782
771
  ws = np.asarray(fit.weights, dtype=float)
783
772
  if dids.size:
@@ -787,25 +776,40 @@ class GeoDesign:
787
776
  full_cf = np.full(self.t, np.nan)
788
777
  per[m] = {
789
778
  "att": att, "att_path": att_path, "counterfactual": cf,
790
- "full_cf": full_cf,
791
- "cf_mean": cf_mean, "lift": att / cf_mean if cf_mean else float("nan"),
792
- "se": se, "att_lo": lo, "att_hi": hi,
793
- "lift_lo": lo / cf_mean if cf_mean else float("nan"),
794
- "lift_hi": hi / cf_mean if cf_mean else float("nan"),
779
+ "full_cf": full_cf, "cf_mean": cf_mean,
780
+ "lift": att / cf_mean if cf_mean else float("nan"),
795
781
  "cumulative": float(att_path.sum()) * n_treated,
796
782
  "pre_rmspe": float(fit.pre_rmspe),
797
783
  }
798
784
 
799
- # Ensemble: weight-average the post-period effect paths, then summarize.
800
- order = methods
785
+ # --- in-space placebo: refit each donor as if it were treated ---
786
+ treated_set = set(idx)
787
+ donors = [u for u in range(self.n) if u not in treated_set]
788
+ if len(donors) > int(max_placebo):
789
+ rng = np.random.default_rng(int(seed))
790
+ donors = sorted(int(j) for j in rng.choice(donors, int(max_placebo), replace=False))
791
+ pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
792
+ for j in donors:
793
+ for m in methods:
794
+ fj = _fit(m, [j])
795
+ pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
796
+
797
+ # --- ensemble weights ---
798
+ def _placebo_att_sd(m):
799
+ if not pb[m]:
800
+ return 1.0
801
+ vals = np.array([p.mean() for (p, _) in pb[m]])
802
+ return float(np.std(vals)) if len(vals) > 1 else 1.0
801
803
  if isinstance(weights, str) and weights.lower() == "equal":
802
804
  wv = [1.0 / len(order)] * len(order)
803
805
  elif isinstance(weights, str) and weights.lower() == "auto":
804
- prec = [1.0 / max(per[m]["se"] ** 2, 1e-300) for m in order]
806
+ # inverse-variance from each method's placebo-null spread (precision)
807
+ prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
805
808
  s = sum(prec)
806
809
  wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
807
810
  elif isinstance(weights, dict):
808
- raw = [float(weights.get(m, weights.get(m.lower(), 0.0))) for m in order]
811
+ norm = {str(k).upper(): v for k, v in weights.items()} # case-insensitive
812
+ raw = [float(norm.get(m, 0.0)) for m in order]
809
813
  s = sum(raw)
810
814
  if s <= 0:
811
815
  raise ValueError("ensemble weights must sum to > 0")
@@ -817,71 +821,103 @@ class GeoDesign:
817
821
  s = sum(raw)
818
822
  wv = [r / s for r in raw]
819
823
  wmap = dict(zip(order, wv))
824
+ a = (1.0 - float(level)) / 2.0
820
825
 
826
+ def _ci(point, null_samples):
827
+ """Pivot CI: point estimate ± the placebo null spread (null ≈ 0).
828
+ Returns NaN when there are too few placebos to form an interval —
829
+ never a fake zero-width CI."""
830
+ if len(null_samples) >= 2:
831
+ return point + float(np.quantile(null_samples, a)), \
832
+ point + float(np.quantile(null_samples, 1.0 - a))
833
+ return float("nan"), float("nan")
834
+
835
+ def _kept_att(samples, treated_pre_m):
836
+ """Placebo att-means after the Abadie 2x pre-fit filter (fallback to
837
+ all placebos if too few comparable ones survive)."""
838
+ keep = [p.mean() for (p, pre) in samples
839
+ if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
840
+ if len(keep) < 5 and samples:
841
+ keep = [p.mean() for (p, _) in samples]
842
+ return np.array(keep)
843
+
844
+ # --- per-method point CIs from each method's placebo att spread (same
845
+ # 2x pre-fit filter as the ensemble, for internal consistency) ---
846
+ for m in order:
847
+ mp = _kept_att(pb[m], per[m]["pre_rmspe"])
848
+ lo, hi = _ci(per[m]["att"], mp)
849
+ cfm = per[m]["cf_mean"]
850
+ per[m]["att_lo"], per[m]["att_hi"] = lo, hi
851
+ per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
852
+ per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
853
+
854
+ # --- ensemble estimate + ensemble placebo paths (Abadie pre-fit filter) ---
821
855
  ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
822
856
  ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
823
857
  ens_att = float(ens_path.mean())
824
- se, lo, hi = _panelkit.bootstrap_mean(
825
- ens_path.tolist(), "stationary", int(block_len), int(n_boot),
826
- int(seed), float(level))
858
+ treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
859
+
860
+ ens_pb = [] # (path, pre_rmspe)
861
+ for di in range(len(donors)):
862
+ path = sum(wmap[m] * pb[m][di][0] for m in order)
863
+ pre = sum(wmap[m] * pb[m][di][1] for m in order)
864
+ ens_pb.append((path, pre))
865
+ kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
866
+ if len(kept) < 5: # too few comparable placebos → use all
867
+ kept = [p for (p, _) in ens_pb]
868
+ pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
869
+ n_pb = pb_mat.shape[0]
870
+
871
+ # pointwise + cumulative + mean CIs, all from the placebo null
872
+ if n_pb >= 2:
873
+ point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
874
+ point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
875
+ point_hw = float(np.quantile(np.abs(pb_mat), float(level)))
876
+ cum_pb = np.cumsum(pb_mat, axis=1)
877
+ run = np.cumsum(ens_path)
878
+ cum_lo_band = np.quantile(cum_pb, a, axis=0)
879
+ cum_hi_band = np.quantile(cum_pb, 1.0 - a, axis=0)
880
+ pb_att = pb_mat.mean(axis=1)
881
+ p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
882
+ else:
883
+ # too few comparable placebos → inference undefined (no fake band)
884
+ run = np.cumsum(ens_path)
885
+ point_lo = np.full(post_len, np.nan)
886
+ point_hi = np.full(post_len, np.nan)
887
+ point_hw = 0.0
888
+ cum_lo_band = cum_hi_band = np.full(post_len, np.nan)
889
+ pb_att = np.array([])
890
+ p_value = None
891
+ att_lo, att_hi = _ci(ens_att, pb_att)
892
+
893
+ cum_curve = run * n_treated
827
894
  ensemble = {
828
- "att": ens_att, "att_path": ens_path, "se": se,
829
- "att_lo": lo, "att_hi": hi,
895
+ "att": ens_att, "att_path": ens_path,
896
+ "att_lo": att_lo, "att_hi": att_hi,
830
897
  "lift": ens_att / ens_cf_mean if ens_cf_mean else float("nan"),
831
- "lift_lo": lo / ens_cf_mean if ens_cf_mean else float("nan"),
832
- "lift_hi": hi / ens_cf_mean if ens_cf_mean else float("nan"),
898
+ "lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
899
+ "lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
833
900
  "cumulative": float(ens_path.sum()) * n_treated,
834
- "weights": wmap,
901
+ "weights": wmap, "n_placebo": n_pb,
902
+ "low_power": n_pb < 8, # too few placebos for reliable inference
835
903
  }
836
904
 
837
- # Significance: SC in-space placebo p-value.
838
- sc = _panelkit.fit_sc(self.Y, idx, t0, 0.0, True, level)
839
- p_value = sc.p_value
840
-
841
- # Full-timeline ensemble counterfactual + gap path (pre-period shows fit,
842
- # post-period uses the exact ensemble effect).
905
+ # full-timeline counterfactual + gap path (pre shows fit; post = effect)
843
906
  ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
844
907
  full_gap = treated_series - ens_full_cf
845
- full_gap[t0:] = ens_path # exact ensemble post effect
846
- counterfactual = treated_series - full_gap # consistent everywhere
847
- pre_gaps = full_gap[:t0]
848
- sigma_pre = float(np.std(pre_gaps, ddof=1)) if t0 > 1 else float(np.std(pre_gaps))
849
-
850
- # CI bands from a MOVING-BLOCK BOOTSTRAP of the pre-period residuals.
851
- # Blocks preserve autocorrelation, so the bands are more conservative than
852
- # an iid normal approximation — especially the cumulative band, whose
853
- # spread grows faster than sqrt(k) under positive autocorrelation.
854
- post_len = self.t - t0
855
- a = (1.0 - float(level)) / 2.0
856
- paths = _placebo_paths(pre_gaps, post_len, int(block_len), int(n_boot), int(seed))
857
- if paths.size:
858
- point_lo = np.quantile(paths, a, axis=0)
859
- point_hi = np.quantile(paths, 1.0 - a, axis=0)
860
- point_hw = float(np.quantile(np.abs(paths), float(level))) # symmetric, full-timeline
861
- cum_paths = np.cumsum(paths, axis=1)
862
- cum_band_lo = np.quantile(cum_paths, a, axis=0)
863
- cum_band_hi = np.quantile(cum_paths, 1.0 - a, axis=0)
864
- else:
865
- point_lo = point_hi = np.zeros(post_len)
866
- point_hw = 0.0
867
- cum_band_lo = cum_band_hi = np.zeros(post_len)
868
-
869
- ens_post = ens_path
870
- run = np.cumsum(ens_post)
871
- cum_curve = run * n_treated
872
- cum_lo_curve = (run + cum_band_lo) * n_treated
873
- cum_hi_curve = (run + cum_band_hi) * n_treated
874
-
875
- ensemble["sigma_pre"] = sigma_pre
908
+ full_gap[t0:] = ens_path
909
+ counterfactual = treated_series - full_gap
876
910
  ensemble["full_gap"] = full_gap
877
- ensemble["point_hw"] = point_hw # constant pointwise half-width
878
- ensemble["point_lo"] = ens_post + point_lo # per-period CI on the effect
879
- ensemble["point_hi"] = ens_post + point_hi
880
- ensemble["cum_curve"] = cum_curve # cumulative incremental path
881
- ensemble["cum_lo_curve"] = cum_lo_curve
882
- ensemble["cum_hi_curve"] = cum_hi_curve
883
- ensemble["cum_lo"] = float(cum_lo_curve[-1]) if post_len else float("nan")
884
- ensemble["cum_hi"] = float(cum_hi_curve[-1]) if post_len else float("nan")
911
+ ensemble["sigma_pre"] = (float(np.std(full_gap[:t0], ddof=1)) if t0 > 1
912
+ else float(np.std(full_gap[:t0])))
913
+ ensemble["point_hw"] = point_hw
914
+ ensemble["point_lo"] = point_lo
915
+ ensemble["point_hi"] = point_hi
916
+ ensemble["cum_curve"] = cum_curve
917
+ ensemble["cum_lo_curve"] = (run + cum_lo_band) * n_treated
918
+ ensemble["cum_hi_curve"] = (run + cum_hi_band) * n_treated
919
+ ensemble["cum_lo"] = float(ensemble["cum_lo_curve"][-1]) if post_len else float("nan")
920
+ ensemble["cum_hi"] = float(ensemble["cum_hi_curve"][-1]) if post_len else float("nan")
885
921
 
886
922
  return _EvalReport(names, t0, n_treated, per, ensemble, p_value, level,
887
923
  treated_series, counterfactual)
@@ -981,11 +1017,14 @@ class _MultiCellReport:
981
1017
  f"({', '.join(map(str, self.cells))})")
982
1018
  lines.append(f"Test duration : {self.test_len} periods")
983
1019
  lines.append(f"Shared donor pool : {len(self.donor_names)} markets")
984
- lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume")
1020
+ lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume "
1021
+ f"(all cells together)")
985
1022
  lines.append(f"Powered at {int(100*self.target_power)}% power, "
986
1023
  f"{int(100*(1-self.alpha))}% confidence "
987
1024
  f"(each cell vs. the shared pool).")
988
1025
  lines.append("")
1026
+ # Per-cell 'Holdout' is that cell's share of its OWN sub-panel (cell +
1027
+ # shared donors); the Combined holdout above is over the full panel.
989
1028
  lines.append(f"{'Cell':<14}{'Markets':<28}{'MDE':>8}{'Conf':>7}{'Holdout':>9}")
990
1029
  lines.append("-" * 64)
991
1030
  for label, rep in self.cells.items():
@@ -1050,8 +1089,11 @@ class _EvalReport:
1050
1089
 
1051
1090
  @property
1052
1091
  def significant(self):
1053
- """True if the ensemble CI excludes zero (effect detected)."""
1092
+ """True if the ensemble CI is well-defined and excludes zero. Returns
1093
+ False when inference is undefined (too few placebos → NaN interval)."""
1054
1094
  lo, hi = self.ensemble["att_lo"], self.ensemble["att_hi"]
1095
+ if not (np.isfinite(lo) and np.isfinite(hi)):
1096
+ return False
1055
1097
  return (lo > 0) or (hi < 0)
1056
1098
 
1057
1099
  def summary(self) -> str:
@@ -1073,17 +1115,25 @@ class _EvalReport:
1073
1115
  lines.append(f" ensemble weights: {wstr}")
1074
1116
  lines.append("")
1075
1117
  if self.p_value is not None:
1076
- lines.append(f"SC in-space placebo p-value : {self.p_value:.3f}")
1077
- verdict = ("✓ Significant lift — the ensemble interval excludes zero."
1078
- if self.significant else
1079
- "~ Not distinguishable from zero at this level the ensemble "
1080
- "interval includes zero.")
1118
+ lines.append(f"In-space placebo p-value : {self.p_value:.3f} "
1119
+ f"(ensemble, {e.get('n_placebo', 0)} donors)")
1120
+ if e.get("low_power"):
1121
+ lines.append(" Few comparable donors inference is low-powered; treat "
1122
+ "intervals/p-value with caution.")
1123
+ if self.significant:
1124
+ verdict = "✓ Significant lift — the ensemble interval excludes zero."
1125
+ elif not (np.isfinite(e["att_lo"]) and np.isfinite(e["att_hi"])):
1126
+ verdict = ("? Inference undefined — too few comparable donor placebos "
1127
+ "to form an interval.")
1128
+ else:
1129
+ verdict = ("~ Not distinguishable from zero at this level — the ensemble "
1130
+ "interval includes zero.")
1081
1131
  lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
1082
1132
  f"{e['cumulative']:,.0f} cumulative incremental")
1083
1133
  if "cum_lo" in e:
1084
1134
  lines.append(f"Cumulative {cl}% CI : "
1085
1135
  f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
1086
- f"(moving-block bootstrap, block_len-aware)")
1136
+ f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
1087
1137
  lines.append(verdict)
1088
1138
  lines.append("=" * 66)
1089
1139
  return "\n".join(lines)
@@ -1569,7 +1619,7 @@ def _plot_eval(rep: "_EvalReport", path):
1569
1619
  axc.set_title("Lift by method", fontweight="bold")
1570
1620
  axc.grid(True, axis="x", alpha=0.25)
1571
1621
 
1572
- pv = f" · SC placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
1622
+ pv = f" · placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
1573
1623
  verdict = "significant" if rep.significant else "not significant"
1574
1624
  fig.suptitle(f"panelkit · test evaluation — ensemble lift "
1575
1625
  f"{100*rep.ensemble['lift']:+.2f}% ({verdict}){pv}",
@@ -1582,10 +1632,10 @@ def _plot_eval(rep: "_EvalReport", path):
1582
1632
  def _plot_eval_timeline(rep: "_EvalReport", path):
1583
1633
  """Pointwise + cumulative effect over the full timeline, with CI bands.
1584
1634
 
1585
- Bands come from a moving-block bootstrap of the pre-period residuals (so they
1586
- capture autocorrelation): the pointwise band is the per-period placebo spread
1587
- around the estimate; the cumulative band grows with horizon as the bootstrap
1588
- placebo cumulative-sums spread out."""
1635
+ Bands come from the in-space placebo distribution (every donor refit as if
1636
+ treated): the pointwise band is the per-period placebo spread around the
1637
+ estimate; the cumulative band grows with horizon as the placebo
1638
+ cumulative-sums spread out."""
1589
1639
  _, plt = _require_mpl()
1590
1640
  import numpy as _np
1591
1641
  from matplotlib.gridspec import GridSpec
@@ -1632,7 +1682,7 @@ def _plot_eval_timeline(rep: "_EvalReport", path):
1632
1682
  cum = e["cum_curve"]
1633
1683
  axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
1634
1684
  axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
1635
- alpha=0.15, label=f"{cl}% band (block bootstrap)")
1685
+ alpha=0.15, label=f"{cl}% band (in-space placebo)")
1636
1686
  axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
1637
1687
  axc.axhline(0, color="#111827", lw=1.0)
1638
1688
  axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
File without changes
File without changes
File without changes
File without changes
File without changes