panelkit 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {panelkit-0.2.4 → panelkit-0.2.5}/Cargo.lock +5 -5
  2. {panelkit-0.2.4 → panelkit-0.2.5}/Cargo.toml +1 -1
  3. {panelkit-0.2.4 → panelkit-0.2.5}/PKG-INFO +3 -3
  4. {panelkit-0.2.4 → panelkit-0.2.5}/README.md +2 -2
  5. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/sc/sdid.rs +4 -0
  6. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/opt/simplex.rs +0 -3
  7. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/src/api_sc.rs +5 -3
  8. {panelkit-0.2.4 → panelkit-0.2.5}/pyproject.toml +1 -1
  9. {panelkit-0.2.4 → panelkit-0.2.5}/python/panelkit/_panelkit.pyi +1 -1
  10. {panelkit-0.2.4 → panelkit-0.2.5}/python/panelkit/design.py +52 -21
  11. {panelkit-0.2.4 → panelkit-0.2.5}/BENCHMARKS.md +0 -0
  12. {panelkit-0.2.4 → panelkit-0.2.5}/GUIDE.md +0 -0
  13. {panelkit-0.2.4 → panelkit-0.2.5}/LICENSE-APACHE +0 -0
  14. {panelkit-0.2.4 → panelkit-0.2.5}/LICENSE-MIT +0 -0
  15. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/Cargo.toml +0 -0
  16. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/benches/estimators.rs +0 -0
  17. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/did/bacon.rs +0 -0
  18. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/did/callaway.rs +0 -0
  19. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/did/mod.rs +0 -0
  20. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/did/sunab.rs +0 -0
  21. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/did/twfe.rs +0 -0
  22. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/fe/mod.rs +0 -0
  23. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/fe/within.rs +0 -0
  24. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/lib.rs +0 -0
  25. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/mcnnm/mod.rs +0 -0
  26. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
  27. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/panel.rs +0 -0
  28. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/result.rs +0 -0
  29. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/sc/augmented.rs +0 -0
  30. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/sc/cpasc.rs +0 -0
  31. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/sc/mod.rs +0 -0
  32. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/src/sc/synthetic.rs +0 -0
  33. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/tests/cpasc.rs +0 -0
  34. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/tests/did.rs +0 -0
  35. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/tests/sc.rs +0 -0
  36. {panelkit-0.2.4 → panelkit-0.2.5}/crates/estimators/tests/sc_family.rs +0 -0
  37. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/Cargo.toml +0 -0
  38. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/src/diagnostics.rs +0 -0
  39. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/src/lib.rs +0 -0
  40. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/src/power.rs +0 -0
  41. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/src/selection.rs +0 -0
  42. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/src/types.rs +0 -0
  43. {panelkit-0.2.4 → panelkit-0.2.5}/crates/geo/tests/geo.rs +0 -0
  44. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/Cargo.toml +0 -0
  45. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/src/batch.rs +0 -0
  46. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/src/bootstrap.rs +0 -0
  47. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/src/ci.rs +0 -0
  48. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/src/lib.rs +0 -0
  49. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/src/parallel.rs +0 -0
  50. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/src/placebo.rs +0 -0
  51. {panelkit-0.2.4 → panelkit-0.2.5}/crates/inference/tests/inference.rs +0 -0
  52. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/Cargo.toml +0 -0
  53. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/error.rs +0 -0
  54. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/cholesky.rs +0 -0
  55. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/eig_sym.rs +0 -0
  56. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/mod.rs +0 -0
  57. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/qr.rs +0 -0
  58. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/randomized.rs +0 -0
  59. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/svd.rs +0 -0
  60. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/factor/svd_gram.rs +0 -0
  61. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/lib.rs +0 -0
  62. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/matrix.rs +0 -0
  63. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/ops/matmul.rs +0 -0
  64. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/ops/mod.rs +0 -0
  65. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/ops/norms.rs +0 -0
  66. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/ops/transform.rs +0 -0
  67. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/opt/mod.rs +0 -0
  68. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/opt/softthresh.rs +0 -0
  69. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/rng.rs +0 -0
  70. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/solve/lstsq.rs +0 -0
  71. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/solve/mod.rs +0 -0
  72. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/src/solve/spd.rs +0 -0
  73. {panelkit-0.2.4 → panelkit-0.2.5}/crates/linalg/tests/numerics.rs +0 -0
  74. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/Cargo.toml +0 -0
  75. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/src/api_did.rs +0 -0
  76. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/src/api_geo.rs +0 -0
  77. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/src/convert.rs +0 -0
  78. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/src/lib.rs +0 -0
  79. {panelkit-0.2.4 → panelkit-0.2.5}/crates/pypanelkit/src/results.rs +0 -0
  80. {panelkit-0.2.4 → panelkit-0.2.5}/python/panelkit/__init__.py +0 -0
  81. {panelkit-0.2.4 → panelkit-0.2.5}/python/panelkit/estimators.py +0 -0
  82. {panelkit-0.2.4 → panelkit-0.2.5}/python/panelkit/py.typed +0 -0
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
462
462
 
463
463
  [[package]]
464
464
  name = "panelkit-estimators"
465
- version = "0.2.4"
465
+ version = "0.2.5"
466
466
  dependencies = [
467
467
  "criterion",
468
468
  "panelkit-linalg",
@@ -471,7 +471,7 @@ dependencies = [
471
471
 
472
472
  [[package]]
473
473
  name = "panelkit-geo"
474
- version = "0.2.4"
474
+ version = "0.2.5"
475
475
  dependencies = [
476
476
  "panelkit-estimators",
477
477
  "panelkit-inference",
@@ -482,7 +482,7 @@ dependencies = [
482
482
 
483
483
  [[package]]
484
484
  name = "panelkit-inference"
485
- version = "0.2.4"
485
+ version = "0.2.5"
486
486
  dependencies = [
487
487
  "panelkit-estimators",
488
488
  "panelkit-linalg",
@@ -491,7 +491,7 @@ dependencies = [
491
491
 
492
492
  [[package]]
493
493
  name = "panelkit-linalg"
494
- version = "0.2.4"
494
+ version = "0.2.5"
495
495
  dependencies = [
496
496
  "proptest",
497
497
  "rayon",
@@ -623,7 +623,7 @@ dependencies = [
623
623
 
624
624
  [[package]]
625
625
  name = "pypanelkit"
626
- version = "0.2.4"
626
+ version = "0.2.5"
627
627
  dependencies = [
628
628
  "numpy",
629
629
  "panelkit-estimators",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/linalg", "crates/estimators", "crates/inference", "crates/geo", "crates/pypanelkit"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.2.4"
6
+ version = "0.2.5"
7
7
  edition = "2021"
8
8
  rust-version = "1.74"
9
9
  license = "MIT OR Apache-2.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelkit
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Topic :: Scientific/Engineering
@@ -274,7 +274,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
274
274
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
275
275
  them into a weighted-average **ensemble** estimate, and report each one's lift,
276
276
  confidence interval (in-space placebo), and cumulative incremental —
277
- with an SC in-space placebo p-value:
277
+ with an in-space placebo p-value:
278
278
 
279
279
  ![test evaluation](assets/geo_evaluate.png)
280
280
 
@@ -316,7 +316,7 @@ What you get out of the box:
316
316
  - **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
317
317
  with auto inverse-variance weights) for a steadier estimate than any one method.
318
318
  - **Post-test evaluation** — `evaluate()` measures a test that already ran:
319
- per-method + ensemble lift, bootstrap CIs, cumulative incremental, and a p-value.
319
+ per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
320
320
 
321
321
  See [`examples/geo_demo.py`](examples/geo_demo.py).
322
322
 
@@ -244,7 +244,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
244
244
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
245
245
  them into a weighted-average **ensemble** estimate, and report each one's lift,
246
246
  confidence interval (in-space placebo), and cumulative incremental —
247
- with an SC in-space placebo p-value:
247
+ with an in-space placebo p-value:
248
248
 
249
249
  ![test evaluation](assets/geo_evaluate.png)
250
250
 
@@ -286,7 +286,7 @@ What you get out of the box:
286
286
  - **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
287
287
  with auto inverse-variance weights) for a steadier estimate than any one method.
288
288
  - **Post-test evaluation** — `evaluate()` measures a test that already ran:
289
- per-method + ensemble lift, bootstrap CIs, cumulative incremental, and a p-value.
289
+ per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
290
290
 
291
291
  See [`examples/geo_demo.py`](examples/geo_demo.py).
292
292
 
@@ -87,6 +87,10 @@ pub fn fit_at(panel: &Panel, t0: usize, cfg: SdidConfig) -> ScFit {
87
87
  let t = panel.n_periods();
88
88
  let t_pre = t0;
89
89
  let t_post = t - t0;
90
+ assert!(
91
+ t_pre >= 1 && t_post >= 1,
92
+ "SDID needs at least one pre- and one post-period (t0 in 1..n_periods)"
93
+ );
90
94
  let n_tr = treated.len();
91
95
 
92
96
  // Treated-average series.
@@ -30,17 +30,14 @@ pub fn project_simplex(v: &[f64]) -> Vec<f64> {
30
30
  let mut u = v.to_vec();
31
31
  u.sort_by(|a, b| b.partial_cmp(a).unwrap()); // descending
32
32
  let mut css = 0.0;
33
- let mut rho = 0usize;
34
33
  let mut theta = 0.0;
35
34
  for (j, &uj) in u.iter().enumerate() {
36
35
  css += uj;
37
36
  let t = (css - 1.0) / (j as f64 + 1.0);
38
37
  if uj - t > 0.0 {
39
- rho = j + 1;
40
38
  theta = t;
41
39
  }
42
40
  }
43
- let _ = rho;
44
41
  v.iter().map(|&vi| (vi - theta).max(0.0)).collect()
45
42
  }
46
43
 
@@ -111,13 +111,15 @@ pub fn fit_sdid(
111
111
  /// Fit Matrix-Completion NNM (Athey et al. 2021). `max_rank`, when set, uses a
112
112
  /// fast randomized truncated SVD inside SoftImpute (big speedup, low-rank cap).
113
113
  #[pyfunction]
114
- #[pyo3(signature = (y, treated, treat_time, lambda=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
114
+ // `lambda_` (not `lambda`) so it is usable as a Python keyword argument —
115
+ // `lambda` is a reserved word in Python.
116
+ #[pyo3(signature = (y, treated, treat_time, lambda_=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
115
117
  #[allow(clippy::too_many_arguments)]
116
118
  pub fn fit_mcnnm(
117
119
  y: PyReadonlyArray2<f64>,
118
120
  treated: Vec<usize>,
119
121
  treat_time: usize,
120
- lambda: Option<f64>,
122
+ lambda_: Option<f64>,
121
123
  max_iter: usize,
122
124
  tol: f64,
123
125
  seed: u64,
@@ -125,7 +127,7 @@ pub fn fit_mcnnm(
125
127
  ) -> PyResult<PyScResult> {
126
128
  let panel = Panel::block(mat_from_numpy(&y), &treated, treat_time);
127
129
  let cfg = McnnmConfig {
128
- lambda,
130
+ lambda: lambda_,
129
131
  max_iter,
130
132
  tol,
131
133
  seed,
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "panelkit"
7
- version = "0.2.4"
7
+ version = "0.2.5"
8
8
  description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -82,7 +82,7 @@ def fit_mcnnm(
82
82
  y: npt.NDArray[np.float64],
83
83
  treated: Sequence[int],
84
84
  treat_time: int,
85
- lambda_: Optional[float] = ...,
85
+ lambda_: Optional[float] = ..., # NOTE: matches the Rust binding's `lambda_`
86
86
  max_iter: int = ...,
87
87
  tol: float = ...,
88
88
  seed: int = ...,
@@ -42,7 +42,8 @@ def _ensemble_weight_arg(spec):
42
42
  raise ValueError(f"unknown ensemble_weights {spec!r} (use 'auto', 'equal', "
43
43
  "a dict, or a 3-list)")
44
44
  if isinstance(spec, dict):
45
- w = [float(spec.get(m, spec.get(m.lower(), 0.0))) for m in _ENSEMBLE_ORDER]
45
+ norm = {str(k).upper(): v for k, v in spec.items()} # case-insensitive keys
46
+ w = [float(norm.get(m, 0.0)) for m in _ENSEMBLE_ORDER]
46
47
  else:
47
48
  w = [float(x) for x in spec]
48
49
  if len(w) != 3:
@@ -407,7 +408,7 @@ class GeoDesign:
407
408
  target_power=target_power, recommended=recommended,
408
409
  lookback=lookback, ensemble=ensemble,
409
410
  ensemble_weights=ensemble_weights)
410
- idx = self._resolve(treated)
411
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
411
412
  names = [self.names[i] for i in idx]
412
413
  lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
413
414
  if 0.0 not in lifts:
@@ -443,7 +444,7 @@ class GeoDesign:
443
444
  if bad:
444
445
  raise ValueError(f"treated markets were also excluded: {bad}")
445
446
  return sub.diagnose(tnames, test_len)
446
- idx = self._resolve(treated)
447
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
447
448
  names = [self.names[i] for i in idx]
448
449
  t0 = self.t - int(test_len)
449
450
  diag = _panelkit.geo_diagnostics(self.Y, idx, int(test_len))
@@ -733,7 +734,7 @@ class GeoDesign:
733
734
  raise ValueError(f"treated markets were also excluded: {bad}")
734
735
  return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
735
736
  level=level, max_placebo=max_placebo, seed=seed)
736
- idx = self._resolve(treated)
737
+ idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
737
738
  names = [self.names[i] for i in idx]
738
739
  t0 = int(treat_start)
739
740
  if not (1 <= t0 < self.t):
@@ -807,7 +808,8 @@ class GeoDesign:
807
808
  s = sum(prec)
808
809
  wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
809
810
  elif isinstance(weights, dict):
810
- raw = [float(weights.get(m, weights.get(m.lower(), 0.0))) for m in order]
811
+ norm = {str(k).upper(): v for k, v in weights.items()} # case-insensitive
812
+ raw = [float(norm.get(m, 0.0)) for m in order]
811
813
  s = sum(raw)
812
814
  if s <= 0:
813
815
  raise ValueError("ensemble weights must sum to > 0")
@@ -822,15 +824,27 @@ class GeoDesign:
822
824
  a = (1.0 - float(level)) / 2.0
823
825
 
824
826
  def _ci(point, null_samples):
825
- """Pivot CI: point estimate ± the placebo null spread (null ≈ 0)."""
827
+ """Pivot CI: point estimate ± the placebo null spread (null ≈ 0).
828
+ Returns NaN when there are too few placebos to form an interval —
829
+ never a fake zero-width CI."""
826
830
  if len(null_samples) >= 2:
827
831
  return point + float(np.quantile(null_samples, a)), \
828
832
  point + float(np.quantile(null_samples, 1.0 - a))
829
- return point, point
830
-
831
- # --- per-method point CIs from each method's placebo att spread ---
833
+ return float("nan"), float("nan")
834
+
835
+ def _kept_att(samples, treated_pre_m):
836
+ """Placebo att-means after the Abadie 2x pre-fit filter (fallback to
837
+ all placebos if too few comparable ones survive)."""
838
+ keep = [p.mean() for (p, pre) in samples
839
+ if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
840
+ if len(keep) < 5 and samples:
841
+ keep = [p.mean() for (p, _) in samples]
842
+ return np.array(keep)
843
+
844
+ # --- per-method point CIs from each method's placebo att spread (same
845
+ # 2x pre-fit filter as the ensemble, for internal consistency) ---
832
846
  for m in order:
833
- mp = np.array([p.mean() for (p, _) in pb[m]]) if pb[m] else np.array([])
847
+ mp = _kept_att(pb[m], per[m]["pre_rmspe"])
834
848
  lo, hi = _ci(per[m]["att"], mp)
835
849
  cfm = per[m]["cf_mean"]
836
850
  per[m]["att_lo"], per[m]["att_hi"] = lo, hi
@@ -866,10 +880,12 @@ class GeoDesign:
866
880
  pb_att = pb_mat.mean(axis=1)
867
881
  p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
868
882
  else:
869
- point_lo = point_hi = ens_path.copy()
870
- point_hw = 0.0
883
+ # too few comparable placebos → inference undefined (no fake band)
871
884
  run = np.cumsum(ens_path)
872
- cum_lo_band = cum_hi_band = np.zeros(post_len)
885
+ point_lo = np.full(post_len, np.nan)
886
+ point_hi = np.full(post_len, np.nan)
887
+ point_hw = 0.0
888
+ cum_lo_band = cum_hi_band = np.full(post_len, np.nan)
873
889
  pb_att = np.array([])
874
890
  p_value = None
875
891
  att_lo, att_hi = _ci(ens_att, pb_att)
@@ -883,6 +899,7 @@ class GeoDesign:
883
899
  "lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
884
900
  "cumulative": float(ens_path.sum()) * n_treated,
885
901
  "weights": wmap, "n_placebo": n_pb,
902
+ "low_power": n_pb < 8, # too few placebos for reliable inference
886
903
  }
887
904
 
888
905
  # full-timeline counterfactual + gap path (pre shows fit; post = effect)
@@ -1000,11 +1017,14 @@ class _MultiCellReport:
1000
1017
  f"({', '.join(map(str, self.cells))})")
1001
1018
  lines.append(f"Test duration : {self.test_len} periods")
1002
1019
  lines.append(f"Shared donor pool : {len(self.donor_names)} markets")
1003
- lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume")
1020
+ lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume "
1021
+ f"(all cells together)")
1004
1022
  lines.append(f"Powered at {int(100*self.target_power)}% power, "
1005
1023
  f"{int(100*(1-self.alpha))}% confidence "
1006
1024
  f"(each cell vs. the shared pool).")
1007
1025
  lines.append("")
1026
+ # Per-cell 'Holdout' is that cell's share of its OWN sub-panel (cell +
1027
+ # shared donors); the Combined holdout above is over the full panel.
1008
1028
  lines.append(f"{'Cell':<14}{'Markets':<28}{'MDE':>8}{'Conf':>7}{'Holdout':>9}")
1009
1029
  lines.append("-" * 64)
1010
1030
  for label, rep in self.cells.items():
@@ -1069,8 +1089,11 @@ class _EvalReport:
1069
1089
 
1070
1090
  @property
1071
1091
  def significant(self):
1072
- """True if the ensemble CI excludes zero (effect detected)."""
1092
+ """True if the ensemble CI is well-defined and excludes zero. Returns
1093
+ False when inference is undefined (too few placebos → NaN interval)."""
1073
1094
  lo, hi = self.ensemble["att_lo"], self.ensemble["att_hi"]
1095
+ if not (np.isfinite(lo) and np.isfinite(hi)):
1096
+ return False
1074
1097
  return (lo > 0) or (hi < 0)
1075
1098
 
1076
1099
  def summary(self) -> str:
@@ -1092,11 +1115,19 @@ class _EvalReport:
1092
1115
  lines.append(f" ensemble weights: {wstr}")
1093
1116
  lines.append("")
1094
1117
  if self.p_value is not None:
1095
- lines.append(f"SC in-space placebo p-value : {self.p_value:.3f}")
1096
- verdict = ("✓ Significant lift — the ensemble interval excludes zero."
1097
- if self.significant else
1098
- "~ Not distinguishable from zero at this level — the ensemble "
1099
- "interval includes zero.")
1118
+ lines.append(f"In-space placebo p-value : {self.p_value:.3f} "
1119
+ f"(ensemble, {e.get('n_placebo', 0)} donors)")
1120
+ if e.get("low_power"):
1121
+ lines.append(" Few comparable donors — inference is low-powered; treat "
1122
+ "intervals/p-value with caution.")
1123
+ if self.significant:
1124
+ verdict = "✓ Significant lift — the ensemble interval excludes zero."
1125
+ elif not (np.isfinite(e["att_lo"]) and np.isfinite(e["att_hi"])):
1126
+ verdict = ("? Inference undefined — too few comparable donor placebos "
1127
+ "to form an interval.")
1128
+ else:
1129
+ verdict = ("~ Not distinguishable from zero at this level — the ensemble "
1130
+ "interval includes zero.")
1100
1131
  lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
1101
1132
  f"{e['cumulative']:,.0f} cumulative incremental")
1102
1133
  if "cum_lo" in e:
@@ -1588,7 +1619,7 @@ def _plot_eval(rep: "_EvalReport", path):
1588
1619
  axc.set_title("Lift by method", fontweight="bold")
1589
1620
  axc.grid(True, axis="x", alpha=0.25)
1590
1621
 
1591
- pv = f" · SC placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
1622
+ pv = f" · placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
1592
1623
  verdict = "significant" if rep.significant else "not significant"
1593
1624
  fig.suptitle(f"panelkit · test evaluation — ensemble lift "
1594
1625
  f"{100*rep.ensemble['lift']:+.2f}% ({verdict}){pv}",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes