panelkit 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {panelkit-0.2.3 → panelkit-0.2.4}/Cargo.lock +5 -5
  2. {panelkit-0.2.3 → panelkit-0.2.4}/Cargo.toml +1 -1
  3. {panelkit-0.2.3 → panelkit-0.2.4}/GUIDE.md +17 -11
  4. {panelkit-0.2.3 → panelkit-0.2.4}/PKG-INFO +2 -2
  5. {panelkit-0.2.3 → panelkit-0.2.4}/README.md +1 -1
  6. {panelkit-0.2.3 → panelkit-0.2.4}/pyproject.toml +1 -1
  7. {panelkit-0.2.3 → panelkit-0.2.4}/python/panelkit/design.py +129 -110
  8. {panelkit-0.2.3 → panelkit-0.2.4}/BENCHMARKS.md +0 -0
  9. {panelkit-0.2.3 → panelkit-0.2.4}/LICENSE-APACHE +0 -0
  10. {panelkit-0.2.3 → panelkit-0.2.4}/LICENSE-MIT +0 -0
  11. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/Cargo.toml +0 -0
  12. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/benches/estimators.rs +0 -0
  13. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/did/bacon.rs +0 -0
  14. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/did/callaway.rs +0 -0
  15. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/did/mod.rs +0 -0
  16. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/did/sunab.rs +0 -0
  17. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/did/twfe.rs +0 -0
  18. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/fe/mod.rs +0 -0
  19. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/fe/within.rs +0 -0
  20. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/lib.rs +0 -0
  21. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/mcnnm/mod.rs +0 -0
  22. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
  23. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/panel.rs +0 -0
  24. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/result.rs +0 -0
  25. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/sc/augmented.rs +0 -0
  26. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/sc/cpasc.rs +0 -0
  27. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/sc/mod.rs +0 -0
  28. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/sc/sdid.rs +0 -0
  29. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/src/sc/synthetic.rs +0 -0
  30. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/tests/cpasc.rs +0 -0
  31. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/tests/did.rs +0 -0
  32. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/tests/sc.rs +0 -0
  33. {panelkit-0.2.3 → panelkit-0.2.4}/crates/estimators/tests/sc_family.rs +0 -0
  34. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/Cargo.toml +0 -0
  35. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/src/diagnostics.rs +0 -0
  36. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/src/lib.rs +0 -0
  37. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/src/power.rs +0 -0
  38. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/src/selection.rs +0 -0
  39. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/src/types.rs +0 -0
  40. {panelkit-0.2.3 → panelkit-0.2.4}/crates/geo/tests/geo.rs +0 -0
  41. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/Cargo.toml +0 -0
  42. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/src/batch.rs +0 -0
  43. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/src/bootstrap.rs +0 -0
  44. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/src/ci.rs +0 -0
  45. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/src/lib.rs +0 -0
  46. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/src/parallel.rs +0 -0
  47. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/src/placebo.rs +0 -0
  48. {panelkit-0.2.3 → panelkit-0.2.4}/crates/inference/tests/inference.rs +0 -0
  49. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/Cargo.toml +0 -0
  50. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/error.rs +0 -0
  51. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/cholesky.rs +0 -0
  52. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/eig_sym.rs +0 -0
  53. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/mod.rs +0 -0
  54. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/qr.rs +0 -0
  55. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/randomized.rs +0 -0
  56. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/svd.rs +0 -0
  57. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/factor/svd_gram.rs +0 -0
  58. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/lib.rs +0 -0
  59. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/matrix.rs +0 -0
  60. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/ops/matmul.rs +0 -0
  61. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/ops/mod.rs +0 -0
  62. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/ops/norms.rs +0 -0
  63. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/ops/transform.rs +0 -0
  64. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/opt/mod.rs +0 -0
  65. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/opt/simplex.rs +0 -0
  66. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/opt/softthresh.rs +0 -0
  67. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/rng.rs +0 -0
  68. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/solve/lstsq.rs +0 -0
  69. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/solve/mod.rs +0 -0
  70. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/src/solve/spd.rs +0 -0
  71. {panelkit-0.2.3 → panelkit-0.2.4}/crates/linalg/tests/numerics.rs +0 -0
  72. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/Cargo.toml +0 -0
  73. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/src/api_did.rs +0 -0
  74. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/src/api_geo.rs +0 -0
  75. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/src/api_sc.rs +0 -0
  76. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/src/convert.rs +0 -0
  77. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/src/lib.rs +0 -0
  78. {panelkit-0.2.3 → panelkit-0.2.4}/crates/pypanelkit/src/results.rs +0 -0
  79. {panelkit-0.2.3 → panelkit-0.2.4}/python/panelkit/__init__.py +0 -0
  80. {panelkit-0.2.3 → panelkit-0.2.4}/python/panelkit/_panelkit.pyi +0 -0
  81. {panelkit-0.2.3 → panelkit-0.2.4}/python/panelkit/estimators.py +0 -0
  82. {panelkit-0.2.3 → panelkit-0.2.4}/python/panelkit/py.typed +0 -0
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
462
462
 
463
463
  [[package]]
464
464
  name = "panelkit-estimators"
465
- version = "0.2.3"
465
+ version = "0.2.4"
466
466
  dependencies = [
467
467
  "criterion",
468
468
  "panelkit-linalg",
@@ -471,7 +471,7 @@ dependencies = [
471
471
 
472
472
  [[package]]
473
473
  name = "panelkit-geo"
474
- version = "0.2.3"
474
+ version = "0.2.4"
475
475
  dependencies = [
476
476
  "panelkit-estimators",
477
477
  "panelkit-inference",
@@ -482,7 +482,7 @@ dependencies = [
482
482
 
483
483
  [[package]]
484
484
  name = "panelkit-inference"
485
- version = "0.2.3"
485
+ version = "0.2.4"
486
486
  dependencies = [
487
487
  "panelkit-estimators",
488
488
  "panelkit-linalg",
@@ -491,7 +491,7 @@ dependencies = [
491
491
 
492
492
  [[package]]
493
493
  name = "panelkit-linalg"
494
- version = "0.2.3"
494
+ version = "0.2.4"
495
495
  dependencies = [
496
496
  "proptest",
497
497
  "rayon",
@@ -623,7 +623,7 @@ dependencies = [
623
623
 
624
624
  [[package]]
625
625
  name = "pypanelkit"
626
- version = "0.2.3"
626
+ version = "0.2.4"
627
627
  dependencies = [
628
628
  "numpy",
629
629
  "panelkit-estimators",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/linalg", "crates/estimators", "crates/inference", "crates/geo", "crates/pypanelkit"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.2.3"
6
+ version = "0.2.4"
7
7
  edition = "2021"
8
8
  rust-version = "1.74"
9
9
  license = "MIT OR Apache-2.0"
@@ -300,10 +300,16 @@ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ C
300
300
  ev.lift, ev.cumulative, ev.significant
301
301
  ```
302
302
 
303
- Each estimate gets a confidence interval from a **stationary block bootstrap** of
304
- its post-period effect path; an **SC in-space placebo** supplies a p-value. The
305
- ensemble uses the same `weights` choices as `power()` (`"auto"` = inverse-variance
306
- from each method's bootstrap SE, `"equal"`, or an explicit dict/list). `ev` exposes
303
+ Inference is **in-space placebo** (Abadie): every donor market is refit as if it
304
+ were the treated one, and the spread of *their* post-period effects is the null
305
+ reference — capturing out-of-sample extrapolation error, the real source of
306
+ uncertainty. (A bootstrap of the treated unit's own post-period only sees
307
+ in-sample noise and is wildly anti-conservative — on null data its 90% interval
308
+ falsely flags an effect ~50% of the time; the placebo version sits at/below the
309
+ nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
310
+ dropped, per Abadie. The p-value is the placebo rank of the treated effect, and
311
+ `"auto"` ensemble weights are inverse-variance from each method's placebo-null
312
+ spread. `ev` exposes
307
313
  `.lift`, `.att`, `.cumulative`, `.significant`, the per-method results in `ev.per`,
308
314
  and the ensemble in `ev.ensemble`. Reported numbers: **% lift** (effect ÷
309
315
  counterfactual), **per-period ATT**, and **cumulative incremental** over the
@@ -315,13 +321,13 @@ you can see it sits flat (centered on zero) inside the noise band before the tes
315
321
  starts (a placebo check) and breaks out after — and the running **cumulative
316
322
  incremental**, each as a point estimate with a confidence band. The counterfactual
317
323
  is centered on the pre-period, so the gap shows fit quality rather than a level
318
- offset (SDID matches trends, not levels). The bands come from a **moving-block
319
- bootstrap** of the pre-period residuals: resampling whole blocks preserves their
320
- autocorrelation, so the intervals are more conservative than an iid normal
321
- approximation — the cumulative band in particular widens faster than √k when the
322
- residuals are positively autocorrelated. Raise `block_len` to capture longer-range
323
- dependence (wider, more conservative cumulative bands). Pass `exclude=[…]` to drop
324
- markets from the control pool (e.g. ones you don't trust as donors).
324
+ offset (SDID matches trends, not levels). The bands come from the **in-space
325
+ placebo** distribution: at each horizon, the pointwise band is the spread of the
326
+ donor placebos' per-period effects, and the cumulative band is the spread of their
327
+ cumulative sums (so it fans out with horizon). Placebo inference needs a decent
328
+ donor pool to have power — with only a handful of comparable donors the intervals
329
+ are necessarily wide. Pass `exclude=[…]` to drop markets from the control pool
330
+ (e.g. ones you don't trust as donors).
325
331
 
326
332
  ### Choosing a specification — `design.recommend(test_lengths, n_geos_options, target_lift, alphas=…)`
327
333
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelkit
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Topic :: Scientific/Engineering
@@ -273,7 +273,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
273
273
  **Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
274
274
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
275
275
  them into a weighted-average **ensemble** estimate, and report each one's lift,
276
- confidence interval (stationary block bootstrap), and cumulative incremental —
276
+ confidence interval (in-space placebo), and cumulative incremental —
277
277
  with an SC in-space placebo p-value:
278
278
 
279
279
  ![test evaluation](assets/geo_evaluate.png)
@@ -243,7 +243,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
243
243
  **Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
244
244
  the power analysis: fit SC / ASC / SDID on a test that already happened, blend
245
245
  them into a weighted-average **ensemble** estimate, and report each one's lift,
246
- confidence interval (stationary block bootstrap), and cumulative incremental —
246
+ confidence interval (in-space placebo), and cumulative incremental —
247
247
  with an SC in-space placebo p-value:
248
248
 
249
249
  ![test evaluation](assets/geo_evaluate.png)
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "panelkit"
7
- version = "0.2.3"
7
+ version = "0.2.4"
8
8
  description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -52,26 +52,6 @@ def _ensemble_weight_arg(spec):
52
52
  return w
53
53
 
54
54
 
55
- def _placebo_paths(pre_gaps, length, block_len, n_reps, seed):
56
- """Moving-block bootstrap of the (centered) pre-period residuals into placebo
57
- paths of ``length`` periods. Resampling whole blocks preserves the residual
58
- autocorrelation, so the resulting CI bands are more conservative than an iid
59
- normal approximation. Returns an ``(n_reps, length)`` array (empty if no
60
- pre-period or zero length)."""
61
- g = np.asarray(pre_gaps, dtype=float)
62
- m = len(g)
63
- if m == 0 or length <= 0 or n_reps <= 0:
64
- return np.empty((0, max(length, 0)))
65
- g = g - g.mean() # null is "no effect" → center the residuals
66
- rng = np.random.default_rng(int(seed))
67
- bl = max(1, min(int(block_len), m))
68
- n_blocks = int(np.ceil(length / bl))
69
- starts = rng.integers(0, m, size=(n_reps, n_blocks))
70
- idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
71
- paths = g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
72
- return paths
73
-
74
-
75
55
  class _PowerReport:
76
56
  """Result of a power analysis across methods, with a report and plots."""
77
57
 
@@ -701,8 +681,7 @@ class GeoDesign:
701
681
  methods: Sequence[str] = _METHODS,
702
682
  weights="auto",
703
683
  level: float = 0.90,
704
- n_boot: int = 2000,
705
- block_len: int = 4,
684
+ max_placebo: int = 200,
706
685
  seed: int = 0,
707
686
  exclude=None,
708
687
  ) -> "_EvalReport":
@@ -711,9 +690,15 @@ class GeoDesign:
711
690
  This is the measurement counterpart to :meth:`power`: given the treated
712
691
  markets and the period treatment began (``treat_start``, the first
713
692
  post-period column), it fits SC / ASC / SDID, reports each one's effect,
714
- and combines them into a weighted-average **ensemble** estimate. Each
715
- estimate gets a confidence interval from a stationary block bootstrap of
716
- its post-period effect path; an SC in-space placebo supplies a p-value.
693
+ and combines them into a weighted-average **ensemble** estimate.
694
+
695
+ Inference is **in-space placebo** (Abadie): every donor market is refit as
696
+ if it were the treated one, and the spread of *their* post-period effects
697
+ is the null reference. This captures out-of-sample extrapolation error —
698
+ the dominant source of uncertainty — so the intervals are calibrated
699
+ (unlike a bootstrap of the treated unit's own post-period, which only sees
700
+ in-sample noise and is far too narrow). Poorly-fit placebos (pre-period
701
+ RMSPE > 2× the treated unit's) are dropped, per Abadie.
717
702
 
718
703
  Parameters
719
704
  ----------
@@ -725,11 +710,13 @@ class GeoDesign:
725
710
  Which estimators to fit and blend.
726
711
  weights : "auto" | "equal" | dict
727
712
  Ensemble weighting. ``"auto"`` is inverse-variance (precision)
728
- weighting from each method's bootstrap standard error.
713
+ weighting from each method's placebo-null spread.
729
714
  level : float
730
715
  Confidence level for the intervals (e.g. 0.90).
731
- n_boot, block_len, seed :
732
- Stationary-bootstrap settings for the effect-path CIs.
716
+ max_placebo : int
717
+ Cap on the number of donor placebos used (sampled if exceeded).
718
+ seed : int
719
+ Seed for placebo sampling when ``max_placebo`` is exceeded.
733
720
 
734
721
  Returns
735
722
  -------
@@ -745,7 +732,7 @@ class GeoDesign:
745
732
  if bad:
746
733
  raise ValueError(f"treated markets were also excluded: {bad}")
747
734
  return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
748
- level=level, n_boot=n_boot, block_len=block_len, seed=seed)
735
+ level=level, max_placebo=max_placebo, seed=seed)
749
736
  idx = self._resolve(treated)
750
737
  names = [self.names[i] for i in idx]
751
738
  t0 = int(treat_start)
@@ -757,27 +744,28 @@ class GeoDesign:
757
744
  if unknown:
758
745
  raise ValueError(f"unknown methods {unknown}; choose from {_METHODS}")
759
746
 
760
- fitters = {
761
- "SC": lambda: _panelkit.fit_sc(self.Y, idx, t0, 0.0, False, level),
762
- "ASC": lambda: _panelkit.fit_asc(self.Y, idx, t0, 0.0, None),
763
- "SDID": lambda: _panelkit.fit_sdid(self.Y, idx, t0, 1.0),
764
- }
747
+ def _fit(method, tr):
748
+ if method == "SC":
749
+ return _panelkit.fit_sc(self.Y, tr, t0, 0.0, False, level)
750
+ if method == "ASC":
751
+ return _panelkit.fit_asc(self.Y, tr, t0, 0.0, None)
752
+ return _panelkit.fit_sdid(self.Y, tr, t0, 1.0)
753
+
765
754
  treated_series = self.Y[idx].mean(axis=0)
755
+ post_len = self.t - t0
756
+ order = methods
757
+
758
+ # --- point estimates on the treated set ---
766
759
  per = {}
767
760
  for m in methods:
768
- fit = fitters[m]()
761
+ fit = _fit(m, idx)
769
762
  att_path = np.asarray(fit.att_path, dtype=float)
770
763
  cf = np.asarray(fit.counterfactual, dtype=float)
771
764
  att = float(fit.att)
772
765
  cf_mean = float(np.mean(cf)) if cf.size else float("nan")
773
- se, lo, hi = _panelkit.bootstrap_mean(
774
- att_path.tolist(), "stationary", int(block_len), int(n_boot),
775
- int(seed), float(level))
776
- # Full-timeline counterfactual via donor weights (exact for SC; the
777
- # dominant term for ASC/SDID). Center on the pre-period so the gap
778
- # reflects FIT, not a level offset — SDID is level-agnostic (matches
779
- # trends, not levels), so its donor-weighted series sits at a constant
780
- # offset that would otherwise look like a non-zero pre-period.
766
+ # Full-timeline counterfactual via donor weights, centered on the
767
+ # pre-period so the gap reflects FIT, not a level offset (SDID matches
768
+ # trends, not levels).
781
769
  dids = np.asarray(fit.donor_ids, dtype=int)
782
770
  ws = np.asarray(fit.weights, dtype=float)
783
771
  if dids.size:
@@ -787,21 +775,35 @@ class GeoDesign:
787
775
  full_cf = np.full(self.t, np.nan)
788
776
  per[m] = {
789
777
  "att": att, "att_path": att_path, "counterfactual": cf,
790
- "full_cf": full_cf,
791
- "cf_mean": cf_mean, "lift": att / cf_mean if cf_mean else float("nan"),
792
- "se": se, "att_lo": lo, "att_hi": hi,
793
- "lift_lo": lo / cf_mean if cf_mean else float("nan"),
794
- "lift_hi": hi / cf_mean if cf_mean else float("nan"),
778
+ "full_cf": full_cf, "cf_mean": cf_mean,
779
+ "lift": att / cf_mean if cf_mean else float("nan"),
795
780
  "cumulative": float(att_path.sum()) * n_treated,
796
781
  "pre_rmspe": float(fit.pre_rmspe),
797
782
  }
798
783
 
799
- # Ensemble: weight-average the post-period effect paths, then summarize.
800
- order = methods
784
+ # --- in-space placebo: refit each donor as if it were treated ---
785
+ treated_set = set(idx)
786
+ donors = [u for u in range(self.n) if u not in treated_set]
787
+ if len(donors) > int(max_placebo):
788
+ rng = np.random.default_rng(int(seed))
789
+ donors = sorted(int(j) for j in rng.choice(donors, int(max_placebo), replace=False))
790
+ pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
791
+ for j in donors:
792
+ for m in methods:
793
+ fj = _fit(m, [j])
794
+ pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
795
+
796
+ # --- ensemble weights ---
797
+ def _placebo_att_sd(m):
798
+ if not pb[m]:
799
+ return 1.0
800
+ vals = np.array([p.mean() for (p, _) in pb[m]])
801
+ return float(np.std(vals)) if len(vals) > 1 else 1.0
801
802
  if isinstance(weights, str) and weights.lower() == "equal":
802
803
  wv = [1.0 / len(order)] * len(order)
803
804
  elif isinstance(weights, str) and weights.lower() == "auto":
804
- prec = [1.0 / max(per[m]["se"] ** 2, 1e-300) for m in order]
805
+ # inverse-variance from each method's placebo-null spread (precision)
806
+ prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
805
807
  s = sum(prec)
806
808
  wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
807
809
  elif isinstance(weights, dict):
@@ -817,71 +819,88 @@ class GeoDesign:
817
819
  s = sum(raw)
818
820
  wv = [r / s for r in raw]
819
821
  wmap = dict(zip(order, wv))
822
+ a = (1.0 - float(level)) / 2.0
820
823
 
824
+ def _ci(point, null_samples):
825
+ """Pivot CI: point estimate ± the placebo null spread (null ≈ 0)."""
826
+ if len(null_samples) >= 2:
827
+ return point + float(np.quantile(null_samples, a)), \
828
+ point + float(np.quantile(null_samples, 1.0 - a))
829
+ return point, point
830
+
831
+ # --- per-method point CIs from each method's placebo att spread ---
832
+ for m in order:
833
+ mp = np.array([p.mean() for (p, _) in pb[m]]) if pb[m] else np.array([])
834
+ lo, hi = _ci(per[m]["att"], mp)
835
+ cfm = per[m]["cf_mean"]
836
+ per[m]["att_lo"], per[m]["att_hi"] = lo, hi
837
+ per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
838
+ per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
839
+
840
+ # --- ensemble estimate + ensemble placebo paths (Abadie pre-fit filter) ---
821
841
  ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
822
842
  ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
823
843
  ens_att = float(ens_path.mean())
824
- se, lo, hi = _panelkit.bootstrap_mean(
825
- ens_path.tolist(), "stationary", int(block_len), int(n_boot),
826
- int(seed), float(level))
844
+ treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
845
+
846
+ ens_pb = [] # (path, pre_rmspe)
847
+ for di in range(len(donors)):
848
+ path = sum(wmap[m] * pb[m][di][0] for m in order)
849
+ pre = sum(wmap[m] * pb[m][di][1] for m in order)
850
+ ens_pb.append((path, pre))
851
+ kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
852
+ if len(kept) < 5: # too few comparable placebos → use all
853
+ kept = [p for (p, _) in ens_pb]
854
+ pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
855
+ n_pb = pb_mat.shape[0]
856
+
857
+ # pointwise + cumulative + mean CIs, all from the placebo null
858
+ if n_pb >= 2:
859
+ point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
860
+ point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
861
+ point_hw = float(np.quantile(np.abs(pb_mat), float(level)))
862
+ cum_pb = np.cumsum(pb_mat, axis=1)
863
+ run = np.cumsum(ens_path)
864
+ cum_lo_band = np.quantile(cum_pb, a, axis=0)
865
+ cum_hi_band = np.quantile(cum_pb, 1.0 - a, axis=0)
866
+ pb_att = pb_mat.mean(axis=1)
867
+ p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
868
+ else:
869
+ point_lo = point_hi = ens_path.copy()
870
+ point_hw = 0.0
871
+ run = np.cumsum(ens_path)
872
+ cum_lo_band = cum_hi_band = np.zeros(post_len)
873
+ pb_att = np.array([])
874
+ p_value = None
875
+ att_lo, att_hi = _ci(ens_att, pb_att)
876
+
877
+ cum_curve = run * n_treated
827
878
  ensemble = {
828
- "att": ens_att, "att_path": ens_path, "se": se,
829
- "att_lo": lo, "att_hi": hi,
879
+ "att": ens_att, "att_path": ens_path,
880
+ "att_lo": att_lo, "att_hi": att_hi,
830
881
  "lift": ens_att / ens_cf_mean if ens_cf_mean else float("nan"),
831
- "lift_lo": lo / ens_cf_mean if ens_cf_mean else float("nan"),
832
- "lift_hi": hi / ens_cf_mean if ens_cf_mean else float("nan"),
882
+ "lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
883
+ "lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
833
884
  "cumulative": float(ens_path.sum()) * n_treated,
834
- "weights": wmap,
885
+ "weights": wmap, "n_placebo": n_pb,
835
886
  }
836
887
 
837
- # Significance: SC in-space placebo p-value.
838
- sc = _panelkit.fit_sc(self.Y, idx, t0, 0.0, True, level)
839
- p_value = sc.p_value
840
-
841
- # Full-timeline ensemble counterfactual + gap path (pre-period shows fit,
842
- # post-period uses the exact ensemble effect).
888
+ # full-timeline counterfactual + gap path (pre shows fit; post = effect)
843
889
  ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
844
890
  full_gap = treated_series - ens_full_cf
845
- full_gap[t0:] = ens_path # exact ensemble post effect
846
- counterfactual = treated_series - full_gap # consistent everywhere
847
- pre_gaps = full_gap[:t0]
848
- sigma_pre = float(np.std(pre_gaps, ddof=1)) if t0 > 1 else float(np.std(pre_gaps))
849
-
850
- # CI bands from a MOVING-BLOCK BOOTSTRAP of the pre-period residuals.
851
- # Blocks preserve autocorrelation, so the bands are more conservative than
852
- # an iid normal approximation — especially the cumulative band, whose
853
- # spread grows faster than sqrt(k) under positive autocorrelation.
854
- post_len = self.t - t0
855
- a = (1.0 - float(level)) / 2.0
856
- paths = _placebo_paths(pre_gaps, post_len, int(block_len), int(n_boot), int(seed))
857
- if paths.size:
858
- point_lo = np.quantile(paths, a, axis=0)
859
- point_hi = np.quantile(paths, 1.0 - a, axis=0)
860
- point_hw = float(np.quantile(np.abs(paths), float(level))) # symmetric, full-timeline
861
- cum_paths = np.cumsum(paths, axis=1)
862
- cum_band_lo = np.quantile(cum_paths, a, axis=0)
863
- cum_band_hi = np.quantile(cum_paths, 1.0 - a, axis=0)
864
- else:
865
- point_lo = point_hi = np.zeros(post_len)
866
- point_hw = 0.0
867
- cum_band_lo = cum_band_hi = np.zeros(post_len)
868
-
869
- ens_post = ens_path
870
- run = np.cumsum(ens_post)
871
- cum_curve = run * n_treated
872
- cum_lo_curve = (run + cum_band_lo) * n_treated
873
- cum_hi_curve = (run + cum_band_hi) * n_treated
874
-
875
- ensemble["sigma_pre"] = sigma_pre
891
+ full_gap[t0:] = ens_path
892
+ counterfactual = treated_series - full_gap
876
893
  ensemble["full_gap"] = full_gap
877
- ensemble["point_hw"] = point_hw # constant pointwise half-width
878
- ensemble["point_lo"] = ens_post + point_lo # per-period CI on the effect
879
- ensemble["point_hi"] = ens_post + point_hi
880
- ensemble["cum_curve"] = cum_curve # cumulative incremental path
881
- ensemble["cum_lo_curve"] = cum_lo_curve
882
- ensemble["cum_hi_curve"] = cum_hi_curve
883
- ensemble["cum_lo"] = float(cum_lo_curve[-1]) if post_len else float("nan")
884
- ensemble["cum_hi"] = float(cum_hi_curve[-1]) if post_len else float("nan")
894
+ ensemble["sigma_pre"] = (float(np.std(full_gap[:t0], ddof=1)) if t0 > 1
895
+ else float(np.std(full_gap[:t0])))
896
+ ensemble["point_hw"] = point_hw
897
+ ensemble["point_lo"] = point_lo
898
+ ensemble["point_hi"] = point_hi
899
+ ensemble["cum_curve"] = cum_curve
900
+ ensemble["cum_lo_curve"] = (run + cum_lo_band) * n_treated
901
+ ensemble["cum_hi_curve"] = (run + cum_hi_band) * n_treated
902
+ ensemble["cum_lo"] = float(ensemble["cum_lo_curve"][-1]) if post_len else float("nan")
903
+ ensemble["cum_hi"] = float(ensemble["cum_hi_curve"][-1]) if post_len else float("nan")
885
904
 
886
905
  return _EvalReport(names, t0, n_treated, per, ensemble, p_value, level,
887
906
  treated_series, counterfactual)
@@ -1083,7 +1102,7 @@ class _EvalReport:
1083
1102
  if "cum_lo" in e:
1084
1103
  lines.append(f"Cumulative {cl}% CI : "
1085
1104
  f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
1086
- f"(moving-block bootstrap, block_len-aware)")
1105
+ f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
1087
1106
  lines.append(verdict)
1088
1107
  lines.append("=" * 66)
1089
1108
  return "\n".join(lines)
@@ -1582,10 +1601,10 @@ def _plot_eval(rep: "_EvalReport", path):
1582
1601
  def _plot_eval_timeline(rep: "_EvalReport", path):
1583
1602
  """Pointwise + cumulative effect over the full timeline, with CI bands.
1584
1603
 
1585
- Bands come from a moving-block bootstrap of the pre-period residuals (so they
1586
- capture autocorrelation): the pointwise band is the per-period placebo spread
1587
- around the estimate; the cumulative band grows with horizon as the bootstrap
1588
- placebo cumulative-sums spread out."""
1604
+ Bands come from the in-space placebo distribution (every donor refit as if
1605
+ treated): the pointwise band is the per-period placebo spread around the
1606
+ estimate; the cumulative band grows with horizon as the placebo
1607
+ cumulative-sums spread out."""
1589
1608
  _, plt = _require_mpl()
1590
1609
  import numpy as _np
1591
1610
  from matplotlib.gridspec import GridSpec
@@ -1632,7 +1651,7 @@ def _plot_eval_timeline(rep: "_EvalReport", path):
1632
1651
  cum = e["cum_curve"]
1633
1652
  axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
1634
1653
  axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
1635
- alpha=0.15, label=f"{cl}% band (block bootstrap)")
1654
+ alpha=0.15, label=f"{cl}% band (in-space placebo)")
1636
1655
  axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
1637
1656
  axc.axhline(0, color="#111827", lw=1.0)
1638
1657
  axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
File without changes
File without changes
File without changes
File without changes
File without changes