panelkit 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelkit-0.2.3 → panelkit-0.2.5}/Cargo.lock +5 -5
- {panelkit-0.2.3 → panelkit-0.2.5}/Cargo.toml +1 -1
- {panelkit-0.2.3 → panelkit-0.2.5}/GUIDE.md +17 -11
- {panelkit-0.2.3 → panelkit-0.2.5}/PKG-INFO +4 -4
- {panelkit-0.2.3 → panelkit-0.2.5}/README.md +3 -3
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/sdid.rs +4 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/opt/simplex.rs +0 -3
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/api_sc.rs +5 -3
- {panelkit-0.2.3 → panelkit-0.2.5}/pyproject.toml +1 -1
- {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/_panelkit.pyi +1 -1
- {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/design.py +173 -123
- {panelkit-0.2.3 → panelkit-0.2.5}/BENCHMARKS.md +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/LICENSE-APACHE +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/LICENSE-MIT +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/Cargo.toml +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/benches/estimators.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/bacon.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/callaway.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/sunab.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/did/twfe.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/fe/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/fe/within.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/lib.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/mcnnm/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/panel.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/result.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/augmented.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/cpasc.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/src/sc/synthetic.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/cpasc.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/did.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/sc.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/estimators/tests/sc_family.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/Cargo.toml +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/diagnostics.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/lib.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/power.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/selection.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/src/types.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/geo/tests/geo.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/Cargo.toml +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/batch.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/bootstrap.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/ci.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/lib.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/parallel.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/src/placebo.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/inference/tests/inference.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/Cargo.toml +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/error.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/cholesky.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/eig_sym.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/qr.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/randomized.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/svd.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/factor/svd_gram.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/lib.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/matrix.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/matmul.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/norms.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/ops/transform.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/opt/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/opt/softthresh.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/rng.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/solve/lstsq.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/solve/mod.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/src/solve/spd.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/linalg/tests/numerics.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/Cargo.toml +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/api_did.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/api_geo.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/convert.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/lib.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/crates/pypanelkit/src/results.rs +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/__init__.py +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/estimators.py +0 -0
- {panelkit-0.2.3 → panelkit-0.2.5}/python/panelkit/py.typed +0 -0
|
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
|
|
|
462
462
|
|
|
463
463
|
[[package]]
|
|
464
464
|
name = "panelkit-estimators"
|
|
465
|
-
version = "0.2.3"
|
|
465
|
+
version = "0.2.5"
|
|
466
466
|
dependencies = [
|
|
467
467
|
"criterion",
|
|
468
468
|
"panelkit-linalg",
|
|
@@ -471,7 +471,7 @@ dependencies = [
|
|
|
471
471
|
|
|
472
472
|
[[package]]
|
|
473
473
|
name = "panelkit-geo"
|
|
474
|
-
version = "0.2.3"
|
|
474
|
+
version = "0.2.5"
|
|
475
475
|
dependencies = [
|
|
476
476
|
"panelkit-estimators",
|
|
477
477
|
"panelkit-inference",
|
|
@@ -482,7 +482,7 @@ dependencies = [
|
|
|
482
482
|
|
|
483
483
|
[[package]]
|
|
484
484
|
name = "panelkit-inference"
|
|
485
|
-
version = "0.2.3"
|
|
485
|
+
version = "0.2.5"
|
|
486
486
|
dependencies = [
|
|
487
487
|
"panelkit-estimators",
|
|
488
488
|
"panelkit-linalg",
|
|
@@ -491,7 +491,7 @@ dependencies = [
|
|
|
491
491
|
|
|
492
492
|
[[package]]
|
|
493
493
|
name = "panelkit-linalg"
|
|
494
|
-
version = "0.2.3"
|
|
494
|
+
version = "0.2.5"
|
|
495
495
|
dependencies = [
|
|
496
496
|
"proptest",
|
|
497
497
|
"rayon",
|
|
@@ -623,7 +623,7 @@ dependencies = [
|
|
|
623
623
|
|
|
624
624
|
[[package]]
|
|
625
625
|
name = "pypanelkit"
|
|
626
|
-
version = "0.2.3"
|
|
626
|
+
version = "0.2.5"
|
|
627
627
|
dependencies = [
|
|
628
628
|
"numpy",
|
|
629
629
|
"panelkit-estimators",
|
|
@@ -300,10 +300,16 @@ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ C
|
|
|
300
300
|
ev.lift, ev.cumulative, ev.significant
|
|
301
301
|
```
|
|
302
302
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
303
|
+
Inference is **in-space placebo** (Abadie): every donor market is refit as if it
|
|
304
|
+
were the treated one, and the spread of *their* post-period effects is the null
|
|
305
|
+
reference — capturing out-of-sample extrapolation error, the real source of
|
|
306
|
+
uncertainty. (A bootstrap of the treated unit's own pre-period residuals only sees
|
|
307
|
+
in-sample noise and is wildly anti-conservative — on null data its 90% interval
|
|
308
|
+
falsely flags an effect ~50% of the time; the placebo version sits at/below the
|
|
309
|
+
nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
|
|
310
|
+
dropped, per Abadie (unless fewer than five survive, in which case all placebos are kept). The p-value is the placebo rank of the treated effect, and
|
|
311
|
+
`"auto"` ensemble weights are inverse-variance from each method's placebo-null
|
|
312
|
+
spread. `ev` exposes
|
|
307
313
|
`.lift`, `.att`, `.cumulative`, `.significant`, the per-method results in `ev.per`,
|
|
308
314
|
and the ensemble in `ev.ensemble`. Reported numbers: **% lift** (effect ÷
|
|
309
315
|
counterfactual), **per-period ATT**, and **cumulative incremental** over the
|
|
@@ -315,13 +321,13 @@ you can see it sits flat (centered on zero) inside the noise band before the tes
|
|
|
315
321
|
starts (a placebo check) and breaks out after — and the running **cumulative
|
|
316
322
|
incremental**, each as a point estimate with a confidence band. The counterfactual
|
|
317
323
|
is centered on the pre-period, so the gap shows fit quality rather than a level
|
|
318
|
-
offset (SDID matches trends, not levels). The bands come from
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
324
|
+
offset (SDID matches trends, not levels). The bands come from the **in-space
|
|
325
|
+
placebo** distribution: at each horizon, the pointwise band is the spread of the
|
|
326
|
+
donor placebos' per-period effects, and the cumulative band is the spread of their
|
|
327
|
+
cumulative sums (so it fans out with horizon). Placebo inference needs a decent
|
|
328
|
+
donor pool to have power — with only a handful of comparable donors the intervals
|
|
329
|
+
are necessarily wide. Pass `exclude=[…]` to drop markets from the control pool
|
|
330
|
+
(e.g. ones you don't trust as donors).
|
|
325
331
|
|
|
326
332
|
### Choosing a specification — `design.recommend(test_lengths, n_geos_options, target_lift, alphas=…)`
|
|
327
333
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: panelkit
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Topic :: Scientific/Engineering
|
|
@@ -273,8 +273,8 @@ per-cell MDE/confidence/holdout report and a combined figure:
|
|
|
273
273
|
**Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
|
|
274
274
|
the power analysis: fit SC / ASC / SDID on a test that already happened, blend
|
|
275
275
|
them into a weighted-average **ensemble** estimate, and report each one's lift,
|
|
276
|
-
confidence interval (
|
|
277
|
-
with an
|
|
276
|
+
confidence interval (in-space placebo), and cumulative incremental —
|
|
277
|
+
with an in-space placebo p-value:
|
|
278
278
|
|
|
279
279
|

|
|
280
280
|
|
|
@@ -316,7 +316,7 @@ What you get out of the box:
|
|
|
316
316
|
- **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
|
|
317
317
|
with auto inverse-variance weights) for a steadier estimate than any one method.
|
|
318
318
|
- **Post-test evaluation** — `evaluate()` measures a test that already ran:
|
|
319
|
-
per-method + ensemble lift,
|
|
319
|
+
per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
|
|
320
320
|
|
|
321
321
|
See [`examples/geo_demo.py`](examples/geo_demo.py).
|
|
322
322
|
|
|
@@ -243,8 +243,8 @@ per-cell MDE/confidence/holdout report and a combined figure:
|
|
|
243
243
|
**Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
|
|
244
244
|
the power analysis: fit SC / ASC / SDID on a test that already happened, blend
|
|
245
245
|
them into a weighted-average **ensemble** estimate, and report each one's lift,
|
|
246
|
-
confidence interval (
|
|
247
|
-
with an
|
|
246
|
+
confidence interval (in-space placebo), and cumulative incremental —
|
|
247
|
+
with an in-space placebo p-value:
|
|
248
248
|
|
|
249
249
|

|
|
250
250
|
|
|
@@ -286,7 +286,7 @@ What you get out of the box:
|
|
|
286
286
|
- **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
|
|
287
287
|
with auto inverse-variance weights) for a steadier estimate than any one method.
|
|
288
288
|
- **Post-test evaluation** — `evaluate()` measures a test that already ran:
|
|
289
|
-
per-method + ensemble lift,
|
|
289
|
+
per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
|
|
290
290
|
|
|
291
291
|
See [`examples/geo_demo.py`](examples/geo_demo.py).
|
|
292
292
|
|
|
@@ -87,6 +87,10 @@ pub fn fit_at(panel: &Panel, t0: usize, cfg: SdidConfig) -> ScFit {
|
|
|
87
87
|
let t = panel.n_periods();
|
|
88
88
|
let t_pre = t0;
|
|
89
89
|
let t_post = t - t0;
|
|
90
|
+
assert!(
|
|
91
|
+
t_pre >= 1 && t_post >= 1,
|
|
92
|
+
"SDID needs at least one pre- and one post-period (t0 in 1..n_periods)"
|
|
93
|
+
);
|
|
90
94
|
let n_tr = treated.len();
|
|
91
95
|
|
|
92
96
|
// Treated-average series.
|
|
@@ -30,17 +30,14 @@ pub fn project_simplex(v: &[f64]) -> Vec<f64> {
|
|
|
30
30
|
let mut u = v.to_vec();
|
|
31
31
|
u.sort_by(|a, b| b.partial_cmp(a).unwrap()); // descending
|
|
32
32
|
let mut css = 0.0;
|
|
33
|
-
let mut rho = 0usize;
|
|
34
33
|
let mut theta = 0.0;
|
|
35
34
|
for (j, &uj) in u.iter().enumerate() {
|
|
36
35
|
css += uj;
|
|
37
36
|
let t = (css - 1.0) / (j as f64 + 1.0);
|
|
38
37
|
if uj - t > 0.0 {
|
|
39
|
-
rho = j + 1;
|
|
40
38
|
theta = t;
|
|
41
39
|
}
|
|
42
40
|
}
|
|
43
|
-
let _ = rho;
|
|
44
41
|
v.iter().map(|&vi| (vi - theta).max(0.0)).collect()
|
|
45
42
|
}
|
|
46
43
|
|
|
@@ -111,13 +111,15 @@ pub fn fit_sdid(
|
|
|
111
111
|
/// Fit Matrix-Completion NNM (Athey et al. 2021). `max_rank`, when set, uses a
|
|
112
112
|
/// fast randomized truncated SVD inside SoftImpute (big speedup, low-rank cap).
|
|
113
113
|
#[pyfunction]
|
|
114
|
-
|
|
114
|
+
// `lambda_` (not `lambda`) so it is usable as a Python keyword argument —
|
|
115
|
+
// `lambda` is a reserved word in Python.
|
|
116
|
+
#[pyo3(signature = (y, treated, treat_time, lambda_=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
|
|
115
117
|
#[allow(clippy::too_many_arguments)]
|
|
116
118
|
pub fn fit_mcnnm(
|
|
117
119
|
y: PyReadonlyArray2<f64>,
|
|
118
120
|
treated: Vec<usize>,
|
|
119
121
|
treat_time: usize,
|
|
120
|
-
|
|
122
|
+
lambda_: Option<f64>,
|
|
121
123
|
max_iter: usize,
|
|
122
124
|
tol: f64,
|
|
123
125
|
seed: u64,
|
|
@@ -125,7 +127,7 @@ pub fn fit_mcnnm(
|
|
|
125
127
|
) -> PyResult<PyScResult> {
|
|
126
128
|
let panel = Panel::block(mat_from_numpy(&y), &treated, treat_time);
|
|
127
129
|
let cfg = McnnmConfig {
|
|
128
|
-
lambda,
|
|
130
|
+
lambda: lambda_,
|
|
129
131
|
max_iter,
|
|
130
132
|
tol,
|
|
131
133
|
seed,
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "panelkit"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.2.5"
|
|
8
8
|
description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -82,7 +82,7 @@ def fit_mcnnm(
|
|
|
82
82
|
y: npt.NDArray[np.float64],
|
|
83
83
|
treated: Sequence[int],
|
|
84
84
|
treat_time: int,
|
|
85
|
-
lambda_: Optional[float] = ...,
|
|
85
|
+
lambda_: Optional[float] = ..., # NOTE: matches the Rust binding's `lambda_`
|
|
86
86
|
max_iter: int = ...,
|
|
87
87
|
tol: float = ...,
|
|
88
88
|
seed: int = ...,
|
|
@@ -42,7 +42,8 @@ def _ensemble_weight_arg(spec):
|
|
|
42
42
|
raise ValueError(f"unknown ensemble_weights {spec!r} (use 'auto', 'equal', "
|
|
43
43
|
"a dict, or a 3-list)")
|
|
44
44
|
if isinstance(spec, dict):
|
|
45
|
-
|
|
45
|
+
norm = {str(k).upper(): v for k, v in spec.items()} # case-insensitive keys
|
|
46
|
+
w = [float(norm.get(m, 0.0)) for m in _ENSEMBLE_ORDER]
|
|
46
47
|
else:
|
|
47
48
|
w = [float(x) for x in spec]
|
|
48
49
|
if len(w) != 3:
|
|
@@ -52,26 +53,6 @@ def _ensemble_weight_arg(spec):
|
|
|
52
53
|
return w
|
|
53
54
|
|
|
54
55
|
|
|
55
|
-
def _placebo_paths(pre_gaps, length, block_len, n_reps, seed):
|
|
56
|
-
"""Moving-block bootstrap of the (centered) pre-period residuals into placebo
|
|
57
|
-
paths of ``length`` periods. Resampling whole blocks preserves the residual
|
|
58
|
-
autocorrelation, so the resulting CI bands are more conservative than an iid
|
|
59
|
-
normal approximation. Returns an ``(n_reps, length)`` array (empty if no
|
|
60
|
-
pre-period or zero length)."""
|
|
61
|
-
g = np.asarray(pre_gaps, dtype=float)
|
|
62
|
-
m = len(g)
|
|
63
|
-
if m == 0 or length <= 0 or n_reps <= 0:
|
|
64
|
-
return np.empty((0, max(length, 0)))
|
|
65
|
-
g = g - g.mean() # null is "no effect" → center the residuals
|
|
66
|
-
rng = np.random.default_rng(int(seed))
|
|
67
|
-
bl = max(1, min(int(block_len), m))
|
|
68
|
-
n_blocks = int(np.ceil(length / bl))
|
|
69
|
-
starts = rng.integers(0, m, size=(n_reps, n_blocks))
|
|
70
|
-
idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
|
|
71
|
-
paths = g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
|
|
72
|
-
return paths
|
|
73
|
-
|
|
74
|
-
|
|
75
56
|
class _PowerReport:
|
|
76
57
|
"""Result of a power analysis across methods, with a report and plots."""
|
|
77
58
|
|
|
@@ -427,7 +408,7 @@ class GeoDesign:
|
|
|
427
408
|
target_power=target_power, recommended=recommended,
|
|
428
409
|
lookback=lookback, ensemble=ensemble,
|
|
429
410
|
ensemble_weights=ensemble_weights)
|
|
430
|
-
idx = self._resolve(treated)
|
|
411
|
+
idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
|
|
431
412
|
names = [self.names[i] for i in idx]
|
|
432
413
|
lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
|
|
433
414
|
if 0.0 not in lifts:
|
|
@@ -463,7 +444,7 @@ class GeoDesign:
|
|
|
463
444
|
if bad:
|
|
464
445
|
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
465
446
|
return sub.diagnose(tnames, test_len)
|
|
466
|
-
idx = self._resolve(treated)
|
|
447
|
+
idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
|
|
467
448
|
names = [self.names[i] for i in idx]
|
|
468
449
|
t0 = self.t - int(test_len)
|
|
469
450
|
diag = _panelkit.geo_diagnostics(self.Y, idx, int(test_len))
|
|
@@ -701,8 +682,7 @@ class GeoDesign:
|
|
|
701
682
|
methods: Sequence[str] = _METHODS,
|
|
702
683
|
weights="auto",
|
|
703
684
|
level: float = 0.90,
|
|
704
|
-
|
|
705
|
-
block_len: int = 4,
|
|
685
|
+
max_placebo: int = 200,
|
|
706
686
|
seed: int = 0,
|
|
707
687
|
exclude=None,
|
|
708
688
|
) -> "_EvalReport":
|
|
@@ -711,9 +691,15 @@ class GeoDesign:
|
|
|
711
691
|
This is the measurement counterpart to :meth:`power`: given the treated
|
|
712
692
|
markets and the period treatment began (``treat_start``, the first
|
|
713
693
|
post-period column), it fits SC / ASC / SDID, reports each one's effect,
|
|
714
|
-
and combines them into a weighted-average **ensemble** estimate.
|
|
715
|
-
|
|
716
|
-
|
|
694
|
+
and combines them into a weighted-average **ensemble** estimate.
|
|
695
|
+
|
|
696
|
+
Inference is **in-space placebo** (Abadie): every donor market is refit as
|
|
697
|
+
if it were the treated one, and the spread of *their* post-period effects
|
|
698
|
+
is the null reference. This captures out-of-sample extrapolation error —
|
|
699
|
+
the dominant source of uncertainty — so the intervals are calibrated
|
|
700
|
+
(unlike a bootstrap of the treated unit's own post-period, which only sees
|
|
701
|
+
in-sample noise and is far too narrow). Poorly-fit placebos (pre-period
|
|
702
|
+
RMSPE > 2× the treated unit's) are dropped, per Abadie.
|
|
717
703
|
|
|
718
704
|
Parameters
|
|
719
705
|
----------
|
|
@@ -725,11 +711,13 @@ class GeoDesign:
|
|
|
725
711
|
Which estimators to fit and blend.
|
|
726
712
|
weights : "auto" | "equal" | dict
|
|
727
713
|
Ensemble weighting. ``"auto"`` is inverse-variance (precision)
|
|
728
|
-
weighting from each method's
|
|
714
|
+
weighting from each method's placebo-null spread.
|
|
729
715
|
level : float
|
|
730
716
|
Confidence level for the intervals (e.g. 0.90).
|
|
731
|
-
|
|
732
|
-
|
|
717
|
+
max_placebo : int
|
|
718
|
+
Cap on the number of donor placebos used (sampled if exceeded).
|
|
719
|
+
seed : int
|
|
720
|
+
Seed for placebo sampling when ``max_placebo`` is exceeded.
|
|
733
721
|
|
|
734
722
|
Returns
|
|
735
723
|
-------
|
|
@@ -745,8 +733,8 @@ class GeoDesign:
|
|
|
745
733
|
if bad:
|
|
746
734
|
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
747
735
|
return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
|
|
748
|
-
level=level,
|
|
749
|
-
idx = self._resolve(treated)
|
|
736
|
+
level=level, max_placebo=max_placebo, seed=seed)
|
|
737
|
+
idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
|
|
750
738
|
names = [self.names[i] for i in idx]
|
|
751
739
|
t0 = int(treat_start)
|
|
752
740
|
if not (1 <= t0 < self.t):
|
|
@@ -757,27 +745,28 @@ class GeoDesign:
|
|
|
757
745
|
if unknown:
|
|
758
746
|
raise ValueError(f"unknown methods {unknown}; choose from {_METHODS}")
|
|
759
747
|
|
|
760
|
-
|
|
761
|
-
"SC":
|
|
762
|
-
|
|
763
|
-
"
|
|
764
|
-
|
|
748
|
+
def _fit(method, tr):
|
|
749
|
+
if method == "SC":
|
|
750
|
+
return _panelkit.fit_sc(self.Y, tr, t0, 0.0, False, level)
|
|
751
|
+
if method == "ASC":
|
|
752
|
+
return _panelkit.fit_asc(self.Y, tr, t0, 0.0, None)
|
|
753
|
+
return _panelkit.fit_sdid(self.Y, tr, t0, 1.0)
|
|
754
|
+
|
|
765
755
|
treated_series = self.Y[idx].mean(axis=0)
|
|
756
|
+
post_len = self.t - t0
|
|
757
|
+
order = methods
|
|
758
|
+
|
|
759
|
+
# --- point estimates on the treated set ---
|
|
766
760
|
per = {}
|
|
767
761
|
for m in methods:
|
|
768
|
-
fit =
|
|
762
|
+
fit = _fit(m, idx)
|
|
769
763
|
att_path = np.asarray(fit.att_path, dtype=float)
|
|
770
764
|
cf = np.asarray(fit.counterfactual, dtype=float)
|
|
771
765
|
att = float(fit.att)
|
|
772
766
|
cf_mean = float(np.mean(cf)) if cf.size else float("nan")
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
# Full-timeline counterfactual via donor weights (exact for SC; the
|
|
777
|
-
# dominant term for ASC/SDID). Center on the pre-period so the gap
|
|
778
|
-
# reflects FIT, not a level offset — SDID is level-agnostic (matches
|
|
779
|
-
# trends, not levels), so its donor-weighted series sits at a constant
|
|
780
|
-
# offset that would otherwise look like a non-zero pre-period.
|
|
767
|
+
# Full-timeline counterfactual via donor weights, centered on the
|
|
768
|
+
# pre-period so the gap reflects FIT, not a level offset (SDID matches
|
|
769
|
+
# trends, not levels).
|
|
781
770
|
dids = np.asarray(fit.donor_ids, dtype=int)
|
|
782
771
|
ws = np.asarray(fit.weights, dtype=float)
|
|
783
772
|
if dids.size:
|
|
@@ -787,25 +776,40 @@ class GeoDesign:
|
|
|
787
776
|
full_cf = np.full(self.t, np.nan)
|
|
788
777
|
per[m] = {
|
|
789
778
|
"att": att, "att_path": att_path, "counterfactual": cf,
|
|
790
|
-
"full_cf": full_cf,
|
|
791
|
-
"
|
|
792
|
-
"se": se, "att_lo": lo, "att_hi": hi,
|
|
793
|
-
"lift_lo": lo / cf_mean if cf_mean else float("nan"),
|
|
794
|
-
"lift_hi": hi / cf_mean if cf_mean else float("nan"),
|
|
779
|
+
"full_cf": full_cf, "cf_mean": cf_mean,
|
|
780
|
+
"lift": att / cf_mean if cf_mean else float("nan"),
|
|
795
781
|
"cumulative": float(att_path.sum()) * n_treated,
|
|
796
782
|
"pre_rmspe": float(fit.pre_rmspe),
|
|
797
783
|
}
|
|
798
784
|
|
|
799
|
-
#
|
|
800
|
-
|
|
785
|
+
# --- in-space placebo: refit each donor as if it were treated ---
|
|
786
|
+
treated_set = set(idx)
|
|
787
|
+
donors = [u for u in range(self.n) if u not in treated_set]
|
|
788
|
+
if len(donors) > int(max_placebo):
|
|
789
|
+
rng = np.random.default_rng(int(seed))
|
|
790
|
+
donors = sorted(int(j) for j in rng.choice(donors, int(max_placebo), replace=False))
|
|
791
|
+
pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
|
|
792
|
+
for j in donors:
|
|
793
|
+
for m in methods:
|
|
794
|
+
fj = _fit(m, [j])
|
|
795
|
+
pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
|
|
796
|
+
|
|
797
|
+
# --- ensemble weights ---
|
|
798
|
+
def _placebo_att_sd(m):
|
|
799
|
+
if not pb[m]:
|
|
800
|
+
return 1.0
|
|
801
|
+
vals = np.array([p.mean() for (p, _) in pb[m]])
|
|
802
|
+
return float(np.std(vals)) if len(vals) > 1 else 1.0
|
|
801
803
|
if isinstance(weights, str) and weights.lower() == "equal":
|
|
802
804
|
wv = [1.0 / len(order)] * len(order)
|
|
803
805
|
elif isinstance(weights, str) and weights.lower() == "auto":
|
|
804
|
-
|
|
806
|
+
# inverse-variance from each method's placebo-null spread (precision)
|
|
807
|
+
prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
|
|
805
808
|
s = sum(prec)
|
|
806
809
|
wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
|
|
807
810
|
elif isinstance(weights, dict):
|
|
808
|
-
|
|
811
|
+
norm = {str(k).upper(): v for k, v in weights.items()} # case-insensitive
|
|
812
|
+
raw = [float(norm.get(m, 0.0)) for m in order]
|
|
809
813
|
s = sum(raw)
|
|
810
814
|
if s <= 0:
|
|
811
815
|
raise ValueError("ensemble weights must sum to > 0")
|
|
@@ -817,71 +821,103 @@ class GeoDesign:
|
|
|
817
821
|
s = sum(raw)
|
|
818
822
|
wv = [r / s for r in raw]
|
|
819
823
|
wmap = dict(zip(order, wv))
|
|
824
|
+
a = (1.0 - float(level)) / 2.0
|
|
820
825
|
|
|
826
|
+
def _ci(point, null_samples):
|
|
827
|
+
"""Pivot CI: point estimate ± the placebo null spread (null ≈ 0).
|
|
828
|
+
Returns NaN when there are too few placebos to form an interval —
|
|
829
|
+
never a fake zero-width CI."""
|
|
830
|
+
if len(null_samples) >= 2:
|
|
831
|
+
return point + float(np.quantile(null_samples, a)), \
|
|
832
|
+
point + float(np.quantile(null_samples, 1.0 - a))
|
|
833
|
+
return float("nan"), float("nan")
|
|
834
|
+
|
|
835
|
+
def _kept_att(samples, treated_pre_m):
|
|
836
|
+
"""Placebo att-means after the Abadie 2x pre-fit filter (fallback to
|
|
837
|
+
all placebos if too few comparable ones survive)."""
|
|
838
|
+
keep = [p.mean() for (p, pre) in samples
|
|
839
|
+
if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
|
|
840
|
+
if len(keep) < 5 and samples:
|
|
841
|
+
keep = [p.mean() for (p, _) in samples]
|
|
842
|
+
return np.array(keep)
|
|
843
|
+
|
|
844
|
+
# --- per-method point CIs from each method's placebo att spread (same
|
|
845
|
+
# 2x pre-fit filter as the ensemble, for internal consistency) ---
|
|
846
|
+
for m in order:
|
|
847
|
+
mp = _kept_att(pb[m], per[m]["pre_rmspe"])
|
|
848
|
+
lo, hi = _ci(per[m]["att"], mp)
|
|
849
|
+
cfm = per[m]["cf_mean"]
|
|
850
|
+
per[m]["att_lo"], per[m]["att_hi"] = lo, hi
|
|
851
|
+
per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
|
|
852
|
+
per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
|
|
853
|
+
|
|
854
|
+
# --- ensemble estimate + ensemble placebo paths (Abadie pre-fit filter) ---
|
|
821
855
|
ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
|
|
822
856
|
ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
|
|
823
857
|
ens_att = float(ens_path.mean())
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
858
|
+
treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
|
|
859
|
+
|
|
860
|
+
ens_pb = [] # (path, pre_rmspe)
|
|
861
|
+
for di in range(len(donors)):
|
|
862
|
+
path = sum(wmap[m] * pb[m][di][0] for m in order)
|
|
863
|
+
pre = sum(wmap[m] * pb[m][di][1] for m in order)
|
|
864
|
+
ens_pb.append((path, pre))
|
|
865
|
+
kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
|
|
866
|
+
if len(kept) < 5: # too few comparable placebos → use all
|
|
867
|
+
kept = [p for (p, _) in ens_pb]
|
|
868
|
+
pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
|
|
869
|
+
n_pb = pb_mat.shape[0]
|
|
870
|
+
|
|
871
|
+
# pointwise + cumulative + mean CIs, all from the placebo null
|
|
872
|
+
if n_pb >= 2:
|
|
873
|
+
point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
|
|
874
|
+
point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
|
|
875
|
+
point_hw = float(np.quantile(np.abs(pb_mat), float(level)))
|
|
876
|
+
cum_pb = np.cumsum(pb_mat, axis=1)
|
|
877
|
+
run = np.cumsum(ens_path)
|
|
878
|
+
cum_lo_band = np.quantile(cum_pb, a, axis=0)
|
|
879
|
+
cum_hi_band = np.quantile(cum_pb, 1.0 - a, axis=0)
|
|
880
|
+
pb_att = pb_mat.mean(axis=1)
|
|
881
|
+
p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
|
|
882
|
+
else:
|
|
883
|
+
# too few comparable placebos → inference undefined (no fake band)
|
|
884
|
+
run = np.cumsum(ens_path)
|
|
885
|
+
point_lo = np.full(post_len, np.nan)
|
|
886
|
+
point_hi = np.full(post_len, np.nan)
|
|
887
|
+
point_hw = 0.0
|
|
888
|
+
cum_lo_band = cum_hi_band = np.full(post_len, np.nan)
|
|
889
|
+
pb_att = np.array([])
|
|
890
|
+
p_value = None
|
|
891
|
+
att_lo, att_hi = _ci(ens_att, pb_att)
|
|
892
|
+
|
|
893
|
+
cum_curve = run * n_treated
|
|
827
894
|
ensemble = {
|
|
828
|
-
"att": ens_att, "att_path": ens_path,
|
|
829
|
-
"att_lo":
|
|
895
|
+
"att": ens_att, "att_path": ens_path,
|
|
896
|
+
"att_lo": att_lo, "att_hi": att_hi,
|
|
830
897
|
"lift": ens_att / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
831
|
-
"lift_lo":
|
|
832
|
-
"lift_hi":
|
|
898
|
+
"lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
899
|
+
"lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
833
900
|
"cumulative": float(ens_path.sum()) * n_treated,
|
|
834
|
-
"weights": wmap,
|
|
901
|
+
"weights": wmap, "n_placebo": n_pb,
|
|
902
|
+
"low_power": n_pb < 8, # too few placebos for reliable inference
|
|
835
903
|
}
|
|
836
904
|
|
|
837
|
-
#
|
|
838
|
-
sc = _panelkit.fit_sc(self.Y, idx, t0, 0.0, True, level)
|
|
839
|
-
p_value = sc.p_value
|
|
840
|
-
|
|
841
|
-
# Full-timeline ensemble counterfactual + gap path (pre-period shows fit,
|
|
842
|
-
# post-period uses the exact ensemble effect).
|
|
905
|
+
# full-timeline counterfactual + gap path (pre shows fit; post = effect)
|
|
843
906
|
ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
|
|
844
907
|
full_gap = treated_series - ens_full_cf
|
|
845
|
-
full_gap[t0:] = ens_path
|
|
846
|
-
counterfactual = treated_series - full_gap
|
|
847
|
-
pre_gaps = full_gap[:t0]
|
|
848
|
-
sigma_pre = float(np.std(pre_gaps, ddof=1)) if t0 > 1 else float(np.std(pre_gaps))
|
|
849
|
-
|
|
850
|
-
# CI bands from a MOVING-BLOCK BOOTSTRAP of the pre-period residuals.
|
|
851
|
-
# Blocks preserve autocorrelation, so the bands are more conservative than
|
|
852
|
-
# an iid normal approximation — especially the cumulative band, whose
|
|
853
|
-
# spread grows faster than sqrt(k) under positive autocorrelation.
|
|
854
|
-
post_len = self.t - t0
|
|
855
|
-
a = (1.0 - float(level)) / 2.0
|
|
856
|
-
paths = _placebo_paths(pre_gaps, post_len, int(block_len), int(n_boot), int(seed))
|
|
857
|
-
if paths.size:
|
|
858
|
-
point_lo = np.quantile(paths, a, axis=0)
|
|
859
|
-
point_hi = np.quantile(paths, 1.0 - a, axis=0)
|
|
860
|
-
point_hw = float(np.quantile(np.abs(paths), float(level))) # symmetric, full-timeline
|
|
861
|
-
cum_paths = np.cumsum(paths, axis=1)
|
|
862
|
-
cum_band_lo = np.quantile(cum_paths, a, axis=0)
|
|
863
|
-
cum_band_hi = np.quantile(cum_paths, 1.0 - a, axis=0)
|
|
864
|
-
else:
|
|
865
|
-
point_lo = point_hi = np.zeros(post_len)
|
|
866
|
-
point_hw = 0.0
|
|
867
|
-
cum_band_lo = cum_band_hi = np.zeros(post_len)
|
|
868
|
-
|
|
869
|
-
ens_post = ens_path
|
|
870
|
-
run = np.cumsum(ens_post)
|
|
871
|
-
cum_curve = run * n_treated
|
|
872
|
-
cum_lo_curve = (run + cum_band_lo) * n_treated
|
|
873
|
-
cum_hi_curve = (run + cum_band_hi) * n_treated
|
|
874
|
-
|
|
875
|
-
ensemble["sigma_pre"] = sigma_pre
|
|
908
|
+
full_gap[t0:] = ens_path
|
|
909
|
+
counterfactual = treated_series - full_gap
|
|
876
910
|
ensemble["full_gap"] = full_gap
|
|
877
|
-
ensemble["
|
|
878
|
-
|
|
879
|
-
ensemble["
|
|
880
|
-
ensemble["
|
|
881
|
-
ensemble["
|
|
882
|
-
ensemble["
|
|
883
|
-
ensemble["
|
|
884
|
-
ensemble["
|
|
911
|
+
ensemble["sigma_pre"] = (float(np.std(full_gap[:t0], ddof=1)) if t0 > 1
|
|
912
|
+
else float(np.std(full_gap[:t0])))
|
|
913
|
+
ensemble["point_hw"] = point_hw
|
|
914
|
+
ensemble["point_lo"] = point_lo
|
|
915
|
+
ensemble["point_hi"] = point_hi
|
|
916
|
+
ensemble["cum_curve"] = cum_curve
|
|
917
|
+
ensemble["cum_lo_curve"] = (run + cum_lo_band) * n_treated
|
|
918
|
+
ensemble["cum_hi_curve"] = (run + cum_hi_band) * n_treated
|
|
919
|
+
ensemble["cum_lo"] = float(ensemble["cum_lo_curve"][-1]) if post_len else float("nan")
|
|
920
|
+
ensemble["cum_hi"] = float(ensemble["cum_hi_curve"][-1]) if post_len else float("nan")
|
|
885
921
|
|
|
886
922
|
return _EvalReport(names, t0, n_treated, per, ensemble, p_value, level,
|
|
887
923
|
treated_series, counterfactual)
|
|
@@ -981,11 +1017,14 @@ class _MultiCellReport:
|
|
|
981
1017
|
f"({', '.join(map(str, self.cells))})")
|
|
982
1018
|
lines.append(f"Test duration : {self.test_len} periods")
|
|
983
1019
|
lines.append(f"Shared donor pool : {len(self.donor_names)} markets")
|
|
984
|
-
lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume"
|
|
1020
|
+
lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume "
|
|
1021
|
+
f"(all cells together)")
|
|
985
1022
|
lines.append(f"Powered at {int(100*self.target_power)}% power, "
|
|
986
1023
|
f"{int(100*(1-self.alpha))}% confidence "
|
|
987
1024
|
f"(each cell vs. the shared pool).")
|
|
988
1025
|
lines.append("")
|
|
1026
|
+
# Per-cell 'Holdout' is that cell's share of its OWN sub-panel (cell +
|
|
1027
|
+
# shared donors); the Combined holdout above is over the full panel.
|
|
989
1028
|
lines.append(f"{'Cell':<14}{'Markets':<28}{'MDE':>8}{'Conf':>7}{'Holdout':>9}")
|
|
990
1029
|
lines.append("-" * 64)
|
|
991
1030
|
for label, rep in self.cells.items():
|
|
@@ -1050,8 +1089,11 @@ class _EvalReport:
|
|
|
1050
1089
|
|
|
1051
1090
|
@property
|
|
1052
1091
|
def significant(self):
|
|
1053
|
-
"""True if the ensemble CI excludes zero
|
|
1092
|
+
"""True if the ensemble CI is well-defined and excludes zero. Returns
|
|
1093
|
+
False when inference is undefined (too few placebos → NaN interval)."""
|
|
1054
1094
|
lo, hi = self.ensemble["att_lo"], self.ensemble["att_hi"]
|
|
1095
|
+
if not (np.isfinite(lo) and np.isfinite(hi)):
|
|
1096
|
+
return False
|
|
1055
1097
|
return (lo > 0) or (hi < 0)
|
|
1056
1098
|
|
|
1057
1099
|
def summary(self) -> str:
|
|
@@ -1073,17 +1115,25 @@ class _EvalReport:
|
|
|
1073
1115
|
lines.append(f" ensemble weights: {wstr}")
|
|
1074
1116
|
lines.append("")
|
|
1075
1117
|
if self.p_value is not None:
|
|
1076
|
-
lines.append(f"
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1118
|
+
lines.append(f"In-space placebo p-value : {self.p_value:.3f} "
|
|
1119
|
+
f"(ensemble, {e.get('n_placebo', 0)} donors)")
|
|
1120
|
+
if e.get("low_power"):
|
|
1121
|
+
lines.append("⚠ Few comparable donors — inference is low-powered; treat "
|
|
1122
|
+
"intervals/p-value with caution.")
|
|
1123
|
+
if self.significant:
|
|
1124
|
+
verdict = "✓ Significant lift — the ensemble interval excludes zero."
|
|
1125
|
+
elif not (np.isfinite(e["att_lo"]) and np.isfinite(e["att_hi"])):
|
|
1126
|
+
verdict = ("? Inference undefined — too few comparable donor placebos "
|
|
1127
|
+
"to form an interval.")
|
|
1128
|
+
else:
|
|
1129
|
+
verdict = ("~ Not distinguishable from zero at this level — the ensemble "
|
|
1130
|
+
"interval includes zero.")
|
|
1081
1131
|
lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
|
|
1082
1132
|
f"{e['cumulative']:,.0f} cumulative incremental")
|
|
1083
1133
|
if "cum_lo" in e:
|
|
1084
1134
|
lines.append(f"Cumulative {cl}% CI : "
|
|
1085
1135
|
f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
|
|
1086
|
-
f"(
|
|
1136
|
+
f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
|
|
1087
1137
|
lines.append(verdict)
|
|
1088
1138
|
lines.append("=" * 66)
|
|
1089
1139
|
return "\n".join(lines)
|
|
@@ -1569,7 +1619,7 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1569
1619
|
axc.set_title("Lift by method", fontweight="bold")
|
|
1570
1620
|
axc.grid(True, axis="x", alpha=0.25)
|
|
1571
1621
|
|
|
1572
|
-
pv = f" ·
|
|
1622
|
+
pv = f" · placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
|
|
1573
1623
|
verdict = "significant" if rep.significant else "not significant"
|
|
1574
1624
|
fig.suptitle(f"panelkit · test evaluation — ensemble lift "
|
|
1575
1625
|
f"{100*rep.ensemble['lift']:+.2f}% ({verdict}){pv}",
|
|
@@ -1582,10 +1632,10 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1582
1632
|
def _plot_eval_timeline(rep: "_EvalReport", path):
|
|
1583
1633
|
"""Pointwise + cumulative effect over the full timeline, with CI bands.
|
|
1584
1634
|
|
|
1585
|
-
Bands come from
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1635
|
+
Bands come from the in-space placebo distribution (every donor refit as if
|
|
1636
|
+
treated): the pointwise band is the per-period placebo spread around the
|
|
1637
|
+
estimate; the cumulative band grows with horizon as the placebo
|
|
1638
|
+
cumulative-sums spread out."""
|
|
1589
1639
|
_, plt = _require_mpl()
|
|
1590
1640
|
import numpy as _np
|
|
1591
1641
|
from matplotlib.gridspec import GridSpec
|
|
@@ -1632,7 +1682,7 @@ def _plot_eval_timeline(rep: "_EvalReport", path):
|
|
|
1632
1682
|
cum = e["cum_curve"]
|
|
1633
1683
|
axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
|
|
1634
1684
|
axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
|
|
1635
|
-
alpha=0.15, label=f"{cl}% band (
|
|
1685
|
+
alpha=0.15, label=f"{cl}% band (in-space placebo)")
|
|
1636
1686
|
axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
|
|
1637
1687
|
axc.axhline(0, color="#111827", lw=1.0)
|
|
1638
1688
|
axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|