panelkit 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelkit-0.2.4 → panelkit-0.2.6}/Cargo.lock +5 -5
- {panelkit-0.2.4 → panelkit-0.2.6}/Cargo.toml +1 -1
- {panelkit-0.2.4 → panelkit-0.2.6}/GUIDE.md +8 -4
- {panelkit-0.2.4 → panelkit-0.2.6}/PKG-INFO +3 -3
- {panelkit-0.2.4 → panelkit-0.2.6}/README.md +2 -2
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/sdid.rs +4 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/qr.rs +11 -1
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/opt/simplex.rs +6 -4
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/api_sc.rs +5 -3
- {panelkit-0.2.4 → panelkit-0.2.6}/pyproject.toml +1 -1
- {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/_panelkit.pyi +1 -1
- {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/design.py +152 -73
- {panelkit-0.2.4 → panelkit-0.2.6}/BENCHMARKS.md +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/LICENSE-APACHE +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/LICENSE-MIT +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/Cargo.toml +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/benches/estimators.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/bacon.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/callaway.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/sunab.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/did/twfe.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/fe/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/fe/within.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/lib.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/mcnnm/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/panel.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/result.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/augmented.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/cpasc.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/src/sc/synthetic.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/cpasc.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/did.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/sc.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/estimators/tests/sc_family.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/Cargo.toml +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/diagnostics.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/lib.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/power.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/selection.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/src/types.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/geo/tests/geo.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/Cargo.toml +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/batch.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/bootstrap.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/ci.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/lib.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/parallel.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/src/placebo.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/inference/tests/inference.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/Cargo.toml +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/error.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/cholesky.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/eig_sym.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/randomized.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/svd.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/factor/svd_gram.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/lib.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/matrix.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/matmul.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/norms.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/ops/transform.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/opt/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/opt/softthresh.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/rng.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/solve/lstsq.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/solve/mod.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/src/solve/spd.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/linalg/tests/numerics.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/Cargo.toml +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/api_did.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/api_geo.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/convert.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/lib.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/crates/pypanelkit/src/results.rs +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/__init__.py +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/estimators.py +0 -0
- {panelkit-0.2.4 → panelkit-0.2.6}/python/panelkit/py.typed +0 -0
|
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
|
|
|
462
462
|
|
|
463
463
|
[[package]]
|
|
464
464
|
name = "panelkit-estimators"
|
|
465
|
-
version = "0.2.4"
|
|
465
|
+
version = "0.2.6"
|
|
466
466
|
dependencies = [
|
|
467
467
|
"criterion",
|
|
468
468
|
"panelkit-linalg",
|
|
@@ -471,7 +471,7 @@ dependencies = [
|
|
|
471
471
|
|
|
472
472
|
[[package]]
|
|
473
473
|
name = "panelkit-geo"
|
|
474
|
-
version = "0.2.4"
|
|
474
|
+
version = "0.2.6"
|
|
475
475
|
dependencies = [
|
|
476
476
|
"panelkit-estimators",
|
|
477
477
|
"panelkit-inference",
|
|
@@ -482,7 +482,7 @@ dependencies = [
|
|
|
482
482
|
|
|
483
483
|
[[package]]
|
|
484
484
|
name = "panelkit-inference"
|
|
485
|
-
version = "0.2.4"
|
|
485
|
+
version = "0.2.6"
|
|
486
486
|
dependencies = [
|
|
487
487
|
"panelkit-estimators",
|
|
488
488
|
"panelkit-linalg",
|
|
@@ -491,7 +491,7 @@ dependencies = [
|
|
|
491
491
|
|
|
492
492
|
[[package]]
|
|
493
493
|
name = "panelkit-linalg"
|
|
494
|
-
version = "0.2.4"
|
|
494
|
+
version = "0.2.6"
|
|
495
495
|
dependencies = [
|
|
496
496
|
"proptest",
|
|
497
497
|
"rayon",
|
|
@@ -623,7 +623,7 @@ dependencies = [
|
|
|
623
623
|
|
|
624
624
|
[[package]]
|
|
625
625
|
name = "pypanelkit"
|
|
626
|
-
version = "0.2.4"
|
|
626
|
+
version = "0.2.6"
|
|
627
627
|
dependencies = [
|
|
628
628
|
"numpy",
|
|
629
629
|
"panelkit-estimators",
|
|
@@ -300,10 +300,14 @@ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ C
|
|
|
300
300
|
ev.lift, ev.cumulative, ev.significant
|
|
301
301
|
```
|
|
302
302
|
|
|
303
|
-
Inference
|
|
304
|
-
were the treated one, and the spread of *their*
|
|
305
|
-
reference — capturing out-of-sample extrapolation
|
|
306
|
-
|
|
303
|
+
Inference defaults to **in-space placebo** (Abadie, `inference="placebo"`): every
|
|
304
|
+
donor market is refit as if it were the treated one, and the spread of *their*
|
|
305
|
+
post-period effects is the null reference — capturing out-of-sample extrapolation
|
|
306
|
+
error, the real source of uncertainty. A second engine, `inference="bootstrap"`,
|
|
307
|
+
uses a moving-block bootstrap of the pre-period residuals; it's serial-correlation
|
|
308
|
+
aware and works as a **fallback when the donor pool is too small for placebo**, but
|
|
309
|
+
it only sees in-sample noise, so it is *optimistic* (the report is flagged
|
|
310
|
+
`optimistic` and you shouldn't lean on it for significance). (A bootstrap of the treated unit's own post-period only sees
|
|
307
311
|
in-sample noise and is wildly anti-conservative — on null data its 90% interval
|
|
308
312
|
falsely flags an effect ~50% of the time; the placebo version sits at/below the
|
|
309
313
|
nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: panelkit
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Topic :: Scientific/Engineering
|
|
@@ -274,7 +274,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
|
|
|
274
274
|
the power analysis: fit SC / ASC / SDID on a test that already happened, blend
|
|
275
275
|
them into a weighted-average **ensemble** estimate, and report each one's lift,
|
|
276
276
|
confidence interval (in-space placebo), and cumulative incremental —
|
|
277
|
-
with an
|
|
277
|
+
with an in-space placebo p-value:
|
|
278
278
|
|
|
279
279
|

|
|
280
280
|
|
|
@@ -316,7 +316,7 @@ What you get out of the box:
|
|
|
316
316
|
- **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
|
|
317
317
|
with auto inverse-variance weights) for a steadier estimate than any one method.
|
|
318
318
|
- **Post-test evaluation** — `evaluate()` measures a test that already ran:
|
|
319
|
-
per-method + ensemble lift,
|
|
319
|
+
per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
|
|
320
320
|
|
|
321
321
|
See [`examples/geo_demo.py`](examples/geo_demo.py).
|
|
322
322
|
|
|
@@ -244,7 +244,7 @@ per-cell MDE/confidence/holdout report and a combined figure:
|
|
|
244
244
|
the power analysis: fit SC / ASC / SDID on a test that already happened, blend
|
|
245
245
|
them into a weighted-average **ensemble** estimate, and report each one's lift,
|
|
246
246
|
confidence interval (in-space placebo), and cumulative incremental —
|
|
247
|
-
with an
|
|
247
|
+
with an in-space placebo p-value:
|
|
248
248
|
|
|
249
249
|

|
|
250
250
|
|
|
@@ -286,7 +286,7 @@ What you get out of the box:
|
|
|
286
286
|
- **A weighted-average ensemble** of SC + ASC + SDID (combined per placebo window,
|
|
287
287
|
with auto inverse-variance weights) for a steadier estimate than any one method.
|
|
288
288
|
- **Post-test evaluation** — `evaluate()` measures a test that already ran:
|
|
289
|
-
per-method + ensemble lift,
|
|
289
|
+
per-method + ensemble lift, in-space placebo CIs, cumulative incremental, and a p-value.
|
|
290
290
|
|
|
291
291
|
See [`examples/geo_demo.py`](examples/geo_demo.py).
|
|
292
292
|
|
|
@@ -87,6 +87,10 @@ pub fn fit_at(panel: &Panel, t0: usize, cfg: SdidConfig) -> ScFit {
|
|
|
87
87
|
let t = panel.n_periods();
|
|
88
88
|
let t_pre = t0;
|
|
89
89
|
let t_post = t - t0;
|
|
90
|
+
assert!(
|
|
91
|
+
t_pre >= 1 && t_post >= 1,
|
|
92
|
+
"SDID needs at least one pre- and one post-period (t0 in 1..n_periods)"
|
|
93
|
+
);
|
|
90
94
|
let n_tr = treated.len();
|
|
91
95
|
|
|
92
96
|
// Treated-average series.
|
|
@@ -92,8 +92,18 @@ impl Qr {
|
|
|
92
92
|
}
|
|
93
93
|
|
|
94
94
|
/// Back-substitute `R x = rhs[0..n]`, returning `x` (length `n`).
|
|
95
|
+
///
|
|
96
|
+
/// Householder QR does not rank-reveal, so a rank-deficient design can leave a
|
|
97
|
+
/// (near-)zero pivot on the diagonal. Rather than emit `inf`/`NaN` (which would
|
|
98
|
+
/// silently poison downstream OLS coefficients), we zero that component — a
|
|
99
|
+
/// minimum-norm-style choice — using a relative pivot threshold.
|
|
95
100
|
fn back_solve(&self, rhs: &[f64]) -> Vec<f64> {
|
|
96
101
|
let n = self.n;
|
|
102
|
+
let mut max_diag = 0.0_f64;
|
|
103
|
+
for i in 0..n {
|
|
104
|
+
max_diag = max_diag.max(self.packed.get(i, i).abs());
|
|
105
|
+
}
|
|
106
|
+
let eps = 1e-12 * max_diag.max(1.0);
|
|
97
107
|
let mut x = vec![0.0; n];
|
|
98
108
|
for i in (0..n).rev() {
|
|
99
109
|
let mut s = rhs[i];
|
|
@@ -101,7 +111,7 @@ impl Qr {
|
|
|
101
111
|
s -= self.packed.get(i, k) * x[k];
|
|
102
112
|
}
|
|
103
113
|
let rii = self.packed.get(i, i);
|
|
104
|
-
x[i] = s / rii;
|
|
114
|
+
x[i] = if rii.abs() > eps { s / rii } else { 0.0 };
|
|
105
115
|
}
|
|
106
116
|
x
|
|
107
117
|
}
|
|
@@ -30,17 +30,14 @@ pub fn project_simplex(v: &[f64]) -> Vec<f64> {
|
|
|
30
30
|
let mut u = v.to_vec();
|
|
31
31
|
u.sort_by(|a, b| b.partial_cmp(a).unwrap()); // descending
|
|
32
32
|
let mut css = 0.0;
|
|
33
|
-
let mut rho = 0usize;
|
|
34
33
|
let mut theta = 0.0;
|
|
35
34
|
for (j, &uj) in u.iter().enumerate() {
|
|
36
35
|
css += uj;
|
|
37
36
|
let t = (css - 1.0) / (j as f64 + 1.0);
|
|
38
37
|
if uj - t > 0.0 {
|
|
39
|
-
rho = j + 1;
|
|
40
38
|
theta = t;
|
|
41
39
|
}
|
|
42
40
|
}
|
|
43
|
-
let _ = rho;
|
|
44
41
|
v.iter().map(|&vi| (vi - theta).max(0.0)).collect()
|
|
45
42
|
}
|
|
46
43
|
|
|
@@ -148,8 +145,13 @@ pub fn solve_fw(gram: &Mat, b: &[f64], eta: f64, max_iter: usize, tol: f64) -> S
|
|
|
148
145
|
let dgd = dot(&d, &gd_vec);
|
|
149
146
|
let gamma = if dgd > 0.0 {
|
|
150
147
|
(-gd / dgd).clamp(0.0, gamma_max)
|
|
151
|
-
} else {
|
|
148
|
+
} else if gd < 0.0 {
|
|
149
|
+
// Non-positive curvature along a descent direction → go to the
|
|
150
|
+
// feasible cap (bounded so the step never leaves the simplex).
|
|
152
151
|
gamma_max.min(1.0)
|
|
152
|
+
} else {
|
|
153
|
+
// Not a descent direction → don't move.
|
|
154
|
+
0.0
|
|
153
155
|
};
|
|
154
156
|
for i in 0..j {
|
|
155
157
|
w[i] += gamma * d[i];
|
|
@@ -111,13 +111,15 @@ pub fn fit_sdid(
|
|
|
111
111
|
/// Fit Matrix-Completion NNM (Athey et al. 2021). `max_rank`, when set, uses a
|
|
112
112
|
/// fast randomized truncated SVD inside SoftImpute (big speedup, low-rank cap).
|
|
113
113
|
#[pyfunction]
|
|
114
|
-
|
|
114
|
+
// `lambda_` (not `lambda`) so it is usable as a Python keyword argument —
|
|
115
|
+
// `lambda` is a reserved word in Python.
|
|
116
|
+
#[pyo3(signature = (y, treated, treat_time, lambda_=None, max_iter=200, tol=1e-5, seed=0, max_rank=None))]
|
|
115
117
|
#[allow(clippy::too_many_arguments)]
|
|
116
118
|
pub fn fit_mcnnm(
|
|
117
119
|
y: PyReadonlyArray2<f64>,
|
|
118
120
|
treated: Vec<usize>,
|
|
119
121
|
treat_time: usize,
|
|
120
|
-
|
|
122
|
+
lambda_: Option<f64>,
|
|
121
123
|
max_iter: usize,
|
|
122
124
|
tol: f64,
|
|
123
125
|
seed: u64,
|
|
@@ -125,7 +127,7 @@ pub fn fit_mcnnm(
|
|
|
125
127
|
) -> PyResult<PyScResult> {
|
|
126
128
|
let panel = Panel::block(mat_from_numpy(&y), &treated, treat_time);
|
|
127
129
|
let cfg = McnnmConfig {
|
|
128
|
-
lambda,
|
|
130
|
+
lambda: lambda_,
|
|
129
131
|
max_iter,
|
|
130
132
|
tol,
|
|
131
133
|
seed,
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "panelkit"
|
|
7
|
-
version = "0.2.4"
|
|
7
|
+
version = "0.2.6"
|
|
8
8
|
description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -82,7 +82,7 @@ def fit_mcnnm(
|
|
|
82
82
|
y: npt.NDArray[np.float64],
|
|
83
83
|
treated: Sequence[int],
|
|
84
84
|
treat_time: int,
|
|
85
|
-
lambda_: Optional[float] = ...,
|
|
85
|
+
lambda_: Optional[float] = ..., # NOTE: matches the Rust binding's `lambda_`
|
|
86
86
|
max_iter: int = ...,
|
|
87
87
|
tol: float = ...,
|
|
88
88
|
seed: int = ...,
|
|
@@ -42,7 +42,8 @@ def _ensemble_weight_arg(spec):
|
|
|
42
42
|
raise ValueError(f"unknown ensemble_weights {spec!r} (use 'auto', 'equal', "
|
|
43
43
|
"a dict, or a 3-list)")
|
|
44
44
|
if isinstance(spec, dict):
|
|
45
|
-
|
|
45
|
+
norm = {str(k).upper(): v for k, v in spec.items()} # case-insensitive keys
|
|
46
|
+
w = [float(norm.get(m, 0.0)) for m in _ENSEMBLE_ORDER]
|
|
46
47
|
else:
|
|
47
48
|
w = [float(x) for x in spec]
|
|
48
49
|
if len(w) != 3:
|
|
@@ -52,6 +53,24 @@ def _ensemble_weight_arg(spec):
|
|
|
52
53
|
return w
|
|
53
54
|
|
|
54
55
|
|
|
56
|
+
def _block_bootstrap_paths(pre_gaps, length, block_len, n_reps, seed):
|
|
57
|
+
"""Moving-block bootstrap of the (centered) pre-period residuals into placebo
|
|
58
|
+
paths of ``length`` periods. Resampling whole blocks preserves the residual
|
|
59
|
+
autocorrelation. Returns an ``(n_reps, length)`` array (empty if no pre-period
|
|
60
|
+
or zero length)."""
|
|
61
|
+
g = np.asarray(pre_gaps, dtype=float)
|
|
62
|
+
m = len(g)
|
|
63
|
+
if m == 0 or length <= 0 or n_reps <= 0:
|
|
64
|
+
return np.empty((0, max(length, 0)))
|
|
65
|
+
g = g - g.mean() # null is "no effect" → center residuals
|
|
66
|
+
rng = np.random.default_rng(int(seed))
|
|
67
|
+
bl = max(1, min(int(block_len), m))
|
|
68
|
+
n_blocks = int(np.ceil(length / bl))
|
|
69
|
+
starts = rng.integers(0, m, size=(n_reps, n_blocks))
|
|
70
|
+
idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
|
|
71
|
+
return g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
|
|
72
|
+
|
|
73
|
+
|
|
55
74
|
class _PowerReport:
|
|
56
75
|
"""Result of a power analysis across methods, with a report and plots."""
|
|
57
76
|
|
|
@@ -407,7 +426,7 @@ class GeoDesign:
|
|
|
407
426
|
target_power=target_power, recommended=recommended,
|
|
408
427
|
lookback=lookback, ensemble=ensemble,
|
|
409
428
|
ensemble_weights=ensemble_weights)
|
|
410
|
-
idx = self._resolve(treated)
|
|
429
|
+
idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
|
|
411
430
|
names = [self.names[i] for i in idx]
|
|
412
431
|
lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
|
|
413
432
|
if 0.0 not in lifts:
|
|
@@ -443,7 +462,7 @@ class GeoDesign:
|
|
|
443
462
|
if bad:
|
|
444
463
|
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
445
464
|
return sub.diagnose(tnames, test_len)
|
|
446
|
-
idx = self._resolve(treated)
|
|
465
|
+
idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
|
|
447
466
|
names = [self.names[i] for i in idx]
|
|
448
467
|
t0 = self.t - int(test_len)
|
|
449
468
|
diag = _panelkit.geo_diagnostics(self.Y, idx, int(test_len))
|
|
@@ -681,7 +700,10 @@ class GeoDesign:
|
|
|
681
700
|
methods: Sequence[str] = _METHODS,
|
|
682
701
|
weights="auto",
|
|
683
702
|
level: float = 0.90,
|
|
703
|
+
inference: str = "placebo",
|
|
684
704
|
max_placebo: int = 200,
|
|
705
|
+
n_boot: int = 2000,
|
|
706
|
+
block_len: int = 4,
|
|
685
707
|
seed: int = 0,
|
|
686
708
|
exclude=None,
|
|
687
709
|
) -> "_EvalReport":
|
|
@@ -692,13 +714,20 @@ class GeoDesign:
|
|
|
692
714
|
post-period column), it fits SC / ASC / SDID, reports each one's effect,
|
|
693
715
|
and combines them into a weighted-average **ensemble** estimate.
|
|
694
716
|
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
717
|
+
Two inference engines (``inference=``):
|
|
718
|
+
|
|
719
|
+
- ``"placebo"`` (default) — **in-space placebo** (Abadie): every donor
|
|
720
|
+
market is refit as if it were treated, and the spread of *their*
|
|
721
|
+
post-period effects is the null. This captures out-of-sample
|
|
722
|
+
extrapolation error (the dominant uncertainty), so it is calibrated.
|
|
723
|
+
Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
|
|
724
|
+
dropped. Needs a reasonable donor pool to have power.
|
|
725
|
+
- ``"bootstrap"`` — a **moving-block bootstrap of the pre-period
|
|
726
|
+
residuals** (serial-correlation-aware). Useful as a within-sample noise
|
|
727
|
+
band and as a fallback when the donor pool is too small for placebo
|
|
728
|
+
inference, **but it is optimistic**: it only sees in-sample noise, not
|
|
729
|
+
extrapolation error, so do not rely on it for significance. The report
|
|
730
|
+
is flagged ``optimistic`` in this mode.
|
|
702
731
|
|
|
703
732
|
Parameters
|
|
704
733
|
----------
|
|
@@ -732,8 +761,9 @@ class GeoDesign:
|
|
|
732
761
|
if bad:
|
|
733
762
|
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
734
763
|
return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
|
|
735
|
-
level=level, max_placebo=max_placebo,
|
|
736
|
-
|
|
764
|
+
level=level, inference=inference, max_placebo=max_placebo,
|
|
765
|
+
n_boot=n_boot, block_len=block_len, seed=seed)
|
|
766
|
+
idx = list(dict.fromkeys(self._resolve(treated))) # dedup, preserve order
|
|
737
767
|
names = [self.names[i] for i in idx]
|
|
738
768
|
t0 = int(treat_start)
|
|
739
769
|
if not (1 <= t0 < self.t):
|
|
@@ -781,33 +811,60 @@ class GeoDesign:
|
|
|
781
811
|
"pre_rmspe": float(fit.pre_rmspe),
|
|
782
812
|
}
|
|
783
813
|
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
814
|
+
inference = str(inference).lower()
|
|
815
|
+
if inference not in ("placebo", "bootstrap"):
|
|
816
|
+
raise ValueError("inference must be 'placebo' or 'bootstrap'")
|
|
817
|
+
a = (1.0 - float(level)) / 2.0
|
|
818
|
+
|
|
819
|
+
def _ci(point, null_samples):
|
|
820
|
+
"""Pivot CI: point estimate ± the null spread (null ≈ 0). Returns NaN
|
|
821
|
+
when there are too few null samples — never a fake zero-width CI."""
|
|
822
|
+
if len(null_samples) >= 2:
|
|
823
|
+
return point + float(np.quantile(null_samples, a)), \
|
|
824
|
+
point + float(np.quantile(null_samples, 1.0 - a))
|
|
825
|
+
return float("nan"), float("nan")
|
|
826
|
+
|
|
827
|
+
# --- engine: per-method null att-samples (+ donor placebo paths if used) ---
|
|
828
|
+
if inference == "placebo":
|
|
829
|
+
treated_set = set(idx)
|
|
830
|
+
donors = [u for u in range(self.n) if u not in treated_set]
|
|
831
|
+
if len(donors) > int(max_placebo):
|
|
832
|
+
rng = np.random.default_rng(int(seed))
|
|
833
|
+
donors = sorted(int(j) for j in
|
|
834
|
+
rng.choice(donors, int(max_placebo), replace=False))
|
|
835
|
+
pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
|
|
836
|
+
for j in donors:
|
|
837
|
+
for m in methods:
|
|
838
|
+
fj = _fit(m, [j])
|
|
839
|
+
pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
|
|
840
|
+
|
|
841
|
+
def _kept_att(samples, treated_pre_m):
|
|
842
|
+
keep = [p.mean() for (p, pre) in samples
|
|
843
|
+
if treated_pre_m <= 0 or pre <= 2.0 * treated_pre_m]
|
|
844
|
+
if len(keep) < 5 and samples:
|
|
845
|
+
keep = [p.mean() for (p, _) in samples]
|
|
846
|
+
return np.array(keep)
|
|
847
|
+
null_att = {m: _kept_att(pb[m], per[m]["pre_rmspe"]) for m in order}
|
|
848
|
+
else: # bootstrap of pre-period residuals
|
|
849
|
+
null_att = {}
|
|
850
|
+
for m in order:
|
|
851
|
+
pre_resid = treated_series[:t0] - per[m]["full_cf"][:t0]
|
|
852
|
+
Bm = _block_bootstrap_paths(pre_resid, post_len, block_len, n_boot, seed)
|
|
853
|
+
null_att[m] = Bm.mean(axis=1) if Bm.size else np.array([])
|
|
854
|
+
|
|
855
|
+
# --- ensemble weights (auto = inverse null-att variance per method) ---
|
|
856
|
+
def _null_sd(m):
|
|
857
|
+
v = null_att[m]
|
|
858
|
+
return float(np.std(v)) if len(v) > 1 else 1.0
|
|
802
859
|
if isinstance(weights, str) and weights.lower() == "equal":
|
|
803
860
|
wv = [1.0 / len(order)] * len(order)
|
|
804
861
|
elif isinstance(weights, str) and weights.lower() == "auto":
|
|
805
|
-
|
|
806
|
-
prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
|
|
862
|
+
prec = [1.0 / max(_null_sd(m) ** 2, 1e-300) for m in order]
|
|
807
863
|
s = sum(prec)
|
|
808
864
|
wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
|
|
809
865
|
elif isinstance(weights, dict):
|
|
810
|
-
|
|
866
|
+
norm = {str(k).upper(): v for k, v in weights.items()} # case-insensitive
|
|
867
|
+
raw = [float(norm.get(m, 0.0)) for m in order]
|
|
811
868
|
s = sum(raw)
|
|
812
869
|
if s <= 0:
|
|
813
870
|
raise ValueError("ensemble weights must sum to > 0")
|
|
@@ -819,42 +876,41 @@ class GeoDesign:
|
|
|
819
876
|
s = sum(raw)
|
|
820
877
|
wv = [r / s for r in raw]
|
|
821
878
|
wmap = dict(zip(order, wv))
|
|
822
|
-
a = (1.0 - float(level)) / 2.0
|
|
823
879
|
|
|
824
|
-
|
|
825
|
-
"""Pivot CI: point estimate ± the placebo null spread (null ≈ 0)."""
|
|
826
|
-
if len(null_samples) >= 2:
|
|
827
|
-
return point + float(np.quantile(null_samples, a)), \
|
|
828
|
-
point + float(np.quantile(null_samples, 1.0 - a))
|
|
829
|
-
return point, point
|
|
830
|
-
|
|
831
|
-
# --- per-method point CIs from each method's placebo att spread ---
|
|
880
|
+
# --- per-method point CIs from each method's null att spread ---
|
|
832
881
|
for m in order:
|
|
833
|
-
|
|
834
|
-
lo, hi = _ci(per[m]["att"], mp)
|
|
882
|
+
lo, hi = _ci(per[m]["att"], null_att[m])
|
|
835
883
|
cfm = per[m]["cf_mean"]
|
|
836
884
|
per[m]["att_lo"], per[m]["att_hi"] = lo, hi
|
|
837
885
|
per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
|
|
838
886
|
per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
|
|
839
887
|
|
|
840
|
-
# --- ensemble estimate
|
|
888
|
+
# --- ensemble estimate ---
|
|
841
889
|
ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
|
|
842
890
|
ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
|
|
843
891
|
ens_att = float(ens_path.mean())
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
892
|
+
ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
|
|
893
|
+
|
|
894
|
+
# --- ensemble null-path matrix (engine-specific) ---
|
|
895
|
+
if inference == "placebo":
|
|
896
|
+
treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
|
|
897
|
+
ens_pb = []
|
|
898
|
+
for di in range(len(donors)):
|
|
899
|
+
path = sum(wmap[m] * pb[m][di][0] for m in order)
|
|
900
|
+
pre = sum(wmap[m] * pb[m][di][1] for m in order)
|
|
901
|
+
ens_pb.append((path, pre))
|
|
902
|
+
kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
|
|
903
|
+
if len(kept) < 5: # too few comparable placebos → use all
|
|
904
|
+
kept = [p for (p, _) in ens_pb]
|
|
905
|
+
pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
|
|
906
|
+
label = "in-space placebo"
|
|
907
|
+
else:
|
|
908
|
+
ens_pre = treated_series[:t0] - ens_full_cf[:t0]
|
|
909
|
+
pb_mat = _block_bootstrap_paths(ens_pre, post_len, block_len, n_boot, seed)
|
|
910
|
+
label = "block bootstrap"
|
|
855
911
|
n_pb = pb_mat.shape[0]
|
|
856
912
|
|
|
857
|
-
# pointwise
|
|
913
|
+
# --- shared: pointwise / cumulative / mean CIs + p-value from the null ---
|
|
858
914
|
if n_pb >= 2:
|
|
859
915
|
point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
|
|
860
916
|
point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
|
|
@@ -866,10 +922,12 @@ class GeoDesign:
|
|
|
866
922
|
pb_att = pb_mat.mean(axis=1)
|
|
867
923
|
p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
|
|
868
924
|
else:
|
|
869
|
-
|
|
870
|
-
point_hw = 0.0
|
|
925
|
+
# too few comparable placebos → inference undefined (no fake band)
|
|
871
926
|
run = np.cumsum(ens_path)
|
|
872
|
-
|
|
927
|
+
point_lo = np.full(post_len, np.nan)
|
|
928
|
+
point_hi = np.full(post_len, np.nan)
|
|
929
|
+
point_hw = 0.0
|
|
930
|
+
cum_lo_band = cum_hi_band = np.full(post_len, np.nan)
|
|
873
931
|
pb_att = np.array([])
|
|
874
932
|
p_value = None
|
|
875
933
|
att_lo, att_hi = _ci(ens_att, pb_att)
|
|
@@ -882,11 +940,14 @@ class GeoDesign:
|
|
|
882
940
|
"lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
883
941
|
"lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
884
942
|
"cumulative": float(ens_path.sum()) * n_treated,
|
|
885
|
-
"weights": wmap, "n_placebo": n_pb,
|
|
943
|
+
"weights": wmap, "n_placebo": n_pb, "inference": label,
|
|
944
|
+
# placebo with too few donors is undefined/low-power; bootstrap is
|
|
945
|
+
# serial-correlation-aware but optimistic (in-sample noise only).
|
|
946
|
+
"low_power": (inference == "placebo" and n_pb < 8),
|
|
947
|
+
"optimistic": (inference == "bootstrap"),
|
|
886
948
|
}
|
|
887
949
|
|
|
888
950
|
# full-timeline counterfactual + gap path (pre shows fit; post = effect)
|
|
889
|
-
ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
|
|
890
951
|
full_gap = treated_series - ens_full_cf
|
|
891
952
|
full_gap[t0:] = ens_path
|
|
892
953
|
counterfactual = treated_series - full_gap
|
|
@@ -1000,11 +1061,14 @@ class _MultiCellReport:
|
|
|
1000
1061
|
f"({', '.join(map(str, self.cells))})")
|
|
1001
1062
|
lines.append(f"Test duration : {self.test_len} periods")
|
|
1002
1063
|
lines.append(f"Shared donor pool : {len(self.donor_names)} markets")
|
|
1003
|
-
lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume"
|
|
1064
|
+
lines.append(f"Combined holdout : {100*self.pooled_holdout:.1f}% of total volume "
|
|
1065
|
+
f"(all cells together)")
|
|
1004
1066
|
lines.append(f"Powered at {int(100*self.target_power)}% power, "
|
|
1005
1067
|
f"{int(100*(1-self.alpha))}% confidence "
|
|
1006
1068
|
f"(each cell vs. the shared pool).")
|
|
1007
1069
|
lines.append("")
|
|
1070
|
+
# Per-cell 'Holdout' is that cell's share of its OWN sub-panel (cell +
|
|
1071
|
+
# shared donors); the Combined holdout above is over the full panel.
|
|
1008
1072
|
lines.append(f"{'Cell':<14}{'Markets':<28}{'MDE':>8}{'Conf':>7}{'Holdout':>9}")
|
|
1009
1073
|
lines.append("-" * 64)
|
|
1010
1074
|
for label, rep in self.cells.items():
|
|
@@ -1069,8 +1133,11 @@ class _EvalReport:
|
|
|
1069
1133
|
|
|
1070
1134
|
@property
|
|
1071
1135
|
def significant(self):
|
|
1072
|
-
"""True if the ensemble CI excludes zero
|
|
1136
|
+
"""True if the ensemble CI is well-defined and excludes zero. Returns
|
|
1137
|
+
False when inference is undefined (too few placebos → NaN interval)."""
|
|
1073
1138
|
lo, hi = self.ensemble["att_lo"], self.ensemble["att_hi"]
|
|
1139
|
+
if not (np.isfinite(lo) and np.isfinite(hi)):
|
|
1140
|
+
return False
|
|
1074
1141
|
return (lo > 0) or (hi < 0)
|
|
1075
1142
|
|
|
1076
1143
|
def summary(self) -> str:
|
|
@@ -1091,18 +1158,30 @@ class _EvalReport:
|
|
|
1091
1158
|
wstr = ", ".join(f"{m} {100*w:.0f}%" for m, w in e["weights"].items())
|
|
1092
1159
|
lines.append(f" ensemble weights: {wstr}")
|
|
1093
1160
|
lines.append("")
|
|
1161
|
+
engine = e.get("inference", "in-space placebo")
|
|
1162
|
+
unit = "draws" if engine == "block bootstrap" else "donors"
|
|
1094
1163
|
if self.p_value is not None:
|
|
1095
|
-
lines.append(f"
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1164
|
+
lines.append(f"Placebo/bootstrap p-value : {self.p_value:.3f} "
|
|
1165
|
+
f"({engine}, {e.get('n_placebo', 0)} {unit})")
|
|
1166
|
+
if e.get("low_power"):
|
|
1167
|
+
lines.append("⚠ Few comparable donors — inference is low-powered; treat "
|
|
1168
|
+
"intervals/p-value with caution.")
|
|
1169
|
+
if e.get("optimistic"):
|
|
1170
|
+
lines.append("⚠ Bootstrap CIs see in-sample noise only (optimistic) — use "
|
|
1171
|
+
"inference='placebo' for significance when donors allow.")
|
|
1172
|
+
if self.significant:
|
|
1173
|
+
verdict = "✓ Significant lift — the ensemble interval excludes zero."
|
|
1174
|
+
elif not (np.isfinite(e["att_lo"]) and np.isfinite(e["att_hi"])):
|
|
1175
|
+
verdict = ("? Inference undefined — too few comparable donor placebos "
|
|
1176
|
+
"to form an interval.")
|
|
1177
|
+
else:
|
|
1178
|
+
verdict = ("~ Not distinguishable from zero at this level — the ensemble "
|
|
1179
|
+
"interval includes zero.")
|
|
1100
1180
|
lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
|
|
1101
1181
|
f"{e['cumulative']:,.0f} cumulative incremental")
|
|
1102
1182
|
if "cum_lo" in e:
|
|
1103
1183
|
lines.append(f"Cumulative {cl}% CI : "
|
|
1104
|
-
f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
|
|
1105
|
-
f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
|
|
1184
|
+
f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] ({engine})")
|
|
1106
1185
|
lines.append(verdict)
|
|
1107
1186
|
lines.append("=" * 66)
|
|
1108
1187
|
return "\n".join(lines)
|
|
@@ -1588,7 +1667,7 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1588
1667
|
axc.set_title("Lift by method", fontweight="bold")
|
|
1589
1668
|
axc.grid(True, axis="x", alpha=0.25)
|
|
1590
1669
|
|
|
1591
|
-
pv = f" ·
|
|
1670
|
+
pv = f" · placebo p={rep.p_value:.3f}" if rep.p_value is not None else ""
|
|
1592
1671
|
verdict = "significant" if rep.significant else "not significant"
|
|
1593
1672
|
fig.suptitle(f"panelkit · test evaluation — ensemble lift "
|
|
1594
1673
|
f"{100*rep.ensemble['lift']:+.2f}% ({verdict}){pv}",
|
|
@@ -1651,7 +1730,7 @@ def _plot_eval_timeline(rep: "_EvalReport", path):
|
|
|
1651
1730
|
cum = e["cum_curve"]
|
|
1652
1731
|
axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
|
|
1653
1732
|
axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
|
|
1654
|
-
alpha=0.15, label=f"{cl}% band (in-space placebo)")
|
|
1733
|
+
alpha=0.15, label=f"{cl}% band ({e.get('inference', 'in-space placebo')})")
|
|
1655
1734
|
axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
|
|
1656
1735
|
axc.axhline(0, color="#111827", lw=1.0)
|
|
1657
1736
|
axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|