panelkit 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelkit-0.2.2 → panelkit-0.2.3}/Cargo.lock +5 -5
- {panelkit-0.2.2 → panelkit-0.2.3}/Cargo.toml +1 -1
- {panelkit-0.2.2 → panelkit-0.2.3}/GUIDE.md +27 -2
- {panelkit-0.2.2 → panelkit-0.2.3}/PKG-INFO +18 -1
- {panelkit-0.2.2 → panelkit-0.2.3}/README.md +17 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/selection.rs +68 -20
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/tests/geo.rs +10 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/api_geo.rs +3 -1
- {panelkit-0.2.2 → panelkit-0.2.3}/pyproject.toml +1 -1
- {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/_panelkit.pyi +1 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/design.py +269 -18
- {panelkit-0.2.2 → panelkit-0.2.3}/BENCHMARKS.md +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/LICENSE-APACHE +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/LICENSE-MIT +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/benches/estimators.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/bacon.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/callaway.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/sunab.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/twfe.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/fe/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/fe/within.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/mcnnm/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/panel.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/result.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/augmented.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/cpasc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/sdid.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/synthetic.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/cpasc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/did.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/sc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/sc_family.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/diagnostics.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/power.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/types.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/batch.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/bootstrap.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/ci.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/parallel.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/placebo.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/tests/inference.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/error.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/cholesky.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/eig_sym.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/qr.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/randomized.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/svd.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/svd_gram.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/matrix.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/matmul.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/norms.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/transform.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/opt/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/opt/simplex.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/opt/softthresh.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/rng.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/solve/lstsq.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/solve/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/solve/spd.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/tests/numerics.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/api_did.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/api_sc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/convert.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/results.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/__init__.py +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/estimators.py +0 -0
- {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/py.typed +0 -0
|
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
|
|
|
462
462
|
|
|
463
463
|
[[package]]
|
|
464
464
|
name = "panelkit-estimators"
|
|
465
|
-
version = "0.2.2"
|
|
465
|
+
version = "0.2.3"
|
|
466
466
|
dependencies = [
|
|
467
467
|
"criterion",
|
|
468
468
|
"panelkit-linalg",
|
|
@@ -471,7 +471,7 @@ dependencies = [
|
|
|
471
471
|
|
|
472
472
|
[[package]]
|
|
473
473
|
name = "panelkit-geo"
|
|
474
|
-
version = "0.2.2"
|
|
474
|
+
version = "0.2.3"
|
|
475
475
|
dependencies = [
|
|
476
476
|
"panelkit-estimators",
|
|
477
477
|
"panelkit-inference",
|
|
@@ -482,7 +482,7 @@ dependencies = [
|
|
|
482
482
|
|
|
483
483
|
[[package]]
|
|
484
484
|
name = "panelkit-inference"
|
|
485
|
-
version = "0.2.2"
|
|
485
|
+
version = "0.2.3"
|
|
486
486
|
dependencies = [
|
|
487
487
|
"panelkit-estimators",
|
|
488
488
|
"panelkit-linalg",
|
|
@@ -491,7 +491,7 @@ dependencies = [
|
|
|
491
491
|
|
|
492
492
|
[[package]]
|
|
493
493
|
name = "panelkit-linalg"
|
|
494
|
-
version = "0.2.2"
|
|
494
|
+
version = "0.2.3"
|
|
495
495
|
dependencies = [
|
|
496
496
|
"proptest",
|
|
497
497
|
"rayon",
|
|
@@ -623,7 +623,7 @@ dependencies = [
|
|
|
623
623
|
|
|
624
624
|
[[package]]
|
|
625
625
|
name = "pypanelkit"
|
|
626
|
-
version = "0.2.2"
|
|
626
|
+
version = "0.2.3"
|
|
627
627
|
dependencies = [
|
|
628
628
|
"numpy",
|
|
629
629
|
"panelkit-estimators",
|
|
@@ -294,8 +294,9 @@ weighted-average **ensemble** estimate.
|
|
|
294
294
|
|
|
295
295
|
```python
|
|
296
296
|
ev = design.evaluate(treated=["chicago", "denver"], treat_start=52, level=0.90)
|
|
297
|
-
print(ev.summary())
|
|
298
|
-
ev.plot("evaluate.png")
|
|
297
|
+
print(ev.summary()) # per-method + ensemble lift, CI, cumulative
|
|
298
|
+
ev.plot("evaluate.png") # observed-vs-cf, effect path (CI band), lift bar
|
|
299
|
+
ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
|
|
299
300
|
ev.lift, ev.cumulative, ev.significant
|
|
300
301
|
```
|
|
301
302
|
|
|
@@ -308,6 +309,20 @@ and the ensemble in `ev.ensemble`. Reported numbers: **% lift** (effect ÷
|
|
|
308
309
|
counterfactual), **per-period ATT**, and **cumulative incremental** over the
|
|
309
310
|
window (summed across treated markets).
|
|
310
311
|
|
|
312
|
+
**Effect over time** (`ev.plot_effect_over_time(...)`) gives the event-study view:
|
|
313
|
+
the **pointwise** effect across the full timeline — *including the pre-period*, so
|
|
314
|
+
you can see it sits flat (centered on zero) inside the noise band before the test
|
|
315
|
+
starts (a placebo check) and breaks out after — and the running **cumulative
|
|
316
|
+
incremental**, each as a point estimate with a confidence band. The counterfactual
|
|
317
|
+
is centered on the pre-period, so the gap shows fit quality rather than a level
|
|
318
|
+
offset (SDID matches trends, not levels). The bands come from a **moving-block
|
|
319
|
+
bootstrap** of the pre-period residuals: resampling whole blocks preserves their
|
|
320
|
+
autocorrelation, so the intervals are more conservative than an iid normal
|
|
321
|
+
approximation — the cumulative band in particular widens faster than √k when the
|
|
322
|
+
residuals are positively autocorrelated. Raise `block_len` to capture longer-range
|
|
323
|
+
dependence (wider, more conservative cumulative bands). Pass `exclude=[…]` to drop
|
|
324
|
+
markets from the control pool (e.g. ones you don't trust as donors).
|
|
325
|
+
|
|
311
326
|
### Choosing a specification — `design.recommend(test_lengths, n_geos_options, target_lift, alphas=…)`
|
|
312
327
|
|
|
313
328
|
Sweeps designs across **test length × number of geos × alpha** and recommends the
|
|
@@ -345,6 +360,16 @@ Searches candidate treatment-market sets and ranks them by power, MDE, pre-fit,
|
|
|
345
360
|
holdout, and confidence. Pass `eligible=[…]` to restrict to markets you can
|
|
346
361
|
actually run in.
|
|
347
362
|
|
|
363
|
+
Two real-world controls for *which* markets the search may use:
|
|
364
|
+
|
|
365
|
+
- **`include=[…]`** — force specific markets into **every** candidate treatment
|
|
366
|
+
set (must-treat markets, e.g. a flagship region you've already committed to).
|
|
367
|
+
The search fills the remaining slots from `eligible`, up to `max_treated`.
|
|
368
|
+
- **`exclude=[…]`** — drop markets **entirely**: they're never treated *and*
|
|
369
|
+
never used as a donor/control (e.g. a market with contaminated data or its own
|
|
370
|
+
concurrent campaign). `exclude` is also accepted by `power()`, `diagnose()`,
|
|
371
|
+
`evaluate()`, and `recommend()` to keep a market out of the control pool.
|
|
372
|
+
|
|
348
373
|
### Multi-cell tests — `design.multi_cell(cells, test_len, …)`
|
|
349
374
|
|
|
350
375
|
Often you run several treatment cells at once — different creatives, budgets, or
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: panelkit
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Topic :: Scientific/Engineering
|
|
@@ -231,10 +231,15 @@ mc = design.multi_cell(cells={"west": ["los_angeles", "san_diego"],
|
|
|
231
231
|
print(mc.summary()) # per-cell MDE / confidence / holdout
|
|
232
232
|
mc.plot("multicell.png") # the multi-cell figure below
|
|
233
233
|
|
|
234
|
+
# pin in must-have markets, drop ones you don't trust:
|
|
235
|
+
ranked = design.select_markets(test_len=8, target_lift=0.05, max_treated=3,
|
|
236
|
+
include=["chicago"], exclude=["miami"])
|
|
237
|
+
|
|
234
238
|
# already ran the test? measure it (SC/ASC/SDID + a weighted-average ensemble):
|
|
235
239
|
ev = design.evaluate(treated=["chicago", "denver"], treat_start=52)
|
|
236
240
|
print(ev.summary()) # per-method + ensemble lift, CI, cumulative
|
|
237
241
|
ev.plot("evaluate.png") # observed vs counterfactual + lift-by-method
|
|
242
|
+
ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
|
|
238
243
|
|
|
239
244
|
# or sweep specifications (length × #geos × significance) and recommend one:
|
|
240
245
|
grid = design.recommend(test_lengths=[4, 6, 8, 12], n_geos_options=[3, 5, 10, 20],
|
|
@@ -273,6 +278,18 @@ with an SC in-space placebo p-value:
|
|
|
273
278
|
|
|
274
279
|

|
|
275
280
|
|
|
281
|
+
And the **effect over time** — the pointwise effect across the full timeline
|
|
282
|
+
(pre-period included, so you can see it sit flat in the noise band before the test
|
|
283
|
+
and break out after) plus the running cumulative incremental, each as a point
|
|
284
|
+
estimate with a confidence band:
|
|
285
|
+
|
|
286
|
+

|
|
287
|
+
|
|
288
|
+
**Pin in / drop markets.** `select_markets`/`recommend` take `include=[…]`
|
|
289
|
+
(force must-treat markets into every candidate) and `exclude=[…]` (drop markets
|
|
290
|
+
entirely — never treated, never a control). `exclude` is also accepted by
|
|
291
|
+
`power`, `diagnose`, and `evaluate` to keep a market out of the donor pool.
|
|
292
|
+
|
|
276
293
|
**Messy DataFrame? No problem.** `from_long` coerces real-world data: outcome
|
|
277
294
|
strings → numeric (with a clear error on genuinely non-numeric values), dates
|
|
278
295
|
(string or unsorted) → chronological columns, locations → market names, duplicate
|
|
@@ -201,10 +201,15 @@ mc = design.multi_cell(cells={"west": ["los_angeles", "san_diego"],
|
|
|
201
201
|
print(mc.summary()) # per-cell MDE / confidence / holdout
|
|
202
202
|
mc.plot("multicell.png") # the multi-cell figure below
|
|
203
203
|
|
|
204
|
+
# pin in must-have markets, drop ones you don't trust:
|
|
205
|
+
ranked = design.select_markets(test_len=8, target_lift=0.05, max_treated=3,
|
|
206
|
+
include=["chicago"], exclude=["miami"])
|
|
207
|
+
|
|
204
208
|
# already ran the test? measure it (SC/ASC/SDID + a weighted-average ensemble):
|
|
205
209
|
ev = design.evaluate(treated=["chicago", "denver"], treat_start=52)
|
|
206
210
|
print(ev.summary()) # per-method + ensemble lift, CI, cumulative
|
|
207
211
|
ev.plot("evaluate.png") # observed vs counterfactual + lift-by-method
|
|
212
|
+
ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
|
|
208
213
|
|
|
209
214
|
# or sweep specifications (length × #geos × significance) and recommend one:
|
|
210
215
|
grid = design.recommend(test_lengths=[4, 6, 8, 12], n_geos_options=[3, 5, 10, 20],
|
|
@@ -243,6 +248,18 @@ with an SC in-space placebo p-value:
|
|
|
243
248
|
|
|
244
249
|

|
|
245
250
|
|
|
251
|
+
And the **effect over time** — the pointwise effect across the full timeline
|
|
252
|
+
(pre-period included, so you can see it sit flat in the noise band before the test
|
|
253
|
+
and break out after) plus the running cumulative incremental, each as a point
|
|
254
|
+
estimate with a confidence band:
|
|
255
|
+
|
|
256
|
+

|
|
257
|
+
|
|
258
|
+
**Pin in / drop markets.** `select_markets`/`recommend` take `include=[…]`
|
|
259
|
+
(force must-treat markets into every candidate) and `exclude=[…]` (drop markets
|
|
260
|
+
entirely — never treated, never a control). `exclude` is also accepted by
|
|
261
|
+
`power`, `diagnose`, and `evaluate` to keep a market out of the donor pool.
|
|
262
|
+
|
|
246
263
|
**Messy DataFrame? No problem.** `from_long` coerces real-world data: outcome
|
|
247
264
|
strings → numeric (with a clear error on genuinely non-numeric values), dates
|
|
248
265
|
(string or unsorted) → chronological columns, locations → market names, duplicate
|
|
@@ -34,7 +34,10 @@ pub struct MarketCandidate {
|
|
|
34
34
|
pub struct SelectConfig {
|
|
35
35
|
/// Units eligible to be treated (e.g. markets you could actually run in).
|
|
36
36
|
pub eligible: Vec<usize>,
|
|
37
|
-
///
|
|
37
|
+
/// Units **forced into every** candidate treatment set (must-treat markets).
|
|
38
|
+
/// The search fills the remaining slots from `eligible`. Empty = no forcing.
|
|
39
|
+
pub include: Vec<usize>,
|
|
40
|
+
/// Maximum number of treated markets in a candidate set (counts `include`).
|
|
38
41
|
pub max_treated: usize,
|
|
39
42
|
pub test_len: usize,
|
|
40
43
|
/// The lift you care about detecting (fraction, e.g. 0.05 = 5%).
|
|
@@ -95,46 +98,91 @@ pub fn evaluate(y: &Mat, treated: &[usize], cfg: &SelectConfig) -> MarketCandida
|
|
|
95
98
|
}
|
|
96
99
|
}
|
|
97
100
|
|
|
98
|
-
/// Build the candidate list.
|
|
99
|
-
///
|
|
100
|
-
///
|
|
101
|
+
/// Build the candidate list. Every candidate always contains the forced
|
|
102
|
+
/// `include` markets; the remaining slots are drawn from `eligible` (minus the
|
|
103
|
+
/// forced ones). With `exact_size = Some(k)`, every candidate has exactly `k`
|
|
104
|
+
/// markets total; otherwise it's the forced set plus each single extra market
|
|
105
|
+
/// plus sampled larger subsets up to `max_treated`.
|
|
101
106
|
fn candidate_sets(cfg: &SelectConfig) -> Vec<Vec<usize>> {
|
|
102
107
|
let mut rng = Xoshiro256pp::seed_from_u64(cfg.seed);
|
|
103
|
-
let mut seen = std::collections::HashSet::new();
|
|
108
|
+
let mut seen: std::collections::HashSet<Vec<usize>> = std::collections::HashSet::new();
|
|
104
109
|
let mut sets: Vec<Vec<usize>> = Vec::new();
|
|
105
110
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
111
|
+
// Forced (must-treat) markets, de-duplicated, and the pool of extra picks.
|
|
112
|
+
let mut forced: Vec<usize> = cfg.include.clone();
|
|
113
|
+
forced.sort_unstable();
|
|
114
|
+
forced.dedup();
|
|
115
|
+
let forced_set: std::collections::HashSet<usize> = forced.iter().copied().collect();
|
|
116
|
+
let extra_pool: Vec<usize> = cfg
|
|
117
|
+
.eligible
|
|
118
|
+
.iter()
|
|
119
|
+
.copied()
|
|
120
|
+
.filter(|u| !forced_set.contains(u))
|
|
121
|
+
.collect();
|
|
122
|
+
|
|
123
|
+
if let Some(k0) = cfg.exact_size {
|
|
124
|
+
let k = k0.max(1);
|
|
125
|
+
let need = k.saturating_sub(forced.len());
|
|
126
|
+
if need == 0 {
|
|
127
|
+
// The forced set already fills the requested size.
|
|
128
|
+
if !forced.is_empty() {
|
|
129
|
+
sets.push(forced.clone());
|
|
130
|
+
}
|
|
131
|
+
return sets;
|
|
132
|
+
}
|
|
133
|
+
if need == 1 {
|
|
134
|
+
// Deterministic: forced + each eligible single (preserves the old
|
|
135
|
+
// "all singletons" behavior when nothing is forced and k == 1).
|
|
136
|
+
for &u in &extra_pool {
|
|
137
|
+
let mut pick = forced.clone();
|
|
138
|
+
pick.push(u);
|
|
139
|
+
pick.sort_unstable();
|
|
140
|
+
if seen.insert(pick.clone()) {
|
|
141
|
+
sets.push(pick);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
return sets;
|
|
110
145
|
}
|
|
111
146
|
let mut attempts = 0;
|
|
112
147
|
while sets.len() < cfg.n_candidates && attempts < cfg.n_candidates * 40 {
|
|
113
148
|
attempts += 1;
|
|
114
|
-
let mut pool =
|
|
149
|
+
let mut pool = extra_pool.clone();
|
|
115
150
|
rng.shuffle(&mut pool);
|
|
116
|
-
let mut pick: Vec<usize> =
|
|
151
|
+
let mut pick: Vec<usize> = forced.clone();
|
|
152
|
+
pick.extend(pool.into_iter().take(need));
|
|
117
153
|
pick.sort_unstable();
|
|
118
|
-
if seen.insert(pick.clone()) {
|
|
154
|
+
if pick.len() == k && seen.insert(pick.clone()) {
|
|
119
155
|
sets.push(pick);
|
|
120
156
|
}
|
|
121
157
|
}
|
|
122
158
|
return sets;
|
|
123
159
|
}
|
|
124
160
|
|
|
125
|
-
// Mixed-size search
|
|
126
|
-
|
|
127
|
-
if
|
|
128
|
-
|
|
129
|
-
|
|
161
|
+
// Mixed-size search. Extra slots available on top of the forced set.
|
|
162
|
+
let budget = cfg.max_treated.saturating_sub(forced.len());
|
|
163
|
+
if !forced.is_empty() {
|
|
164
|
+
seen.insert(forced.clone());
|
|
165
|
+
sets.push(forced.clone());
|
|
166
|
+
}
|
|
167
|
+
if budget >= 1 {
|
|
168
|
+
for &u in &extra_pool {
|
|
169
|
+
let mut pick = forced.clone();
|
|
170
|
+
pick.push(u);
|
|
171
|
+
pick.sort_unstable();
|
|
172
|
+
if seen.insert(pick.clone()) {
|
|
173
|
+
sets.push(pick);
|
|
174
|
+
}
|
|
130
175
|
}
|
|
176
|
+
}
|
|
177
|
+
if budget >= 2 && extra_pool.len() >= 2 {
|
|
131
178
|
let mut attempts = 0;
|
|
132
179
|
while sets.len() < cfg.n_candidates && attempts < cfg.n_candidates * 20 {
|
|
133
180
|
attempts += 1;
|
|
134
|
-
let
|
|
135
|
-
let mut pool =
|
|
181
|
+
let extra = 2 + rng.gen_range(budget - 1); // 2..=budget extra markets
|
|
182
|
+
let mut pool = extra_pool.clone();
|
|
136
183
|
rng.shuffle(&mut pool);
|
|
137
|
-
let mut pick: Vec<usize> =
|
|
184
|
+
let mut pick: Vec<usize> = forced.clone();
|
|
185
|
+
pick.extend(pool.into_iter().take(extra));
|
|
138
186
|
pick.sort_unstable();
|
|
139
187
|
if seen.insert(pick.clone()) {
|
|
140
188
|
sets.push(pick);
|
|
@@ -114,6 +114,7 @@ fn market_selection_ranks_candidates() {
|
|
|
114
114
|
let y = geo_panel(12, 60, 5);
|
|
115
115
|
let cfg = SelectConfig {
|
|
116
116
|
eligible: (0..12).collect(),
|
|
117
|
+
include: vec![],
|
|
117
118
|
max_treated: 3,
|
|
118
119
|
test_len: 10,
|
|
119
120
|
target_lift: 0.10,
|
|
@@ -139,6 +140,15 @@ fn market_selection_ranks_candidates() {
|
|
|
139
140
|
};
|
|
140
141
|
let ranked2 = select_markets(&y, &cfg2);
|
|
141
142
|
assert!(ranked2.iter().all(|c| c.treated.len() == 2));
|
|
143
|
+
// include: market 5 is forced into every candidate set.
|
|
144
|
+
let cfg3 = SelectConfig {
|
|
145
|
+
include: vec![5],
|
|
146
|
+
..cfg.clone()
|
|
147
|
+
};
|
|
148
|
+
let ranked3 = select_markets(&y, &cfg3);
|
|
149
|
+
assert!(!ranked3.is_empty());
|
|
150
|
+
assert!(ranked3.iter().all(|c| c.treated.contains(&5)));
|
|
151
|
+
assert!(ranked3.iter().all(|c| c.treated.len() <= 3));
|
|
142
152
|
// Every candidate has a valid holdout and confidence.
|
|
143
153
|
for c in &ranked {
|
|
144
154
|
assert!(c.holdout_pct > 0.0 && c.holdout_pct < 1.0);
|
|
@@ -169,7 +169,7 @@ pub fn geo_diagnostics(
|
|
|
169
169
|
|
|
170
170
|
/// Search and rank candidate treatment-market sets.
|
|
171
171
|
#[pyfunction]
|
|
172
|
-
#[pyo3(signature = (y, eligible, max_treated, test_len, target_lift, method="sdid", alpha=0.1, target_power=0.8, min_pre=0, n_candidates=200, seed=0, exact_size=None, lookback=None))]
|
|
172
|
+
#[pyo3(signature = (y, eligible, max_treated, test_len, target_lift, method="sdid", alpha=0.1, target_power=0.8, min_pre=0, n_candidates=200, seed=0, exact_size=None, lookback=None, include=None))]
|
|
173
173
|
#[allow(clippy::too_many_arguments)]
|
|
174
174
|
pub fn geo_select(
|
|
175
175
|
py: Python<'_>,
|
|
@@ -186,6 +186,7 @@ pub fn geo_select(
|
|
|
186
186
|
seed: u64,
|
|
187
187
|
exact_size: Option<usize>,
|
|
188
188
|
lookback: Option<usize>,
|
|
189
|
+
include: Option<Vec<usize>>,
|
|
189
190
|
) -> PyResult<Vec<PyMarketCandidate>> {
|
|
190
191
|
let m = parse_method(method)?;
|
|
191
192
|
let mat = mat_from_numpy(&y);
|
|
@@ -196,6 +197,7 @@ pub fn geo_select(
|
|
|
196
197
|
};
|
|
197
198
|
let cfg = SelectConfig {
|
|
198
199
|
eligible,
|
|
200
|
+
include: include.unwrap_or_default(),
|
|
199
201
|
max_treated,
|
|
200
202
|
test_len,
|
|
201
203
|
target_lift,
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "panelkit"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.3"
|
|
8
8
|
description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -52,6 +52,26 @@ def _ensemble_weight_arg(spec):
|
|
|
52
52
|
return w
|
|
53
53
|
|
|
54
54
|
|
|
55
|
+
def _placebo_paths(pre_gaps, length, block_len, n_reps, seed):
|
|
56
|
+
"""Moving-block bootstrap of the (centered) pre-period residuals into placebo
|
|
57
|
+
paths of ``length`` periods. Resampling whole blocks preserves the residual
|
|
58
|
+
autocorrelation, so the resulting CI bands are more conservative than an iid
|
|
59
|
+
normal approximation. Returns an ``(n_reps, length)`` array (empty if no
|
|
60
|
+
pre-period or zero length)."""
|
|
61
|
+
g = np.asarray(pre_gaps, dtype=float)
|
|
62
|
+
m = len(g)
|
|
63
|
+
if m == 0 or length <= 0 or n_reps <= 0:
|
|
64
|
+
return np.empty((0, max(length, 0)))
|
|
65
|
+
g = g - g.mean() # null is "no effect" → center the residuals
|
|
66
|
+
rng = np.random.default_rng(int(seed))
|
|
67
|
+
bl = max(1, min(int(block_len), m))
|
|
68
|
+
n_blocks = int(np.ceil(length / bl))
|
|
69
|
+
starts = rng.integers(0, m, size=(n_reps, n_blocks))
|
|
70
|
+
idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
|
|
71
|
+
paths = g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
|
|
72
|
+
return paths
|
|
73
|
+
|
|
74
|
+
|
|
55
75
|
class _PowerReport:
|
|
56
76
|
"""Result of a power analysis across methods, with a report and plots."""
|
|
57
77
|
|
|
@@ -352,6 +372,22 @@ class GeoDesign:
|
|
|
352
372
|
out.append(self._index[m])
|
|
353
373
|
return out
|
|
354
374
|
|
|
375
|
+
def _names_of(self, markets) -> list:
|
|
376
|
+
"""Resolve markets (names or indices) to their string names."""
|
|
377
|
+
return [self.names[i] for i in self._resolve(markets)]
|
|
378
|
+
|
|
379
|
+
def _without(self, exclude):
|
|
380
|
+
"""Return ``(sub_design, excluded_name_set)`` with the excluded markets
|
|
381
|
+
dropped entirely (so they're neither treated nor used as controls). Names
|
|
382
|
+
are preserved, so callers can pass markets to the sub-design by name."""
|
|
383
|
+
ex = set(self._names_of(exclude)) if exclude else set()
|
|
384
|
+
if not ex:
|
|
385
|
+
return self, ex
|
|
386
|
+
keep = [i for i in range(self.n) if self.names[i] not in ex]
|
|
387
|
+
if not keep:
|
|
388
|
+
raise ValueError("exclude removes every market — nothing left to analyze")
|
|
389
|
+
return GeoDesign(self.Y[keep], names=[self.names[i] for i in keep]), ex
|
|
390
|
+
|
|
355
391
|
def power(
|
|
356
392
|
self,
|
|
357
393
|
treated,
|
|
@@ -364,6 +400,7 @@ class GeoDesign:
|
|
|
364
400
|
lookback: int | None = None,
|
|
365
401
|
ensemble: bool = True,
|
|
366
402
|
ensemble_weights="auto",
|
|
403
|
+
exclude=None,
|
|
367
404
|
) -> _PowerReport:
|
|
368
405
|
"""Power analysis for a specified treated-market set across methods.
|
|
369
406
|
|
|
@@ -376,7 +413,20 @@ class GeoDesign:
|
|
|
376
413
|
power reflects the averaged estimator, which is usually steadier than any
|
|
377
414
|
one method). ``ensemble_weights`` is ``"auto"`` (data-driven inverse-variance
|
|
378
415
|
weighting from each method's historical-null spread), ``"equal"``, or a dict
|
|
379
|
-
like ``{"SC": 0.5, "ASC": 0.2, "SDID": 0.3}``."""
|
|
416
|
+
like ``{"SC": 0.5, "ASC": 0.2, "SDID": 0.3}``.
|
|
417
|
+
|
|
418
|
+
``exclude`` drops markets entirely (e.g. contaminated or untrustworthy
|
|
419
|
+
ones) so they're never used as donors/controls."""
|
|
420
|
+
if exclude:
|
|
421
|
+
sub, ex = self._without(exclude)
|
|
422
|
+
tnames = self._names_of(treated)
|
|
423
|
+
bad = [n for n in tnames if n in ex]
|
|
424
|
+
if bad:
|
|
425
|
+
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
426
|
+
return sub.power(tnames, test_len, lifts=lifts, methods=methods, alpha=alpha,
|
|
427
|
+
target_power=target_power, recommended=recommended,
|
|
428
|
+
lookback=lookback, ensemble=ensemble,
|
|
429
|
+
ensemble_weights=ensemble_weights)
|
|
380
430
|
idx = self._resolve(treated)
|
|
381
431
|
names = [self.names[i] for i in idx]
|
|
382
432
|
lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
|
|
@@ -398,13 +448,21 @@ class GeoDesign:
|
|
|
398
448
|
rec = recommended if recommended in results else list(results)[0]
|
|
399
449
|
return _PowerReport(self, idx, names, test_len, results, diag, rec, alpha, target_power)
|
|
400
450
|
|
|
401
|
-
def diagnose(self, treated, test_len: int) -> "_DiagnosticsReport":
|
|
451
|
+
def diagnose(self, treated, test_len: int, exclude=None) -> "_DiagnosticsReport":
|
|
402
452
|
"""Real-world guardrails for a treated-market set: pre-period fit,
|
|
403
453
|
seasonality, holdout, stability, and warnings — with a visual.
|
|
404
454
|
|
|
405
455
|
Returns a report with ``.summary()`` and ``.plot(path)`` (the guardrails
|
|
406
456
|
figure: treated-vs-synthetic pre-fit, seasonality ACF, holdout share, and
|
|
407
|
-
a scorecard listing any warnings)."""
|
|
457
|
+
a scorecard listing any warnings). ``exclude`` drops markets from the
|
|
458
|
+
control pool entirely."""
|
|
459
|
+
if exclude:
|
|
460
|
+
sub, ex = self._without(exclude)
|
|
461
|
+
tnames = self._names_of(treated)
|
|
462
|
+
bad = [n for n in tnames if n in ex]
|
|
463
|
+
if bad:
|
|
464
|
+
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
465
|
+
return sub.diagnose(tnames, test_len)
|
|
408
466
|
idx = self._resolve(treated)
|
|
409
467
|
names = [self.names[i] for i in idx]
|
|
410
468
|
t0 = self.t - int(test_len)
|
|
@@ -431,18 +489,46 @@ class GeoDesign:
|
|
|
431
489
|
top: int = 10,
|
|
432
490
|
exact_size: int | None = None,
|
|
433
491
|
lookback: int | None = None,
|
|
492
|
+
include=None,
|
|
493
|
+
exclude=None,
|
|
434
494
|
) -> list:
|
|
435
495
|
"""Search candidate treatment-market sets and return the top ranked.
|
|
436
496
|
|
|
437
497
|
``exact_size=k`` restricts the search to sets of exactly ``k`` markets
|
|
438
498
|
(otherwise sizes 1..``max_treated`` are considered). ``lookback=k`` powers
|
|
439
|
-
over the most-recent ``k`` historical windows."""
|
|
499
|
+
over the most-recent ``k`` historical windows.
|
|
500
|
+
|
|
501
|
+
``include`` forces specific markets into **every** candidate treatment set
|
|
502
|
+
(must-treat markets); the search fills the remaining slots from
|
|
503
|
+
``eligible``. ``exclude`` drops markets entirely — they're never treated
|
|
504
|
+
and never used as controls."""
|
|
505
|
+
if exclude:
|
|
506
|
+
sub, ex = self._without(exclude)
|
|
507
|
+
elig_names = self._names_of(eligible) if eligible is not None else None
|
|
508
|
+
if elig_names is not None:
|
|
509
|
+
elig_names = [n for n in elig_names if n not in ex]
|
|
510
|
+
inc_names = self._names_of(include) if include else None
|
|
511
|
+
if inc_names is not None:
|
|
512
|
+
bad = [n for n in inc_names if n in ex]
|
|
513
|
+
if bad:
|
|
514
|
+
raise ValueError(f"markets in both include and exclude: {bad}")
|
|
515
|
+
return sub.select_markets(
|
|
516
|
+
test_len, target_lift, max_treated, eligible=elig_names, method=method,
|
|
517
|
+
alpha=alpha, target_power=target_power, n_candidates=n_candidates,
|
|
518
|
+
seed=seed, top=top, exact_size=exact_size, lookback=lookback,
|
|
519
|
+
include=inc_names, exclude=None)
|
|
520
|
+
|
|
440
521
|
elig = self._resolve(eligible) if eligible is not None else list(range(self.n))
|
|
522
|
+
inc = sorted(set(self._resolve(include))) if include else []
|
|
523
|
+
if len(inc) > int(max_treated):
|
|
524
|
+
raise ValueError(f"include has {len(inc)} markets but max_treated="
|
|
525
|
+
f"{max_treated}; raise max_treated or include fewer")
|
|
441
526
|
ranked = _panelkit.geo_select(
|
|
442
527
|
self.Y, elig, int(max_treated), int(test_len), float(target_lift),
|
|
443
528
|
method.lower(), alpha, target_power, 0, int(n_candidates), int(seed),
|
|
444
529
|
None if exact_size is None else int(exact_size),
|
|
445
530
|
None if lookback is None else int(lookback),
|
|
531
|
+
inc or None,
|
|
446
532
|
)
|
|
447
533
|
out = []
|
|
448
534
|
for c in ranked[:top]:
|
|
@@ -470,6 +556,8 @@ class GeoDesign:
|
|
|
470
556
|
seed: int = 0,
|
|
471
557
|
min_confidence: float = 60.0,
|
|
472
558
|
lookback: int | None = None,
|
|
559
|
+
include=None,
|
|
560
|
+
exclude=None,
|
|
473
561
|
) -> "_ScenarioGrid":
|
|
474
562
|
"""Sweep designs across **specifications** — test length × number of geos
|
|
475
563
|
× significance level (alpha) — and recommend the best.
|
|
@@ -477,7 +565,9 @@ class GeoDesign:
|
|
|
477
565
|
For each (alpha, test_len, n_geos) cell it searches for the best set of
|
|
478
566
|
exactly ``n_geos`` treatment markets and records its MDE, power, holdout,
|
|
479
567
|
and confidence. Returns a :class:`_ScenarioGrid` with a recommendation,
|
|
480
|
-
a plain-English summary, and a tradeoffs figure.
|
|
568
|
+
a plain-English summary, and a tradeoffs figure. ``include`` forces
|
|
569
|
+
must-treat markets into every candidate; ``exclude`` drops markets
|
|
570
|
+
entirely.
|
|
481
571
|
"""
|
|
482
572
|
rows = []
|
|
483
573
|
for alpha in alphas:
|
|
@@ -488,6 +578,7 @@ class GeoDesign:
|
|
|
488
578
|
eligible=eligible, method=method, alpha=alpha,
|
|
489
579
|
target_power=target_power, n_candidates=n_candidates,
|
|
490
580
|
seed=seed, top=1, exact_size=ng, lookback=lookback,
|
|
581
|
+
include=include, exclude=exclude,
|
|
491
582
|
)
|
|
492
583
|
best = ranked[0] if ranked else None
|
|
493
584
|
if best is None:
|
|
@@ -613,6 +704,7 @@ class GeoDesign:
|
|
|
613
704
|
n_boot: int = 2000,
|
|
614
705
|
block_len: int = 4,
|
|
615
706
|
seed: int = 0,
|
|
707
|
+
exclude=None,
|
|
616
708
|
) -> "_EvalReport":
|
|
617
709
|
"""Estimate the realized effect of a geo test that has **already run**.
|
|
618
710
|
|
|
@@ -643,8 +735,17 @@ class GeoDesign:
|
|
|
643
735
|
-------
|
|
644
736
|
_EvalReport
|
|
645
737
|
With ``.summary()``, ``.plot(path)``, per-method results, and the
|
|
646
|
-
ensemble point estimate / interval / lift.
|
|
738
|
+
ensemble point estimate / interval / lift. ``exclude`` drops markets
|
|
739
|
+
from the control pool entirely.
|
|
647
740
|
"""
|
|
741
|
+
if exclude:
|
|
742
|
+
sub, ex = self._without(exclude)
|
|
743
|
+
tnames = self._names_of(treated)
|
|
744
|
+
bad = [n for n in tnames if n in ex]
|
|
745
|
+
if bad:
|
|
746
|
+
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
747
|
+
return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
|
|
748
|
+
level=level, n_boot=n_boot, block_len=block_len, seed=seed)
|
|
648
749
|
idx = self._resolve(treated)
|
|
649
750
|
names = [self.names[i] for i in idx]
|
|
650
751
|
t0 = int(treat_start)
|
|
@@ -661,6 +762,7 @@ class GeoDesign:
|
|
|
661
762
|
"ASC": lambda: _panelkit.fit_asc(self.Y, idx, t0, 0.0, None),
|
|
662
763
|
"SDID": lambda: _panelkit.fit_sdid(self.Y, idx, t0, 1.0),
|
|
663
764
|
}
|
|
765
|
+
treated_series = self.Y[idx].mean(axis=0)
|
|
664
766
|
per = {}
|
|
665
767
|
for m in methods:
|
|
666
768
|
fit = fitters[m]()
|
|
@@ -671,8 +773,21 @@ class GeoDesign:
|
|
|
671
773
|
se, lo, hi = _panelkit.bootstrap_mean(
|
|
672
774
|
att_path.tolist(), "stationary", int(block_len), int(n_boot),
|
|
673
775
|
int(seed), float(level))
|
|
776
|
+
# Full-timeline counterfactual via donor weights (exact for SC; the
|
|
777
|
+
# dominant term for ASC/SDID). Center on the pre-period so the gap
|
|
778
|
+
# reflects FIT, not a level offset — SDID is level-agnostic (matches
|
|
779
|
+
# trends, not levels), so its donor-weighted series sits at a constant
|
|
780
|
+
# offset that would otherwise look like a non-zero pre-period.
|
|
781
|
+
dids = np.asarray(fit.donor_ids, dtype=int)
|
|
782
|
+
ws = np.asarray(fit.weights, dtype=float)
|
|
783
|
+
if dids.size:
|
|
784
|
+
full_cf = self.Y[dids].T @ ws
|
|
785
|
+
full_cf = full_cf + (treated_series[:t0].mean() - full_cf[:t0].mean())
|
|
786
|
+
else:
|
|
787
|
+
full_cf = np.full(self.t, np.nan)
|
|
674
788
|
per[m] = {
|
|
675
789
|
"att": att, "att_path": att_path, "counterfactual": cf,
|
|
790
|
+
"full_cf": full_cf,
|
|
676
791
|
"cf_mean": cf_mean, "lift": att / cf_mean if cf_mean else float("nan"),
|
|
677
792
|
"se": se, "att_lo": lo, "att_hi": hi,
|
|
678
793
|
"lift_lo": lo / cf_mean if cf_mean else float("nan"),
|
|
@@ -719,16 +834,57 @@ class GeoDesign:
|
|
|
719
834
|
"weights": wmap,
|
|
720
835
|
}
|
|
721
836
|
|
|
722
|
-
# Significance: SC in-space placebo p-value
|
|
723
|
-
# (donor-weight reconstruction) for the timeline plot.
|
|
837
|
+
# Significance: SC in-space placebo p-value.
|
|
724
838
|
sc = _panelkit.fit_sc(self.Y, idx, t0, 0.0, True, level)
|
|
725
839
|
p_value = sc.p_value
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
840
|
+
|
|
841
|
+
# Full-timeline ensemble counterfactual + gap path (pre-period shows fit,
|
|
842
|
+
# post-period uses the exact ensemble effect).
|
|
843
|
+
ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
|
|
844
|
+
full_gap = treated_series - ens_full_cf
|
|
845
|
+
full_gap[t0:] = ens_path # exact ensemble post effect
|
|
846
|
+
counterfactual = treated_series - full_gap # consistent everywhere
|
|
847
|
+
pre_gaps = full_gap[:t0]
|
|
848
|
+
sigma_pre = float(np.std(pre_gaps, ddof=1)) if t0 > 1 else float(np.std(pre_gaps))
|
|
849
|
+
|
|
850
|
+
# CI bands from a MOVING-BLOCK BOOTSTRAP of the pre-period residuals.
|
|
851
|
+
# Blocks preserve autocorrelation, so the bands are more conservative than
|
|
852
|
+
# an iid normal approximation — especially the cumulative band, whose
|
|
853
|
+
# spread grows faster than sqrt(k) under positive autocorrelation.
|
|
854
|
+
post_len = self.t - t0
|
|
855
|
+
a = (1.0 - float(level)) / 2.0
|
|
856
|
+
paths = _placebo_paths(pre_gaps, post_len, int(block_len), int(n_boot), int(seed))
|
|
857
|
+
if paths.size:
|
|
858
|
+
point_lo = np.quantile(paths, a, axis=0)
|
|
859
|
+
point_hi = np.quantile(paths, 1.0 - a, axis=0)
|
|
860
|
+
point_hw = float(np.quantile(np.abs(paths), float(level))) # symmetric, full-timeline
|
|
861
|
+
cum_paths = np.cumsum(paths, axis=1)
|
|
862
|
+
cum_band_lo = np.quantile(cum_paths, a, axis=0)
|
|
863
|
+
cum_band_hi = np.quantile(cum_paths, 1.0 - a, axis=0)
|
|
864
|
+
else:
|
|
865
|
+
point_lo = point_hi = np.zeros(post_len)
|
|
866
|
+
point_hw = 0.0
|
|
867
|
+
cum_band_lo = cum_band_hi = np.zeros(post_len)
|
|
868
|
+
|
|
869
|
+
ens_post = ens_path
|
|
870
|
+
run = np.cumsum(ens_post)
|
|
871
|
+
cum_curve = run * n_treated
|
|
872
|
+
cum_lo_curve = (run + cum_band_lo) * n_treated
|
|
873
|
+
cum_hi_curve = (run + cum_band_hi) * n_treated
|
|
874
|
+
|
|
875
|
+
ensemble["sigma_pre"] = sigma_pre
|
|
876
|
+
ensemble["full_gap"] = full_gap
|
|
877
|
+
ensemble["point_hw"] = point_hw # constant pointwise half-width
|
|
878
|
+
ensemble["point_lo"] = ens_post + point_lo # per-period CI on the effect
|
|
879
|
+
ensemble["point_hi"] = ens_post + point_hi
|
|
880
|
+
ensemble["cum_curve"] = cum_curve # cumulative incremental path
|
|
881
|
+
ensemble["cum_lo_curve"] = cum_lo_curve
|
|
882
|
+
ensemble["cum_hi_curve"] = cum_hi_curve
|
|
883
|
+
ensemble["cum_lo"] = float(cum_lo_curve[-1]) if post_len else float("nan")
|
|
884
|
+
ensemble["cum_hi"] = float(cum_hi_curve[-1]) if post_len else float("nan")
|
|
885
|
+
|
|
730
886
|
return _EvalReport(names, t0, n_treated, per, ensemble, p_value, level,
|
|
731
|
-
treated_series,
|
|
887
|
+
treated_series, counterfactual)
|
|
732
888
|
|
|
733
889
|
|
|
734
890
|
class _ScenarioGrid:
|
|
@@ -924,15 +1080,26 @@ class _EvalReport:
|
|
|
924
1080
|
"interval includes zero.")
|
|
925
1081
|
lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
|
|
926
1082
|
f"{e['cumulative']:,.0f} cumulative incremental")
|
|
1083
|
+
if "cum_lo" in e:
|
|
1084
|
+
lines.append(f"Cumulative {cl}% CI : "
|
|
1085
|
+
f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
|
|
1086
|
+
f"(moving-block bootstrap, block_len-aware)")
|
|
927
1087
|
lines.append(verdict)
|
|
928
1088
|
lines.append("=" * 66)
|
|
929
1089
|
return "\n".join(lines)
|
|
930
1090
|
|
|
931
1091
|
def plot(self, path: str | None = None):
    """Draw the evaluation figure for this report.

    The figure shows observed vs counterfactual, the effect path with its
    CI band, and a lift-by-method bar. Rendering is delegated to
    ``_plot_eval``, which receives this report and ``path`` unchanged.

    Returns
    -------
    The matplotlib Figure produced by ``_plot_eval``.
    """
    figure = _plot_eval(self, path)
    return figure
|
|
935
1095
|
|
|
1096
|
+
def plot_effect_over_time(self, path: str | None = None):
    """Draw the effect-over-time figure for this report.

    Two views are rendered, each as a point estimate with a confidence band:
    the **pointwise** effect across the full timeline (the pre-period is
    included as a placebo check) and the running **cumulative** incremental.
    Rendering is delegated to ``_plot_eval_timeline``, which receives this
    report and ``path`` unchanged.

    Returns
    -------
    The matplotlib Figure produced by ``_plot_eval_timeline``.
    """
    figure = _plot_eval_timeline(self, path)
    return figure
|
|
1102
|
+
|
|
936
1103
|
def __repr__(self):
|
|
937
1104
|
sig = "sig" if self.significant else "ns"
|
|
938
1105
|
return (f"EvalReport(lift={100*self.lift:+.2f}%, "
|
|
@@ -1362,13 +1529,18 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1362
1529
|
ax.grid(True, alpha=0.25)
|
|
1363
1530
|
ax.legend(loc="best", framealpha=0.9, fontsize=9)
|
|
1364
1531
|
|
|
1365
|
-
# ---- B: effect path over the post-period (ensemble + per method).
|
|
1532
|
+
# ---- B: effect path over the post-period (ensemble + per method) + CI band.
|
|
1366
1533
|
axb = fig.add_subplot(gs[1, 0])
|
|
1367
1534
|
for m, r in rep.per.items():
|
|
1368
1535
|
axb.plot(post, r["att_path"], color=_METHOD_COLORS.get(m, _PK_GREY),
|
|
1369
1536
|
lw=1.3, alpha=0.7, label=m)
|
|
1370
|
-
|
|
1371
|
-
|
|
1537
|
+
ens_post = rep.ensemble["att_path"]
|
|
1538
|
+
p_lo = rep.ensemble.get("point_lo")
|
|
1539
|
+
p_hi = rep.ensemble.get("point_hi")
|
|
1540
|
+
if p_lo is not None:
|
|
1541
|
+
axb.fill_between(post, p_lo, p_hi, color=_PK_PURPLE, alpha=0.18,
|
|
1542
|
+
label=f"ensemble {int(round(100*rep.level))}% band")
|
|
1543
|
+
axb.plot(post, ens_post, color=_PK_PURPLE, lw=2.6, label="ENSEMBLE")
|
|
1372
1544
|
axb.axhline(0, color="#111827", lw=1.0)
|
|
1373
1545
|
axb.set_title("Effect over time (per-period ATT)", fontweight="bold")
|
|
1374
1546
|
axb.set_xlabel("period")
|
|
@@ -1405,3 +1577,82 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1405
1577
|
if path:
|
|
1406
1578
|
fig.savefig(path, dpi=150, bbox_inches="tight")
|
|
1407
1579
|
return fig
|
|
1580
|
+
|
|
1581
|
+
|
|
1582
|
+
def _plot_eval_timeline(rep: "_EvalReport", path):
    """Pointwise + cumulative effect over the full timeline, with CI bands.

    Top panel: the ensemble gap (``ensemble["full_gap"]``) for every period,
    wrapped in a constant half-width band (``point_hw``) and, over the test
    window, the per-period ``point_lo`` / ``point_hi`` band. Bottom panel: the
    running cumulative incremental (``cum_curve``) with its ``cum_lo_curve`` /
    ``cum_hi_curve`` band.

    Bands come from a moving-block bootstrap of the pre-period residuals (so they
    capture autocorrelation): the pointwise band is the per-period placebo spread
    around the estimate; the cumulative band grows with horizon as the bootstrap
    placebo cumulative-sums spread out.

    Parameters
    ----------
    rep : _EvalReport
        Report to draw; ``treated_series``, ``t0``, ``level`` and ``ensemble``
        are read from it.
    path : str | None
        When truthy, the figure is also saved there (150 dpi, tight bbox).

    Returns
    -------
    The matplotlib Figure.
    """
    _, plt = _require_mpl()
    import numpy as _np
    from matplotlib.gridspec import GridSpec

    T = len(rep.treated_series)
    t0 = rep.t0
    e = rep.ensemble
    x = _np.arange(T)
    seg = x[t0:]
    # An evaluation may have no post-period at all; every artist that is
    # indexed by the test window is skipped in that case instead of crashing.
    has_post = seg.size > 0
    gap = _np.asarray(e["full_gap"], dtype=float)
    hw = e.get("point_hw", 0.0)
    cl = int(round(100 * rep.level))

    plt.rcParams.update({"font.size": 11, "axes.titlesize": 12})
    fig = plt.figure(figsize=(12, 7.8))
    fig.patch.set_facecolor("white")
    gs = GridSpec(2, 1, figure=fig, height_ratios=[1.0, 1.0], hspace=0.32)

    # ---- Top: pointwise effect (treated − counterfactual), full timeline. ----
    ax = fig.add_subplot(gs[0])
    ax.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
    # Constant placebo band across the whole timeline (the pre-period sits inside
    # it as a fit/placebo check); the per-period CI on the post effect is shown
    # as a second, darker band around the estimate.
    ax.fill_between(x, gap - hw, gap + hw, color=_PK_PURPLE, alpha=0.12,
                    label=f"{cl}% placebo band")
    if has_post:
        ax.fill_between(seg, e["point_lo"], e["point_hi"], color=_PK_PURPLE, alpha=0.22)
    ax.plot(x, gap, color=_PK_PURPLE, lw=2.0, label="pointwise effect")
    ax.axhline(0, color="#111827", lw=1.0)
    ax.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
    ax.annotate("pre-period (placebo)", (t0 / 2, ax.get_ylim()[1]), ha="center",
                va="top", color="#6b7280", fontsize=9)
    if has_post:
        ax.annotate("test window", (t0 + (T - t0) / 2, ax.get_ylim()[1]), ha="center",
                    va="top", color="#6b21a8", fontsize=9)
    ax.set_title("Pointwise effect over time (treated − counterfactual)",
                 fontweight="bold")
    ax.set_xlabel("period")
    ax.set_ylabel("per-period effect")
    # Same x-range as the cumulative panel below so the t0 divider lines up.
    ax.set_xlim(-0.5, T - 0.5)
    ax.grid(True, alpha=0.25)
    ax.legend(loc="upper left", framealpha=0.9, fontsize=9)

    # ---- Bottom: cumulative incremental over the test window (×n_treated). ----
    axc = fig.add_subplot(gs[1])
    cum = e["cum_curve"]
    axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
    if has_post:
        axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
                         alpha=0.15, label=f"{cl}% band (block bootstrap)")
        axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
    axc.axhline(0, color="#111827", lw=1.0)
    axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
    if has_post:
        # Label the end of the curve with the final cumulative value and its CI.
        final = cum[-1]
        axc.annotate(f"{final:,.0f}\n[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}]",
                     (T - 1, final), textcoords="offset points", xytext=(-6, 0),
                     ha="right", va="center", fontweight="bold", color="#065f46", fontsize=9)
    axc.set_title("Cumulative incremental effect over the test window",
                  fontweight="bold")
    axc.set_xlabel("period")
    axc.set_ylabel("cumulative incremental")
    axc.set_xlim(-0.5, T - 0.5)
    axc.grid(True, alpha=0.25)
    if has_post:
        axc.legend(loc="upper left", framealpha=0.9, fontsize=9)

    fig.suptitle(f"panelkit · effect over time — ensemble "
                 f"{100*rep.ensemble['lift']:+.2f}% lift, "
                 f"{rep.ensemble['cumulative']:,.0f} cumulative "
                 f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}]",
                 fontsize=14, fontweight="bold", x=0.012, ha="left")
    if path:
        fig.savefig(path, dpi=150, bbox_inches="tight")
    return fig
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|