panelkit 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {panelkit-0.2.2 → panelkit-0.2.4}/Cargo.lock +5 -5
- {panelkit-0.2.2 → panelkit-0.2.4}/Cargo.toml +1 -1
- {panelkit-0.2.2 → panelkit-0.2.4}/GUIDE.md +37 -6
- {panelkit-0.2.2 → panelkit-0.2.4}/PKG-INFO +19 -2
- {panelkit-0.2.2 → panelkit-0.2.4}/README.md +18 -1
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/src/selection.rs +68 -20
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/tests/geo.rs +10 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/src/api_geo.rs +3 -1
- {panelkit-0.2.2 → panelkit-0.2.4}/pyproject.toml +1 -1
- {panelkit-0.2.2 → panelkit-0.2.4}/python/panelkit/_panelkit.pyi +1 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/python/panelkit/design.py +323 -53
- {panelkit-0.2.2 → panelkit-0.2.4}/BENCHMARKS.md +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/LICENSE-APACHE +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/LICENSE-MIT +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/benches/estimators.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/did/bacon.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/did/callaway.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/did/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/did/sunab.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/did/twfe.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/fe/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/fe/within.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/mcnnm/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/panel.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/result.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/sc/augmented.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/sc/cpasc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/sc/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/sc/sdid.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/src/sc/synthetic.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/tests/cpasc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/tests/did.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/tests/sc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/estimators/tests/sc_family.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/src/diagnostics.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/src/power.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/geo/src/types.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/src/batch.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/src/bootstrap.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/src/ci.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/src/parallel.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/src/placebo.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/inference/tests/inference.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/error.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/cholesky.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/eig_sym.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/qr.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/randomized.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/svd.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/factor/svd_gram.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/matrix.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/ops/matmul.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/ops/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/ops/norms.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/ops/transform.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/opt/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/opt/simplex.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/opt/softthresh.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/rng.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/solve/lstsq.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/solve/mod.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/src/solve/spd.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/linalg/tests/numerics.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/Cargo.toml +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/src/api_did.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/src/api_sc.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/src/convert.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/src/lib.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/crates/pypanelkit/src/results.rs +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/python/panelkit/__init__.py +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/python/panelkit/estimators.py +0 -0
- {panelkit-0.2.2 → panelkit-0.2.4}/python/panelkit/py.typed +0 -0
|
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
|
|
|
462
462
|
|
|
463
463
|
[[package]]
|
|
464
464
|
name = "panelkit-estimators"
|
|
465
|
-
version = "0.2.2"
|
|
465
|
+
version = "0.2.4"
|
|
466
466
|
dependencies = [
|
|
467
467
|
"criterion",
|
|
468
468
|
"panelkit-linalg",
|
|
@@ -471,7 +471,7 @@ dependencies = [
|
|
|
471
471
|
|
|
472
472
|
[[package]]
|
|
473
473
|
name = "panelkit-geo"
|
|
474
|
-
version = "0.2.2"
|
|
474
|
+
version = "0.2.4"
|
|
475
475
|
dependencies = [
|
|
476
476
|
"panelkit-estimators",
|
|
477
477
|
"panelkit-inference",
|
|
@@ -482,7 +482,7 @@ dependencies = [
|
|
|
482
482
|
|
|
483
483
|
[[package]]
|
|
484
484
|
name = "panelkit-inference"
|
|
485
|
-
version = "0.2.2"
|
|
485
|
+
version = "0.2.4"
|
|
486
486
|
dependencies = [
|
|
487
487
|
"panelkit-estimators",
|
|
488
488
|
"panelkit-linalg",
|
|
@@ -491,7 +491,7 @@ dependencies = [
|
|
|
491
491
|
|
|
492
492
|
[[package]]
|
|
493
493
|
name = "panelkit-linalg"
|
|
494
|
-
version = "0.2.2"
|
|
494
|
+
version = "0.2.4"
|
|
495
495
|
dependencies = [
|
|
496
496
|
"proptest",
|
|
497
497
|
"rayon",
|
|
@@ -623,7 +623,7 @@ dependencies = [
|
|
|
623
623
|
|
|
624
624
|
[[package]]
|
|
625
625
|
name = "pypanelkit"
|
|
626
|
-
version = "0.2.2"
|
|
626
|
+
version = "0.2.4"
|
|
627
627
|
dependencies = [
|
|
628
628
|
"numpy",
|
|
629
629
|
"panelkit-estimators",
|
|
@@ -294,20 +294,41 @@ weighted-average **ensemble** estimate.
|
|
|
294
294
|
|
|
295
295
|
```python
|
|
296
296
|
ev = design.evaluate(treated=["chicago", "denver"], treat_start=52, level=0.90)
|
|
297
|
-
print(ev.summary())
|
|
298
|
-
ev.plot("evaluate.png")
|
|
297
|
+
print(ev.summary()) # per-method + ensemble lift, CI, cumulative
|
|
298
|
+
ev.plot("evaluate.png") # observed-vs-cf, effect path (CI band), lift bar
|
|
299
|
+
ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
|
|
299
300
|
ev.lift, ev.cumulative, ev.significant
|
|
300
301
|
```
|
|
301
302
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
303
|
+
Inference is **in-space placebo** (Abadie): every donor market is refit as if it
|
|
304
|
+
were the treated one, and the spread of *their* post-period effects is the null
|
|
305
|
+
reference — capturing out-of-sample extrapolation error, the real source of
|
|
306
|
+
uncertainty. (A bootstrap of the treated unit's own post-period only sees
|
|
307
|
+
in-sample noise and is wildly anti-conservative — on null data its 90% interval
|
|
308
|
+
falsely flags an effect ~50% of the time; the placebo version sits at/below the
|
|
309
|
+
nominal 10%.) Poorly-fit placebos (pre-period RMSPE > 2× the treated unit's) are
|
|
310
|
+
dropped, per Abadie. The p-value is the placebo rank of the treated effect, and
|
|
311
|
+
`"auto"` ensemble weights are inverse-variance from each method's placebo-null
|
|
312
|
+
spread. `ev` exposes
|
|
306
313
|
`.lift`, `.att`, `.cumulative`, `.significant`, the per-method results in `ev.per`,
|
|
307
314
|
and the ensemble in `ev.ensemble`. Reported numbers: **% lift** (effect ÷
|
|
308
315
|
counterfactual), **per-period ATT**, and **cumulative incremental** over the
|
|
309
316
|
window (summed across treated markets).
|
|
310
317
|
|
|
318
|
+
**Effect over time** (`ev.plot_effect_over_time(...)`) gives the event-study view:
|
|
319
|
+
the **pointwise** effect across the full timeline — *including the pre-period*, so
|
|
320
|
+
you can see it sits flat (centered on zero) inside the noise band before the test
|
|
321
|
+
starts (a placebo check) and breaks out after — and the running **cumulative
|
|
322
|
+
incremental**, each as a point estimate with a confidence band. The counterfactual
|
|
323
|
+
is centered on the pre-period, so the gap shows fit quality rather than a level
|
|
324
|
+
offset (SDID matches trends, not levels). The bands come from the **in-space
|
|
325
|
+
placebo** distribution: at each horizon, the pointwise band is the spread of the
|
|
326
|
+
donor placebos' per-period effects, and the cumulative band is the spread of their
|
|
327
|
+
cumulative sums (so it fans out with horizon). Placebo inference needs a decent
|
|
328
|
+
donor pool to have power — with only a handful of comparable donors the intervals
|
|
329
|
+
are necessarily wide. Pass `exclude=[…]` to drop markets from the control pool
|
|
330
|
+
(e.g. ones you don't trust as donors).
|
|
331
|
+
|
|
311
332
|
### Choosing a specification — `design.recommend(test_lengths, n_geos_options, target_lift, alphas=…)`
|
|
312
333
|
|
|
313
334
|
Sweeps designs across **test length × number of geos × alpha** and recommends the
|
|
@@ -345,6 +366,16 @@ Searches candidate treatment-market sets and ranks them by power, MDE, pre-fit,
|
|
|
345
366
|
holdout, and confidence. Pass `eligible=[…]` to restrict to markets you can
|
|
346
367
|
actually run in.
|
|
347
368
|
|
|
369
|
+
Two real-world controls for *which* markets the search may use:
|
|
370
|
+
|
|
371
|
+
- **`include=[…]`** — force specific markets into **every** candidate treatment
|
|
372
|
+
set (must-treat markets, e.g. a flagship region you've already committed to).
|
|
373
|
+
The search fills the remaining slots from `eligible`, up to `max_treated`.
|
|
374
|
+
- **`exclude=[…]`** — drop markets **entirely**: they're never treated *and*
|
|
375
|
+
never used as a donor/control (e.g. a market with contaminated data or its own
|
|
376
|
+
concurrent campaign). `exclude` is also accepted by `power()`, `diagnose()`,
|
|
377
|
+
`evaluate()`, and `recommend()` to keep a market out of the control pool.
|
|
378
|
+
|
|
348
379
|
### Multi-cell tests — `design.multi_cell(cells, test_len, …)`
|
|
349
380
|
|
|
350
381
|
Often you run several treatment cells at once — different creatives, budgets, or
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: panelkit
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Topic :: Scientific/Engineering
|
|
@@ -231,10 +231,15 @@ mc = design.multi_cell(cells={"west": ["los_angeles", "san_diego"],
|
|
|
231
231
|
print(mc.summary()) # per-cell MDE / confidence / holdout
|
|
232
232
|
mc.plot("multicell.png") # the multi-cell figure below
|
|
233
233
|
|
|
234
|
+
# pin in must-have markets, drop ones you don't trust:
|
|
235
|
+
ranked = design.select_markets(test_len=8, target_lift=0.05, max_treated=3,
|
|
236
|
+
include=["chicago"], exclude=["miami"])
|
|
237
|
+
|
|
234
238
|
# already ran the test? measure it (SC/ASC/SDID + a weighted-average ensemble):
|
|
235
239
|
ev = design.evaluate(treated=["chicago", "denver"], treat_start=52)
|
|
236
240
|
print(ev.summary()) # per-method + ensemble lift, CI, cumulative
|
|
237
241
|
ev.plot("evaluate.png") # observed vs counterfactual + lift-by-method
|
|
242
|
+
ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
|
|
238
243
|
|
|
239
244
|
# or sweep specifications (length × #geos × significance) and recommend one:
|
|
240
245
|
grid = design.recommend(test_lengths=[4, 6, 8, 12], n_geos_options=[3, 5, 10, 20],
|
|
@@ -268,11 +273,23 @@ per-cell MDE/confidence/holdout report and a combined figure:
|
|
|
268
273
|
**Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
|
|
269
274
|
the power analysis: fit SC / ASC / SDID on a test that already happened, blend
|
|
270
275
|
them into a weighted-average **ensemble** estimate, and report each one's lift,
|
|
271
|
-
confidence interval (
|
|
276
|
+
confidence interval (in-space placebo), and cumulative incremental —
|
|
272
277
|
with an SC in-space placebo p-value:
|
|
273
278
|
|
|
274
279
|

|
|
275
280
|
|
|
281
|
+
And the **effect over time** — the pointwise effect across the full timeline
|
|
282
|
+
(pre-period included, so you can see it sit flat in the noise band before the test
|
|
283
|
+
and break out after) plus the running cumulative incremental, each as a point
|
|
284
|
+
estimate with a confidence band:
|
|
285
|
+
|
|
286
|
+

|
|
287
|
+
|
|
288
|
+
**Pin in / drop markets.** `select_markets`/`recommend` take `include=[…]`
|
|
289
|
+
(force must-treat markets into every candidate) and `exclude=[…]` (drop markets
|
|
290
|
+
entirely — never treated, never a control). `exclude` is also accepted by
|
|
291
|
+
`power`, `diagnose`, and `evaluate` to keep a market out of the donor pool.
|
|
292
|
+
|
|
276
293
|
**Messy DataFrame? No problem.** `from_long` coerces real-world data: outcome
|
|
277
294
|
strings → numeric (with a clear error on genuinely non-numeric values), dates
|
|
278
295
|
(string or unsorted) → chronological columns, locations → market names, duplicate
|
|
@@ -201,10 +201,15 @@ mc = design.multi_cell(cells={"west": ["los_angeles", "san_diego"],
|
|
|
201
201
|
print(mc.summary()) # per-cell MDE / confidence / holdout
|
|
202
202
|
mc.plot("multicell.png") # the multi-cell figure below
|
|
203
203
|
|
|
204
|
+
# pin in must-have markets, drop ones you don't trust:
|
|
205
|
+
ranked = design.select_markets(test_len=8, target_lift=0.05, max_treated=3,
|
|
206
|
+
include=["chicago"], exclude=["miami"])
|
|
207
|
+
|
|
204
208
|
# already ran the test? measure it (SC/ASC/SDID + a weighted-average ensemble):
|
|
205
209
|
ev = design.evaluate(treated=["chicago", "denver"], treat_start=52)
|
|
206
210
|
print(ev.summary()) # per-method + ensemble lift, CI, cumulative
|
|
207
211
|
ev.plot("evaluate.png") # observed vs counterfactual + lift-by-method
|
|
212
|
+
ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
|
|
208
213
|
|
|
209
214
|
# or sweep specifications (length × #geos × significance) and recommend one:
|
|
210
215
|
grid = design.recommend(test_lengths=[4, 6, 8, 12], n_geos_options=[3, 5, 10, 20],
|
|
@@ -238,11 +243,23 @@ per-cell MDE/confidence/holdout report and a combined figure:
|
|
|
238
243
|
**Evaluate a test that ran.** `evaluate(...)` is the measurement counterpart to
|
|
239
244
|
the power analysis: fit SC / ASC / SDID on a test that already happened, blend
|
|
240
245
|
them into a weighted-average **ensemble** estimate, and report each one's lift,
|
|
241
|
-
confidence interval (
|
|
246
|
+
confidence interval (in-space placebo), and cumulative incremental —
|
|
242
247
|
with an SC in-space placebo p-value:
|
|
243
248
|
|
|
244
249
|

|
|
245
250
|
|
|
251
|
+
And the **effect over time** — the pointwise effect across the full timeline
|
|
252
|
+
(pre-period included, so you can see it sit flat in the noise band before the test
|
|
253
|
+
and break out after) plus the running cumulative incremental, each as a point
|
|
254
|
+
estimate with a confidence band:
|
|
255
|
+
|
|
256
|
+

|
|
257
|
+
|
|
258
|
+
**Pin in / drop markets.** `select_markets`/`recommend` take `include=[…]`
|
|
259
|
+
(force must-treat markets into every candidate) and `exclude=[…]` (drop markets
|
|
260
|
+
entirely — never treated, never a control). `exclude` is also accepted by
|
|
261
|
+
`power`, `diagnose`, and `evaluate` to keep a market out of the donor pool.
|
|
262
|
+
|
|
246
263
|
**Messy DataFrame? No problem.** `from_long` coerces real-world data: outcome
|
|
247
264
|
strings → numeric (with a clear error on genuinely non-numeric values), dates
|
|
248
265
|
(string or unsorted) → chronological columns, locations → market names, duplicate
|
|
@@ -34,7 +34,10 @@ pub struct MarketCandidate {
|
|
|
34
34
|
pub struct SelectConfig {
|
|
35
35
|
/// Units eligible to be treated (e.g. markets you could actually run in).
|
|
36
36
|
pub eligible: Vec<usize>,
|
|
37
|
-
///
|
|
37
|
+
/// Units **forced into every** candidate treatment set (must-treat markets).
|
|
38
|
+
/// The search fills the remaining slots from `eligible`. Empty = no forcing.
|
|
39
|
+
pub include: Vec<usize>,
|
|
40
|
+
/// Maximum number of treated markets in a candidate set (counts `include`).
|
|
38
41
|
pub max_treated: usize,
|
|
39
42
|
pub test_len: usize,
|
|
40
43
|
/// The lift you care about detecting (fraction, e.g. 0.05 = 5%).
|
|
@@ -95,46 +98,91 @@ pub fn evaluate(y: &Mat, treated: &[usize], cfg: &SelectConfig) -> MarketCandida
|
|
|
95
98
|
}
|
|
96
99
|
}
|
|
97
100
|
|
|
98
|
-
/// Build the candidate list.
|
|
99
|
-
///
|
|
100
|
-
///
|
|
101
|
+
/// Build the candidate list. Every candidate always contains the forced
|
|
102
|
+
/// `include` markets; the remaining slots are drawn from `eligible` (minus the
|
|
103
|
+
/// forced ones). With `exact_size = Some(k)`, every candidate has exactly `k`
|
|
104
|
+
/// markets total; otherwise it's the forced set plus each single extra market
|
|
105
|
+
/// plus sampled larger subsets up to `max_treated`.
|
|
101
106
|
fn candidate_sets(cfg: &SelectConfig) -> Vec<Vec<usize>> {
|
|
102
107
|
let mut rng = Xoshiro256pp::seed_from_u64(cfg.seed);
|
|
103
|
-
let mut seen = std::collections::HashSet::new();
|
|
108
|
+
let mut seen: std::collections::HashSet<Vec<usize>> = std::collections::HashSet::new();
|
|
104
109
|
let mut sets: Vec<Vec<usize>> = Vec::new();
|
|
105
110
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
111
|
+
// Forced (must-treat) markets, de-duplicated, and the pool of extra picks.
|
|
112
|
+
let mut forced: Vec<usize> = cfg.include.clone();
|
|
113
|
+
forced.sort_unstable();
|
|
114
|
+
forced.dedup();
|
|
115
|
+
let forced_set: std::collections::HashSet<usize> = forced.iter().copied().collect();
|
|
116
|
+
let extra_pool: Vec<usize> = cfg
|
|
117
|
+
.eligible
|
|
118
|
+
.iter()
|
|
119
|
+
.copied()
|
|
120
|
+
.filter(|u| !forced_set.contains(u))
|
|
121
|
+
.collect();
|
|
122
|
+
|
|
123
|
+
if let Some(k0) = cfg.exact_size {
|
|
124
|
+
let k = k0.max(1);
|
|
125
|
+
let need = k.saturating_sub(forced.len());
|
|
126
|
+
if need == 0 {
|
|
127
|
+
// The forced set already fills the requested size.
|
|
128
|
+
if !forced.is_empty() {
|
|
129
|
+
sets.push(forced.clone());
|
|
130
|
+
}
|
|
131
|
+
return sets;
|
|
132
|
+
}
|
|
133
|
+
if need == 1 {
|
|
134
|
+
// Deterministic: forced + each eligible single (preserves the old
|
|
135
|
+
// "all singletons" behavior when nothing is forced and k == 1).
|
|
136
|
+
for &u in &extra_pool {
|
|
137
|
+
let mut pick = forced.clone();
|
|
138
|
+
pick.push(u);
|
|
139
|
+
pick.sort_unstable();
|
|
140
|
+
if seen.insert(pick.clone()) {
|
|
141
|
+
sets.push(pick);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
return sets;
|
|
110
145
|
}
|
|
111
146
|
let mut attempts = 0;
|
|
112
147
|
while sets.len() < cfg.n_candidates && attempts < cfg.n_candidates * 40 {
|
|
113
148
|
attempts += 1;
|
|
114
|
-
let mut pool =
|
|
149
|
+
let mut pool = extra_pool.clone();
|
|
115
150
|
rng.shuffle(&mut pool);
|
|
116
|
-
let mut pick: Vec<usize> =
|
|
151
|
+
let mut pick: Vec<usize> = forced.clone();
|
|
152
|
+
pick.extend(pool.into_iter().take(need));
|
|
117
153
|
pick.sort_unstable();
|
|
118
|
-
if seen.insert(pick.clone()) {
|
|
154
|
+
if pick.len() == k && seen.insert(pick.clone()) {
|
|
119
155
|
sets.push(pick);
|
|
120
156
|
}
|
|
121
157
|
}
|
|
122
158
|
return sets;
|
|
123
159
|
}
|
|
124
160
|
|
|
125
|
-
// Mixed-size search
|
|
126
|
-
|
|
127
|
-
if
|
|
128
|
-
|
|
129
|
-
|
|
161
|
+
// Mixed-size search. Extra slots available on top of the forced set.
|
|
162
|
+
let budget = cfg.max_treated.saturating_sub(forced.len());
|
|
163
|
+
if !forced.is_empty() {
|
|
164
|
+
seen.insert(forced.clone());
|
|
165
|
+
sets.push(forced.clone());
|
|
166
|
+
}
|
|
167
|
+
if budget >= 1 {
|
|
168
|
+
for &u in &extra_pool {
|
|
169
|
+
let mut pick = forced.clone();
|
|
170
|
+
pick.push(u);
|
|
171
|
+
pick.sort_unstable();
|
|
172
|
+
if seen.insert(pick.clone()) {
|
|
173
|
+
sets.push(pick);
|
|
174
|
+
}
|
|
130
175
|
}
|
|
176
|
+
}
|
|
177
|
+
if budget >= 2 && extra_pool.len() >= 2 {
|
|
131
178
|
let mut attempts = 0;
|
|
132
179
|
while sets.len() < cfg.n_candidates && attempts < cfg.n_candidates * 20 {
|
|
133
180
|
attempts += 1;
|
|
134
|
-
let
|
|
135
|
-
let mut pool =
|
|
181
|
+
let extra = 2 + rng.gen_range(budget - 1); // 2..=budget extra markets
|
|
182
|
+
let mut pool = extra_pool.clone();
|
|
136
183
|
rng.shuffle(&mut pool);
|
|
137
|
-
let mut pick: Vec<usize> =
|
|
184
|
+
let mut pick: Vec<usize> = forced.clone();
|
|
185
|
+
pick.extend(pool.into_iter().take(extra));
|
|
138
186
|
pick.sort_unstable();
|
|
139
187
|
if seen.insert(pick.clone()) {
|
|
140
188
|
sets.push(pick);
|
|
@@ -114,6 +114,7 @@ fn market_selection_ranks_candidates() {
|
|
|
114
114
|
let y = geo_panel(12, 60, 5);
|
|
115
115
|
let cfg = SelectConfig {
|
|
116
116
|
eligible: (0..12).collect(),
|
|
117
|
+
include: vec![],
|
|
117
118
|
max_treated: 3,
|
|
118
119
|
test_len: 10,
|
|
119
120
|
target_lift: 0.10,
|
|
@@ -139,6 +140,15 @@ fn market_selection_ranks_candidates() {
|
|
|
139
140
|
};
|
|
140
141
|
let ranked2 = select_markets(&y, &cfg2);
|
|
141
142
|
assert!(ranked2.iter().all(|c| c.treated.len() == 2));
|
|
143
|
+
// include: market 5 is forced into every candidate set.
|
|
144
|
+
let cfg3 = SelectConfig {
|
|
145
|
+
include: vec![5],
|
|
146
|
+
..cfg.clone()
|
|
147
|
+
};
|
|
148
|
+
let ranked3 = select_markets(&y, &cfg3);
|
|
149
|
+
assert!(!ranked3.is_empty());
|
|
150
|
+
assert!(ranked3.iter().all(|c| c.treated.contains(&5)));
|
|
151
|
+
assert!(ranked3.iter().all(|c| c.treated.len() <= 3));
|
|
142
152
|
// Every candidate has a valid holdout and confidence.
|
|
143
153
|
for c in &ranked {
|
|
144
154
|
assert!(c.holdout_pct > 0.0 && c.holdout_pct < 1.0);
|
|
@@ -169,7 +169,7 @@ pub fn geo_diagnostics(
|
|
|
169
169
|
|
|
170
170
|
/// Search and rank candidate treatment-market sets.
|
|
171
171
|
#[pyfunction]
|
|
172
|
-
#[pyo3(signature = (y, eligible, max_treated, test_len, target_lift, method="sdid", alpha=0.1, target_power=0.8, min_pre=0, n_candidates=200, seed=0, exact_size=None, lookback=None))]
|
|
172
|
+
#[pyo3(signature = (y, eligible, max_treated, test_len, target_lift, method="sdid", alpha=0.1, target_power=0.8, min_pre=0, n_candidates=200, seed=0, exact_size=None, lookback=None, include=None))]
|
|
173
173
|
#[allow(clippy::too_many_arguments)]
|
|
174
174
|
pub fn geo_select(
|
|
175
175
|
py: Python<'_>,
|
|
@@ -186,6 +186,7 @@ pub fn geo_select(
|
|
|
186
186
|
seed: u64,
|
|
187
187
|
exact_size: Option<usize>,
|
|
188
188
|
lookback: Option<usize>,
|
|
189
|
+
include: Option<Vec<usize>>,
|
|
189
190
|
) -> PyResult<Vec<PyMarketCandidate>> {
|
|
190
191
|
let m = parse_method(method)?;
|
|
191
192
|
let mat = mat_from_numpy(&y);
|
|
@@ -196,6 +197,7 @@ pub fn geo_select(
|
|
|
196
197
|
};
|
|
197
198
|
let cfg = SelectConfig {
|
|
198
199
|
eligible,
|
|
200
|
+
include: include.unwrap_or_default(),
|
|
199
201
|
max_treated,
|
|
200
202
|
test_len,
|
|
201
203
|
target_lift,
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "panelkit"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -352,6 +352,22 @@ class GeoDesign:
|
|
|
352
352
|
out.append(self._index[m])
|
|
353
353
|
return out
|
|
354
354
|
|
|
355
|
+
def _names_of(self, markets) -> list:
|
|
356
|
+
"""Resolve markets (names or indices) to their string names."""
|
|
357
|
+
return [self.names[i] for i in self._resolve(markets)]
|
|
358
|
+
|
|
359
|
+
def _without(self, exclude):
|
|
360
|
+
"""Return ``(sub_design, excluded_name_set)`` with the excluded markets
|
|
361
|
+
dropped entirely (so they're neither treated nor used as controls). Names
|
|
362
|
+
are preserved, so callers can pass markets to the sub-design by name."""
|
|
363
|
+
ex = set(self._names_of(exclude)) if exclude else set()
|
|
364
|
+
if not ex:
|
|
365
|
+
return self, ex
|
|
366
|
+
keep = [i for i in range(self.n) if self.names[i] not in ex]
|
|
367
|
+
if not keep:
|
|
368
|
+
raise ValueError("exclude removes every market — nothing left to analyze")
|
|
369
|
+
return GeoDesign(self.Y[keep], names=[self.names[i] for i in keep]), ex
|
|
370
|
+
|
|
355
371
|
def power(
|
|
356
372
|
self,
|
|
357
373
|
treated,
|
|
@@ -364,6 +380,7 @@ class GeoDesign:
|
|
|
364
380
|
lookback: int | None = None,
|
|
365
381
|
ensemble: bool = True,
|
|
366
382
|
ensemble_weights="auto",
|
|
383
|
+
exclude=None,
|
|
367
384
|
) -> _PowerReport:
|
|
368
385
|
"""Power analysis for a specified treated-market set across methods.
|
|
369
386
|
|
|
@@ -376,7 +393,20 @@ class GeoDesign:
|
|
|
376
393
|
power reflects the averaged estimator, which is usually steadier than any
|
|
377
394
|
one method). ``ensemble_weights`` is ``"auto"`` (data-driven inverse-variance
|
|
378
395
|
weighting from each method's historical-null spread), ``"equal"``, or a dict
|
|
379
|
-
like ``{"SC": 0.5, "ASC": 0.2, "SDID": 0.3}``.
|
|
396
|
+
like ``{"SC": 0.5, "ASC": 0.2, "SDID": 0.3}``.
|
|
397
|
+
|
|
398
|
+
``exclude`` drops markets entirely (e.g. contaminated or untrustworthy
|
|
399
|
+
ones) so they're never used as donors/controls."""
|
|
400
|
+
if exclude:
|
|
401
|
+
sub, ex = self._without(exclude)
|
|
402
|
+
tnames = self._names_of(treated)
|
|
403
|
+
bad = [n for n in tnames if n in ex]
|
|
404
|
+
if bad:
|
|
405
|
+
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
406
|
+
return sub.power(tnames, test_len, lifts=lifts, methods=methods, alpha=alpha,
|
|
407
|
+
target_power=target_power, recommended=recommended,
|
|
408
|
+
lookback=lookback, ensemble=ensemble,
|
|
409
|
+
ensemble_weights=ensemble_weights)
|
|
380
410
|
idx = self._resolve(treated)
|
|
381
411
|
names = [self.names[i] for i in idx]
|
|
382
412
|
lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
|
|
@@ -398,13 +428,21 @@ class GeoDesign:
|
|
|
398
428
|
rec = recommended if recommended in results else list(results)[0]
|
|
399
429
|
return _PowerReport(self, idx, names, test_len, results, diag, rec, alpha, target_power)
|
|
400
430
|
|
|
401
|
-
def diagnose(self, treated, test_len: int) -> "_DiagnosticsReport":
|
|
431
|
+
def diagnose(self, treated, test_len: int, exclude=None) -> "_DiagnosticsReport":
|
|
402
432
|
"""Real-world guardrails for a treated-market set: pre-period fit,
|
|
403
433
|
seasonality, holdout, stability, and warnings — with a visual.
|
|
404
434
|
|
|
405
435
|
Returns a report with ``.summary()`` and ``.plot(path)`` (the guardrails
|
|
406
436
|
figure: treated-vs-synthetic pre-fit, seasonality ACF, holdout share, and
|
|
407
|
-
a scorecard listing any warnings).
|
|
437
|
+
a scorecard listing any warnings). ``exclude`` drops markets from the
|
|
438
|
+
control pool entirely."""
|
|
439
|
+
if exclude:
|
|
440
|
+
sub, ex = self._without(exclude)
|
|
441
|
+
tnames = self._names_of(treated)
|
|
442
|
+
bad = [n for n in tnames if n in ex]
|
|
443
|
+
if bad:
|
|
444
|
+
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
445
|
+
return sub.diagnose(tnames, test_len)
|
|
408
446
|
idx = self._resolve(treated)
|
|
409
447
|
names = [self.names[i] for i in idx]
|
|
410
448
|
t0 = self.t - int(test_len)
|
|
@@ -431,18 +469,46 @@ class GeoDesign:
|
|
|
431
469
|
top: int = 10,
|
|
432
470
|
exact_size: int | None = None,
|
|
433
471
|
lookback: int | None = None,
|
|
472
|
+
include=None,
|
|
473
|
+
exclude=None,
|
|
434
474
|
) -> list:
|
|
435
475
|
"""Search candidate treatment-market sets and return the top ranked.
|
|
436
476
|
|
|
437
477
|
``exact_size=k`` restricts the search to sets of exactly ``k`` markets
|
|
438
478
|
(otherwise sizes 1..``max_treated`` are considered). ``lookback=k`` powers
|
|
439
|
-
over the most-recent ``k`` historical windows.
|
|
479
|
+
over the most-recent ``k`` historical windows.
|
|
480
|
+
|
|
481
|
+
``include`` forces specific markets into **every** candidate treatment set
|
|
482
|
+
(must-treat markets); the search fills the remaining slots from
|
|
483
|
+
``eligible``. ``exclude`` drops markets entirely — they're never treated
|
|
484
|
+
and never used as controls."""
|
|
485
|
+
if exclude:
|
|
486
|
+
sub, ex = self._without(exclude)
|
|
487
|
+
elig_names = self._names_of(eligible) if eligible is not None else None
|
|
488
|
+
if elig_names is not None:
|
|
489
|
+
elig_names = [n for n in elig_names if n not in ex]
|
|
490
|
+
inc_names = self._names_of(include) if include else None
|
|
491
|
+
if inc_names is not None:
|
|
492
|
+
bad = [n for n in inc_names if n in ex]
|
|
493
|
+
if bad:
|
|
494
|
+
raise ValueError(f"markets in both include and exclude: {bad}")
|
|
495
|
+
return sub.select_markets(
|
|
496
|
+
test_len, target_lift, max_treated, eligible=elig_names, method=method,
|
|
497
|
+
alpha=alpha, target_power=target_power, n_candidates=n_candidates,
|
|
498
|
+
seed=seed, top=top, exact_size=exact_size, lookback=lookback,
|
|
499
|
+
include=inc_names, exclude=None)
|
|
500
|
+
|
|
440
501
|
elig = self._resolve(eligible) if eligible is not None else list(range(self.n))
|
|
502
|
+
inc = sorted(set(self._resolve(include))) if include else []
|
|
503
|
+
if len(inc) > int(max_treated):
|
|
504
|
+
raise ValueError(f"include has {len(inc)} markets but max_treated="
|
|
505
|
+
f"{max_treated}; raise max_treated or include fewer")
|
|
441
506
|
ranked = _panelkit.geo_select(
|
|
442
507
|
self.Y, elig, int(max_treated), int(test_len), float(target_lift),
|
|
443
508
|
method.lower(), alpha, target_power, 0, int(n_candidates), int(seed),
|
|
444
509
|
None if exact_size is None else int(exact_size),
|
|
445
510
|
None if lookback is None else int(lookback),
|
|
511
|
+
inc or None,
|
|
446
512
|
)
|
|
447
513
|
out = []
|
|
448
514
|
for c in ranked[:top]:
|
|
@@ -470,6 +536,8 @@ class GeoDesign:
|
|
|
470
536
|
seed: int = 0,
|
|
471
537
|
min_confidence: float = 60.0,
|
|
472
538
|
lookback: int | None = None,
|
|
539
|
+
include=None,
|
|
540
|
+
exclude=None,
|
|
473
541
|
) -> "_ScenarioGrid":
|
|
474
542
|
"""Sweep designs across **specifications** — test length × number of geos
|
|
475
543
|
× significance level (alpha) — and recommend the best.
|
|
@@ -477,7 +545,9 @@ class GeoDesign:
|
|
|
477
545
|
For each (alpha, test_len, n_geos) cell it searches for the best set of
|
|
478
546
|
exactly ``n_geos`` treatment markets and records its MDE, power, holdout,
|
|
479
547
|
and confidence. Returns a :class:`_ScenarioGrid` with a recommendation,
|
|
480
|
-
a plain-English summary, and a tradeoffs figure.
|
|
548
|
+
a plain-English summary, and a tradeoffs figure. ``include`` forces
|
|
549
|
+
must-treat markets into every candidate; ``exclude`` drops markets
|
|
550
|
+
entirely.
|
|
481
551
|
"""
|
|
482
552
|
rows = []
|
|
483
553
|
for alpha in alphas:
|
|
@@ -488,6 +558,7 @@ class GeoDesign:
|
|
|
488
558
|
eligible=eligible, method=method, alpha=alpha,
|
|
489
559
|
target_power=target_power, n_candidates=n_candidates,
|
|
490
560
|
seed=seed, top=1, exact_size=ng, lookback=lookback,
|
|
561
|
+
include=include, exclude=exclude,
|
|
491
562
|
)
|
|
492
563
|
best = ranked[0] if ranked else None
|
|
493
564
|
if best is None:
|
|
@@ -610,18 +681,24 @@ class GeoDesign:
|
|
|
610
681
|
methods: Sequence[str] = _METHODS,
|
|
611
682
|
weights="auto",
|
|
612
683
|
level: float = 0.90,
|
|
613
|
-
|
|
614
|
-
block_len: int = 4,
|
|
684
|
+
max_placebo: int = 200,
|
|
615
685
|
seed: int = 0,
|
|
686
|
+
exclude=None,
|
|
616
687
|
) -> "_EvalReport":
|
|
617
688
|
"""Estimate the realized effect of a geo test that has **already run**.
|
|
618
689
|
|
|
619
690
|
This is the measurement counterpart to :meth:`power`: given the treated
|
|
620
691
|
markets and the period treatment began (``treat_start``, the first
|
|
621
692
|
post-period column), it fits SC / ASC / SDID, reports each one's effect,
|
|
622
|
-
and combines them into a weighted-average **ensemble** estimate.
|
|
623
|
-
|
|
624
|
-
|
|
693
|
+
and combines them into a weighted-average **ensemble** estimate.
|
|
694
|
+
|
|
695
|
+
Inference is **in-space placebo** (Abadie): every donor market is refit as
|
|
696
|
+
if it were the treated one, and the spread of *their* post-period effects
|
|
697
|
+
is the null reference. This captures out-of-sample extrapolation error —
|
|
698
|
+
the dominant source of uncertainty — so the intervals are calibrated
|
|
699
|
+
(unlike a bootstrap of the treated unit's own post-period, which only sees
|
|
700
|
+
in-sample noise and is far too narrow). Poorly-fit placebos (pre-period
|
|
701
|
+
RMSPE > 2× the treated unit's) are dropped, per Abadie.
|
|
625
702
|
|
|
626
703
|
Parameters
|
|
627
704
|
----------
|
|
@@ -633,18 +710,29 @@ class GeoDesign:
|
|
|
633
710
|
Which estimators to fit and blend.
|
|
634
711
|
weights : "auto" | "equal" | dict
|
|
635
712
|
Ensemble weighting. ``"auto"`` is inverse-variance (precision)
|
|
636
|
-
weighting from each method's
|
|
713
|
+
weighting from each method's placebo-null spread.
|
|
637
714
|
level : float
|
|
638
715
|
Confidence level for the intervals (e.g. 0.90).
|
|
639
|
-
|
|
640
|
-
|
|
716
|
+
max_placebo : int
|
|
717
|
+
Cap on the number of donor placebos used (sampled if exceeded).
|
|
718
|
+
seed : int
|
|
719
|
+
Seed for placebo sampling when ``max_placebo`` is exceeded.
|
|
641
720
|
|
|
642
721
|
Returns
|
|
643
722
|
-------
|
|
644
723
|
_EvalReport
|
|
645
724
|
With ``.summary()``, ``.plot(path)``, per-method results, and the
|
|
646
|
-
ensemble point estimate / interval / lift.
|
|
725
|
+
ensemble point estimate / interval / lift. ``exclude`` drops markets
|
|
726
|
+
from the control pool entirely.
|
|
647
727
|
"""
|
|
728
|
+
if exclude:
|
|
729
|
+
sub, ex = self._without(exclude)
|
|
730
|
+
tnames = self._names_of(treated)
|
|
731
|
+
bad = [n for n in tnames if n in ex]
|
|
732
|
+
if bad:
|
|
733
|
+
raise ValueError(f"treated markets were also excluded: {bad}")
|
|
734
|
+
return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
|
|
735
|
+
level=level, max_placebo=max_placebo, seed=seed)
|
|
648
736
|
idx = self._resolve(treated)
|
|
649
737
|
names = [self.names[i] for i in idx]
|
|
650
738
|
t0 = int(treat_start)
|
|
@@ -656,37 +744,66 @@ class GeoDesign:
|
|
|
656
744
|
if unknown:
|
|
657
745
|
raise ValueError(f"unknown methods {unknown}; choose from {_METHODS}")
|
|
658
746
|
|
|
659
|
-
|
|
660
|
-
"SC":
|
|
661
|
-
|
|
662
|
-
"
|
|
663
|
-
|
|
747
|
+
def _fit(method, tr):
|
|
748
|
+
if method == "SC":
|
|
749
|
+
return _panelkit.fit_sc(self.Y, tr, t0, 0.0, False, level)
|
|
750
|
+
if method == "ASC":
|
|
751
|
+
return _panelkit.fit_asc(self.Y, tr, t0, 0.0, None)
|
|
752
|
+
return _panelkit.fit_sdid(self.Y, tr, t0, 1.0)
|
|
753
|
+
|
|
754
|
+
treated_series = self.Y[idx].mean(axis=0)
|
|
755
|
+
post_len = self.t - t0
|
|
756
|
+
order = methods
|
|
757
|
+
|
|
758
|
+
# --- point estimates on the treated set ---
|
|
664
759
|
per = {}
|
|
665
760
|
for m in methods:
|
|
666
|
-
fit =
|
|
761
|
+
fit = _fit(m, idx)
|
|
667
762
|
att_path = np.asarray(fit.att_path, dtype=float)
|
|
668
763
|
cf = np.asarray(fit.counterfactual, dtype=float)
|
|
669
764
|
att = float(fit.att)
|
|
670
765
|
cf_mean = float(np.mean(cf)) if cf.size else float("nan")
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
766
|
+
# Full-timeline counterfactual via donor weights, centered on the
|
|
767
|
+
# pre-period so the gap reflects FIT, not a level offset (SDID matches
|
|
768
|
+
# trends, not levels).
|
|
769
|
+
dids = np.asarray(fit.donor_ids, dtype=int)
|
|
770
|
+
ws = np.asarray(fit.weights, dtype=float)
|
|
771
|
+
if dids.size:
|
|
772
|
+
full_cf = self.Y[dids].T @ ws
|
|
773
|
+
full_cf = full_cf + (treated_series[:t0].mean() - full_cf[:t0].mean())
|
|
774
|
+
else:
|
|
775
|
+
full_cf = np.full(self.t, np.nan)
|
|
674
776
|
per[m] = {
|
|
675
777
|
"att": att, "att_path": att_path, "counterfactual": cf,
|
|
676
|
-
"
|
|
677
|
-
"
|
|
678
|
-
"lift_lo": lo / cf_mean if cf_mean else float("nan"),
|
|
679
|
-
"lift_hi": hi / cf_mean if cf_mean else float("nan"),
|
|
778
|
+
"full_cf": full_cf, "cf_mean": cf_mean,
|
|
779
|
+
"lift": att / cf_mean if cf_mean else float("nan"),
|
|
680
780
|
"cumulative": float(att_path.sum()) * n_treated,
|
|
681
781
|
"pre_rmspe": float(fit.pre_rmspe),
|
|
682
782
|
}
|
|
683
783
|
|
|
684
|
-
#
|
|
685
|
-
|
|
784
|
+
# --- in-space placebo: refit each donor as if it were treated ---
|
|
785
|
+
treated_set = set(idx)
|
|
786
|
+
donors = [u for u in range(self.n) if u not in treated_set]
|
|
787
|
+
if len(donors) > int(max_placebo):
|
|
788
|
+
rng = np.random.default_rng(int(seed))
|
|
789
|
+
donors = sorted(int(j) for j in rng.choice(donors, int(max_placebo), replace=False))
|
|
790
|
+
pb = {m: [] for m in methods} # per method: list of (att_path, pre_rmspe)
|
|
791
|
+
for j in donors:
|
|
792
|
+
for m in methods:
|
|
793
|
+
fj = _fit(m, [j])
|
|
794
|
+
pb[m].append((np.asarray(fj.att_path, dtype=float), float(fj.pre_rmspe)))
|
|
795
|
+
|
|
796
|
+
# --- ensemble weights ---
|
|
797
|
+
def _placebo_att_sd(m):
|
|
798
|
+
if not pb[m]:
|
|
799
|
+
return 1.0
|
|
800
|
+
vals = np.array([p.mean() for (p, _) in pb[m]])
|
|
801
|
+
return float(np.std(vals)) if len(vals) > 1 else 1.0
|
|
686
802
|
if isinstance(weights, str) and weights.lower() == "equal":
|
|
687
803
|
wv = [1.0 / len(order)] * len(order)
|
|
688
804
|
elif isinstance(weights, str) and weights.lower() == "auto":
|
|
689
|
-
|
|
805
|
+
# inverse-variance from each method's placebo-null spread (precision)
|
|
806
|
+
prec = [1.0 / max(_placebo_att_sd(m) ** 2, 1e-300) for m in order]
|
|
690
807
|
s = sum(prec)
|
|
691
808
|
wv = [p / s for p in prec] if s > 0 else [1.0 / len(order)] * len(order)
|
|
692
809
|
elif isinstance(weights, dict):
|
|
@@ -702,33 +819,91 @@ class GeoDesign:
|
|
|
702
819
|
s = sum(raw)
|
|
703
820
|
wv = [r / s for r in raw]
|
|
704
821
|
wmap = dict(zip(order, wv))
|
|
705
|
-
|
|
822
|
+
a = (1.0 - float(level)) / 2.0
|
|
823
|
+
|
|
824
|
+
def _ci(point, null_samples):
|
|
825
|
+
"""Pivot CI: point estimate ± the placebo null spread (null ≈ 0)."""
|
|
826
|
+
if len(null_samples) >= 2:
|
|
827
|
+
return point + float(np.quantile(null_samples, a)), \
|
|
828
|
+
point + float(np.quantile(null_samples, 1.0 - a))
|
|
829
|
+
return point, point
|
|
830
|
+
|
|
831
|
+
# --- per-method point CIs from each method's placebo att spread ---
|
|
832
|
+
for m in order:
|
|
833
|
+
mp = np.array([p.mean() for (p, _) in pb[m]]) if pb[m] else np.array([])
|
|
834
|
+
lo, hi = _ci(per[m]["att"], mp)
|
|
835
|
+
cfm = per[m]["cf_mean"]
|
|
836
|
+
per[m]["att_lo"], per[m]["att_hi"] = lo, hi
|
|
837
|
+
per[m]["lift_lo"] = lo / cfm if cfm else float("nan")
|
|
838
|
+
per[m]["lift_hi"] = hi / cfm if cfm else float("nan")
|
|
839
|
+
|
|
840
|
+
# --- ensemble estimate + ensemble placebo paths (Abadie pre-fit filter) ---
|
|
706
841
|
ens_path = sum(wmap[m] * per[m]["att_path"] for m in order)
|
|
707
842
|
ens_cf_mean = float(sum(wmap[m] * per[m]["cf_mean"] for m in order))
|
|
708
843
|
ens_att = float(ens_path.mean())
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
844
|
+
treated_pre = sum(wmap[m] * per[m]["pre_rmspe"] for m in order)
|
|
845
|
+
|
|
846
|
+
ens_pb = [] # (path, pre_rmspe)
|
|
847
|
+
for di in range(len(donors)):
|
|
848
|
+
path = sum(wmap[m] * pb[m][di][0] for m in order)
|
|
849
|
+
pre = sum(wmap[m] * pb[m][di][1] for m in order)
|
|
850
|
+
ens_pb.append((path, pre))
|
|
851
|
+
kept = [p for (p, pre) in ens_pb if treated_pre <= 0 or pre <= 2.0 * treated_pre]
|
|
852
|
+
if len(kept) < 5: # too few comparable placebos → use all
|
|
853
|
+
kept = [p for (p, _) in ens_pb]
|
|
854
|
+
pb_mat = np.array(kept) if kept else np.zeros((0, post_len))
|
|
855
|
+
n_pb = pb_mat.shape[0]
|
|
856
|
+
|
|
857
|
+
# pointwise + cumulative + mean CIs, all from the placebo null
|
|
858
|
+
if n_pb >= 2:
|
|
859
|
+
point_lo = ens_path + np.quantile(pb_mat, a, axis=0)
|
|
860
|
+
point_hi = ens_path + np.quantile(pb_mat, 1.0 - a, axis=0)
|
|
861
|
+
point_hw = float(np.quantile(np.abs(pb_mat), float(level)))
|
|
862
|
+
cum_pb = np.cumsum(pb_mat, axis=1)
|
|
863
|
+
run = np.cumsum(ens_path)
|
|
864
|
+
cum_lo_band = np.quantile(cum_pb, a, axis=0)
|
|
865
|
+
cum_hi_band = np.quantile(cum_pb, 1.0 - a, axis=0)
|
|
866
|
+
pb_att = pb_mat.mean(axis=1)
|
|
867
|
+
p_value = float((1.0 + np.sum(np.abs(pb_att) >= abs(ens_att))) / (1.0 + n_pb))
|
|
868
|
+
else:
|
|
869
|
+
point_lo = point_hi = ens_path.copy()
|
|
870
|
+
point_hw = 0.0
|
|
871
|
+
run = np.cumsum(ens_path)
|
|
872
|
+
cum_lo_band = cum_hi_band = np.zeros(post_len)
|
|
873
|
+
pb_att = np.array([])
|
|
874
|
+
p_value = None
|
|
875
|
+
att_lo, att_hi = _ci(ens_att, pb_att)
|
|
876
|
+
|
|
877
|
+
cum_curve = run * n_treated
|
|
712
878
|
ensemble = {
|
|
713
|
-
"att": ens_att, "att_path": ens_path,
|
|
714
|
-
"att_lo":
|
|
879
|
+
"att": ens_att, "att_path": ens_path,
|
|
880
|
+
"att_lo": att_lo, "att_hi": att_hi,
|
|
715
881
|
"lift": ens_att / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
716
|
-
"lift_lo":
|
|
717
|
-
"lift_hi":
|
|
882
|
+
"lift_lo": att_lo / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
883
|
+
"lift_hi": att_hi / ens_cf_mean if ens_cf_mean else float("nan"),
|
|
718
884
|
"cumulative": float(ens_path.sum()) * n_treated,
|
|
719
|
-
"weights": wmap,
|
|
885
|
+
"weights": wmap, "n_placebo": n_pb,
|
|
720
886
|
}
|
|
721
887
|
|
|
722
|
-
#
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
888
|
+
# full-timeline counterfactual + gap path (pre shows fit; post = effect)
|
|
889
|
+
ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
|
|
890
|
+
full_gap = treated_series - ens_full_cf
|
|
891
|
+
full_gap[t0:] = ens_path
|
|
892
|
+
counterfactual = treated_series - full_gap
|
|
893
|
+
ensemble["full_gap"] = full_gap
|
|
894
|
+
ensemble["sigma_pre"] = (float(np.std(full_gap[:t0], ddof=1)) if t0 > 1
|
|
895
|
+
else float(np.std(full_gap[:t0])))
|
|
896
|
+
ensemble["point_hw"] = point_hw
|
|
897
|
+
ensemble["point_lo"] = point_lo
|
|
898
|
+
ensemble["point_hi"] = point_hi
|
|
899
|
+
ensemble["cum_curve"] = cum_curve
|
|
900
|
+
ensemble["cum_lo_curve"] = (run + cum_lo_band) * n_treated
|
|
901
|
+
ensemble["cum_hi_curve"] = (run + cum_hi_band) * n_treated
|
|
902
|
+
ensemble["cum_lo"] = float(ensemble["cum_lo_curve"][-1]) if post_len else float("nan")
|
|
903
|
+
ensemble["cum_hi"] = float(ensemble["cum_hi_curve"][-1]) if post_len else float("nan")
|
|
904
|
+
|
|
730
905
|
return _EvalReport(names, t0, n_treated, per, ensemble, p_value, level,
|
|
731
|
-
treated_series,
|
|
906
|
+
treated_series, counterfactual)
|
|
732
907
|
|
|
733
908
|
|
|
734
909
|
class _ScenarioGrid:
|
|
@@ -924,15 +1099,26 @@ class _EvalReport:
|
|
|
924
1099
|
"interval includes zero.")
|
|
925
1100
|
lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
|
|
926
1101
|
f"{e['cumulative']:,.0f} cumulative incremental")
|
|
1102
|
+
if "cum_lo" in e:
|
|
1103
|
+
lines.append(f"Cumulative {cl}% CI : "
|
|
1104
|
+
f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
|
|
1105
|
+
f"(in-space placebo, {e.get('n_placebo', 0)} donors)")
|
|
927
1106
|
lines.append(verdict)
|
|
928
1107
|
lines.append("=" * 66)
|
|
929
1108
|
return "\n".join(lines)
|
|
930
1109
|
|
|
931
1110
|
def plot(self, path: str | None = None):
|
|
932
|
-
"""Render the evaluation figure (observed vs counterfactual, effect path
|
|
933
|
-
and a lift-by-method bar). Returns the matplotlib Figure."""
|
|
1111
|
+
"""Render the evaluation figure (observed vs counterfactual, effect path
|
|
1112
|
+
with CI band, and a lift-by-method bar). Returns the matplotlib Figure."""
|
|
934
1113
|
return _plot_eval(self, path)
|
|
935
1114
|
|
|
1115
|
+
def plot_effect_over_time(self, path: str | None = None):
|
|
1116
|
+
"""Render the effect-over-time figure: the **pointwise** effect across the
|
|
1117
|
+
full timeline (pre-period included, as a placebo check) and the running
|
|
1118
|
+
**cumulative** incremental, each as a point estimate with a confidence
|
|
1119
|
+
band. Returns the matplotlib Figure."""
|
|
1120
|
+
return _plot_eval_timeline(self, path)
|
|
1121
|
+
|
|
936
1122
|
def __repr__(self):
|
|
937
1123
|
sig = "sig" if self.significant else "ns"
|
|
938
1124
|
return (f"EvalReport(lift={100*self.lift:+.2f}%, "
|
|
@@ -1362,13 +1548,18 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1362
1548
|
ax.grid(True, alpha=0.25)
|
|
1363
1549
|
ax.legend(loc="best", framealpha=0.9, fontsize=9)
|
|
1364
1550
|
|
|
1365
|
-
# ---- B: effect path over the post-period (ensemble + per method).
|
|
1551
|
+
# ---- B: effect path over the post-period (ensemble + per method) + CI band.
|
|
1366
1552
|
axb = fig.add_subplot(gs[1, 0])
|
|
1367
1553
|
for m, r in rep.per.items():
|
|
1368
1554
|
axb.plot(post, r["att_path"], color=_METHOD_COLORS.get(m, _PK_GREY),
|
|
1369
1555
|
lw=1.3, alpha=0.7, label=m)
|
|
1370
|
-
|
|
1371
|
-
|
|
1556
|
+
ens_post = rep.ensemble["att_path"]
|
|
1557
|
+
p_lo = rep.ensemble.get("point_lo")
|
|
1558
|
+
p_hi = rep.ensemble.get("point_hi")
|
|
1559
|
+
if p_lo is not None:
|
|
1560
|
+
axb.fill_between(post, p_lo, p_hi, color=_PK_PURPLE, alpha=0.18,
|
|
1561
|
+
label=f"ensemble {int(round(100*rep.level))}% band")
|
|
1562
|
+
axb.plot(post, ens_post, color=_PK_PURPLE, lw=2.6, label="ENSEMBLE")
|
|
1372
1563
|
axb.axhline(0, color="#111827", lw=1.0)
|
|
1373
1564
|
axb.set_title("Effect over time (per-period ATT)", fontweight="bold")
|
|
1374
1565
|
axb.set_xlabel("period")
|
|
@@ -1405,3 +1596,82 @@ def _plot_eval(rep: "_EvalReport", path):
|
|
|
1405
1596
|
if path:
|
|
1406
1597
|
fig.savefig(path, dpi=150, bbox_inches="tight")
|
|
1407
1598
|
return fig
|
|
1599
|
+
|
|
1600
|
+
|
|
1601
|
+
def _plot_eval_timeline(rep: "_EvalReport", path):
|
|
1602
|
+
"""Pointwise + cumulative effect over the full timeline, with CI bands.
|
|
1603
|
+
|
|
1604
|
+
Bands come from the in-space placebo distribution (every donor refit as if
|
|
1605
|
+
treated): the pointwise band is the per-period placebo spread around the
|
|
1606
|
+
estimate; the cumulative band grows with horizon as the placebo
|
|
1607
|
+
cumulative-sums spread out."""
|
|
1608
|
+
_, plt = _require_mpl()
|
|
1609
|
+
import numpy as _np
|
|
1610
|
+
from matplotlib.gridspec import GridSpec
|
|
1611
|
+
|
|
1612
|
+
T = len(rep.treated_series)
|
|
1613
|
+
t0 = rep.t0
|
|
1614
|
+
e = rep.ensemble
|
|
1615
|
+
x = _np.arange(T)
|
|
1616
|
+
seg = x[t0:]
|
|
1617
|
+
gap = _np.asarray(e["full_gap"], dtype=float)
|
|
1618
|
+
hw = e.get("point_hw", 0.0)
|
|
1619
|
+
cl = int(round(100 * rep.level))
|
|
1620
|
+
|
|
1621
|
+
plt.rcParams.update({"font.size": 11, "axes.titlesize": 12})
|
|
1622
|
+
fig = plt.figure(figsize=(12, 7.8))
|
|
1623
|
+
fig.patch.set_facecolor("white")
|
|
1624
|
+
gs = GridSpec(2, 1, figure=fig, height_ratios=[1.0, 1.0], hspace=0.32)
|
|
1625
|
+
|
|
1626
|
+
# ---- Top: pointwise effect (treated − counterfactual), full timeline. ----
|
|
1627
|
+
ax = fig.add_subplot(gs[0])
|
|
1628
|
+
ax.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
|
|
1629
|
+
# Constant placebo band across the whole timeline (the pre-period sits inside
|
|
1630
|
+
# it as a fit/placebo check); the per-period CI on the post effect is shown
|
|
1631
|
+
# as a tighter band around the estimate.
|
|
1632
|
+
ax.fill_between(x, gap - hw, gap + hw, color=_PK_PURPLE, alpha=0.12,
|
|
1633
|
+
label=f"{cl}% placebo band")
|
|
1634
|
+
ax.fill_between(seg, e["point_lo"], e["point_hi"], color=_PK_PURPLE, alpha=0.22)
|
|
1635
|
+
ax.plot(x, gap, color=_PK_PURPLE, lw=2.0, label="pointwise effect")
|
|
1636
|
+
ax.axhline(0, color="#111827", lw=1.0)
|
|
1637
|
+
ax.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
|
|
1638
|
+
ax.annotate("pre-period (placebo)", (t0 / 2, ax.get_ylim()[1]), ha="center",
|
|
1639
|
+
va="top", color="#6b7280", fontsize=9)
|
|
1640
|
+
ax.annotate("test window", (t0 + (T - t0) / 2, ax.get_ylim()[1]), ha="center",
|
|
1641
|
+
va="top", color="#6b21a8", fontsize=9)
|
|
1642
|
+
ax.set_title("Pointwise effect over time (treated − counterfactual)",
|
|
1643
|
+
fontweight="bold")
|
|
1644
|
+
ax.set_xlabel("period")
|
|
1645
|
+
ax.set_ylabel("per-period effect")
|
|
1646
|
+
ax.grid(True, alpha=0.25)
|
|
1647
|
+
ax.legend(loc="upper left", framealpha=0.9, fontsize=9)
|
|
1648
|
+
|
|
1649
|
+
# ---- Bottom: cumulative incremental over the test window (×n_treated). ----
|
|
1650
|
+
axc = fig.add_subplot(gs[1])
|
|
1651
|
+
cum = e["cum_curve"]
|
|
1652
|
+
axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
|
|
1653
|
+
axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
|
|
1654
|
+
alpha=0.15, label=f"{cl}% band (in-space placebo)")
|
|
1655
|
+
axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
|
|
1656
|
+
axc.axhline(0, color="#111827", lw=1.0)
|
|
1657
|
+
axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
|
|
1658
|
+
final = cum[-1]
|
|
1659
|
+
axc.annotate(f"{final:,.0f}\n[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}]",
|
|
1660
|
+
(T - 1, final), textcoords="offset points", xytext=(-6, 0),
|
|
1661
|
+
ha="right", va="center", fontweight="bold", color="#065f46", fontsize=9)
|
|
1662
|
+
axc.set_title("Cumulative incremental effect over the test window",
|
|
1663
|
+
fontweight="bold")
|
|
1664
|
+
axc.set_xlabel("period")
|
|
1665
|
+
axc.set_ylabel("cumulative incremental")
|
|
1666
|
+
axc.set_xlim(-0.5, T - 0.5)
|
|
1667
|
+
axc.grid(True, alpha=0.25)
|
|
1668
|
+
axc.legend(loc="upper left", framealpha=0.9, fontsize=9)
|
|
1669
|
+
|
|
1670
|
+
fig.suptitle(f"panelkit · effect over time — ensemble "
|
|
1671
|
+
f"{100*rep.ensemble['lift']:+.2f}% lift, "
|
|
1672
|
+
f"{rep.ensemble['cumulative']:,.0f} cumulative "
|
|
1673
|
+
f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}]",
|
|
1674
|
+
fontsize=14, fontweight="bold", x=0.012, ha="left")
|
|
1675
|
+
if path:
|
|
1676
|
+
fig.savefig(path, dpi=150, bbox_inches="tight")
|
|
1677
|
+
return fig
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|