panelkit 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {panelkit-0.2.2 → panelkit-0.2.3}/Cargo.lock +5 -5
  2. {panelkit-0.2.2 → panelkit-0.2.3}/Cargo.toml +1 -1
  3. {panelkit-0.2.2 → panelkit-0.2.3}/GUIDE.md +27 -2
  4. {panelkit-0.2.2 → panelkit-0.2.3}/PKG-INFO +18 -1
  5. {panelkit-0.2.2 → panelkit-0.2.3}/README.md +17 -0
  6. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/selection.rs +68 -20
  7. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/tests/geo.rs +10 -0
  8. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/api_geo.rs +3 -1
  9. {panelkit-0.2.2 → panelkit-0.2.3}/pyproject.toml +1 -1
  10. {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/_panelkit.pyi +1 -0
  11. {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/design.py +269 -18
  12. {panelkit-0.2.2 → panelkit-0.2.3}/BENCHMARKS.md +0 -0
  13. {panelkit-0.2.2 → panelkit-0.2.3}/LICENSE-APACHE +0 -0
  14. {panelkit-0.2.2 → panelkit-0.2.3}/LICENSE-MIT +0 -0
  15. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/Cargo.toml +0 -0
  16. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/benches/estimators.rs +0 -0
  17. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/bacon.rs +0 -0
  18. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/callaway.rs +0 -0
  19. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/mod.rs +0 -0
  20. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/sunab.rs +0 -0
  21. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/did/twfe.rs +0 -0
  22. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/fe/mod.rs +0 -0
  23. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/fe/within.rs +0 -0
  24. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/lib.rs +0 -0
  25. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/mcnnm/mod.rs +0 -0
  26. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/mcnnm/softimpute.rs +0 -0
  27. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/panel.rs +0 -0
  28. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/result.rs +0 -0
  29. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/augmented.rs +0 -0
  30. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/cpasc.rs +0 -0
  31. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/mod.rs +0 -0
  32. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/sdid.rs +0 -0
  33. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/src/sc/synthetic.rs +0 -0
  34. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/cpasc.rs +0 -0
  35. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/did.rs +0 -0
  36. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/sc.rs +0 -0
  37. {panelkit-0.2.2 → panelkit-0.2.3}/crates/estimators/tests/sc_family.rs +0 -0
  38. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/Cargo.toml +0 -0
  39. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/diagnostics.rs +0 -0
  40. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/lib.rs +0 -0
  41. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/power.rs +0 -0
  42. {panelkit-0.2.2 → panelkit-0.2.3}/crates/geo/src/types.rs +0 -0
  43. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/Cargo.toml +0 -0
  44. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/batch.rs +0 -0
  45. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/bootstrap.rs +0 -0
  46. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/ci.rs +0 -0
  47. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/lib.rs +0 -0
  48. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/parallel.rs +0 -0
  49. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/src/placebo.rs +0 -0
  50. {panelkit-0.2.2 → panelkit-0.2.3}/crates/inference/tests/inference.rs +0 -0
  51. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/Cargo.toml +0 -0
  52. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/error.rs +0 -0
  53. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/cholesky.rs +0 -0
  54. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/eig_sym.rs +0 -0
  55. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/mod.rs +0 -0
  56. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/qr.rs +0 -0
  57. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/randomized.rs +0 -0
  58. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/svd.rs +0 -0
  59. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/factor/svd_gram.rs +0 -0
  60. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/lib.rs +0 -0
  61. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/matrix.rs +0 -0
  62. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/matmul.rs +0 -0
  63. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/mod.rs +0 -0
  64. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/norms.rs +0 -0
  65. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/ops/transform.rs +0 -0
  66. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/opt/mod.rs +0 -0
  67. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/opt/simplex.rs +0 -0
  68. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/opt/softthresh.rs +0 -0
  69. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/rng.rs +0 -0
  70. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/solve/lstsq.rs +0 -0
  71. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/solve/mod.rs +0 -0
  72. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/src/solve/spd.rs +0 -0
  73. {panelkit-0.2.2 → panelkit-0.2.3}/crates/linalg/tests/numerics.rs +0 -0
  74. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/Cargo.toml +0 -0
  75. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/api_did.rs +0 -0
  76. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/api_sc.rs +0 -0
  77. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/convert.rs +0 -0
  78. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/lib.rs +0 -0
  79. {panelkit-0.2.2 → panelkit-0.2.3}/crates/pypanelkit/src/results.rs +0 -0
  80. {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/__init__.py +0 -0
  81. {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/estimators.py +0 -0
  82. {panelkit-0.2.2 → panelkit-0.2.3}/python/panelkit/py.typed +0 -0
@@ -462,7 +462,7 @@ checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
462
462
 
463
463
  [[package]]
464
464
  name = "panelkit-estimators"
465
- version = "0.2.2"
465
+ version = "0.2.3"
466
466
  dependencies = [
467
467
  "criterion",
468
468
  "panelkit-linalg",
@@ -471,7 +471,7 @@ dependencies = [
471
471
 
472
472
  [[package]]
473
473
  name = "panelkit-geo"
474
- version = "0.2.2"
474
+ version = "0.2.3"
475
475
  dependencies = [
476
476
  "panelkit-estimators",
477
477
  "panelkit-inference",
@@ -482,7 +482,7 @@ dependencies = [
482
482
 
483
483
  [[package]]
484
484
  name = "panelkit-inference"
485
- version = "0.2.2"
485
+ version = "0.2.3"
486
486
  dependencies = [
487
487
  "panelkit-estimators",
488
488
  "panelkit-linalg",
@@ -491,7 +491,7 @@ dependencies = [
491
491
 
492
492
  [[package]]
493
493
  name = "panelkit-linalg"
494
- version = "0.2.2"
494
+ version = "0.2.3"
495
495
  dependencies = [
496
496
  "proptest",
497
497
  "rayon",
@@ -623,7 +623,7 @@ dependencies = [
623
623
 
624
624
  [[package]]
625
625
  name = "pypanelkit"
626
- version = "0.2.2"
626
+ version = "0.2.3"
627
627
  dependencies = [
628
628
  "numpy",
629
629
  "panelkit-estimators",
@@ -3,7 +3,7 @@ resolver = "2"
3
3
  members = ["crates/linalg", "crates/estimators", "crates/inference", "crates/geo", "crates/pypanelkit"]
4
4
 
5
5
  [workspace.package]
6
- version = "0.2.2"
6
+ version = "0.2.3"
7
7
  edition = "2021"
8
8
  rust-version = "1.74"
9
9
  license = "MIT OR Apache-2.0"
@@ -294,8 +294,9 @@ weighted-average **ensemble** estimate.
294
294
 
295
295
  ```python
296
296
  ev = design.evaluate(treated=["chicago", "denver"], treat_start=52, level=0.90)
297
- print(ev.summary()) # per-method + ensemble lift, CI, cumulative
298
- ev.plot("evaluate.png") # observed-vs-counterfactual, effect path, lift bar
297
+ print(ev.summary()) # per-method + ensemble lift, CI, cumulative
298
+ ev.plot("evaluate.png") # observed-vs-cf, effect path (CI band), lift bar
299
+ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
299
300
  ev.lift, ev.cumulative, ev.significant
300
301
  ```
301
302
 
@@ -308,6 +309,20 @@ and the ensemble in `ev.ensemble`. Reported numbers: **% lift** (effect ÷
308
309
  counterfactual), **per-period ATT**, and **cumulative incremental** over the
309
310
  window (summed across treated markets).
310
311
 
312
+ **Effect over time** (`ev.plot_effect_over_time(...)`) gives the event-study view:
313
+ the **pointwise** effect across the full timeline — *including the pre-period*, so
314
+ you can see it sits flat (centered on zero) inside the noise band before the test
315
+ starts (a placebo check) and breaks out after — and the running **cumulative
316
+ incremental**, each as a point estimate with a confidence band. The counterfactual
317
+ is centered on the pre-period, so the gap shows fit quality rather than a level
318
+ offset (SDID matches trends, not levels). The bands come from a **moving-block
319
+ bootstrap** of the pre-period residuals: resampling whole blocks preserves their
320
+ autocorrelation, so the intervals are more conservative than an iid normal
321
+ approximation — the cumulative band in particular widens faster than √k when the
322
+ residuals are positively autocorrelated. Raise `block_len` to capture longer-range
323
+ dependence (wider, more conservative cumulative bands). Pass `exclude=[…]` to drop
324
+ markets from the control pool (e.g. ones you don't trust as donors).
325
+
311
326
  ### Choosing a specification — `design.recommend(test_lengths, n_geos_options, target_lift, alphas=…)`
312
327
 
313
328
  Sweeps designs across **test length × number of geos × alpha** and recommends the
@@ -345,6 +360,16 @@ Searches candidate treatment-market sets and ranks them by power, MDE, pre-fit,
345
360
  holdout, and confidence. Pass `eligible=[…]` to restrict to markets you can
346
361
  actually run in.
347
362
 
363
+ Two real-world controls for *which* markets the search may use:
364
+
365
+ - **`include=[…]`** — force specific markets into **every** candidate treatment
366
+ set (must-treat markets, e.g. a flagship region you've already committed to).
367
+ The search fills the remaining slots from `eligible`, up to `max_treated`.
368
+ - **`exclude=[…]`** — drop markets **entirely**: they're never treated *and*
369
+ never used as a donor/control (e.g. a market with contaminated data or its own
370
+ concurrent campaign). `exclude` is also accepted by `power()`, `diagnose()`,
371
+ `evaluate()`, and `recommend()` to keep a market out of the control pool.
372
+
348
373
  ### Multi-cell tests — `design.multi_cell(cells, test_len, …)`
349
374
 
350
375
  Often you run several treatment cells at once — different creatives, budgets, or
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: panelkit
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Topic :: Scientific/Engineering
@@ -231,10 +231,15 @@ mc = design.multi_cell(cells={"west": ["los_angeles", "san_diego"],
231
231
  print(mc.summary()) # per-cell MDE / confidence / holdout
232
232
  mc.plot("multicell.png") # the multi-cell figure below
233
233
 
234
+ # pin in must-have markets, drop ones you don't trust:
235
+ ranked = design.select_markets(test_len=8, target_lift=0.05, max_treated=3,
236
+ include=["chicago"], exclude=["miami"])
237
+
234
238
  # already ran the test? measure it (SC/ASC/SDID + a weighted-average ensemble):
235
239
  ev = design.evaluate(treated=["chicago", "denver"], treat_start=52)
236
240
  print(ev.summary()) # per-method + ensemble lift, CI, cumulative
237
241
  ev.plot("evaluate.png") # observed vs counterfactual + lift-by-method
242
+ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
238
243
 
239
244
  # or sweep specifications (length × #geos × significance) and recommend one:
240
245
  grid = design.recommend(test_lengths=[4, 6, 8, 12], n_geos_options=[3, 5, 10, 20],
@@ -273,6 +278,18 @@ with an SC in-space placebo p-value:
273
278
 
274
279
  ![test evaluation](assets/geo_evaluate.png)
275
280
 
281
+ And the **effect over time** — the pointwise effect across the full timeline
282
+ (pre-period included, so you can see it sit flat in the noise band before the test
283
+ and break out after) plus the running cumulative incremental, each as a point
284
+ estimate with a confidence band:
285
+
286
+ ![effect over time](assets/geo_effect_over_time.png)
287
+
288
+ **Pin in / drop markets.** `select_markets`/`recommend` take `include=[…]`
289
+ (force must-treat markets into every candidate) and `exclude=[…]` (drop markets
290
+ entirely — never treated, never a control). `exclude` is also accepted by
291
+ `power`, `diagnose`, and `evaluate` to keep a market out of the donor pool.
292
+
276
293
  **Messy DataFrame? No problem.** `from_long` coerces real-world data: outcome
277
294
  strings → numeric (with a clear error on genuinely non-numeric values), dates
278
295
  (string or unsorted) → chronological columns, locations → market names, duplicate
@@ -201,10 +201,15 @@ mc = design.multi_cell(cells={"west": ["los_angeles", "san_diego"],
201
201
  print(mc.summary()) # per-cell MDE / confidence / holdout
202
202
  mc.plot("multicell.png") # the multi-cell figure below
203
203
 
204
+ # pin in must-have markets, drop ones you don't trust:
205
+ ranked = design.select_markets(test_len=8, target_lift=0.05, max_treated=3,
206
+ include=["chicago"], exclude=["miami"])
207
+
204
208
  # already ran the test? measure it (SC/ASC/SDID + a weighted-average ensemble):
205
209
  ev = design.evaluate(treated=["chicago", "denver"], treat_start=52)
206
210
  print(ev.summary()) # per-method + ensemble lift, CI, cumulative
207
211
  ev.plot("evaluate.png") # observed vs counterfactual + lift-by-method
212
+ ev.plot_effect_over_time("effect.png") # pointwise + cumulative over time, w/ CIs
208
213
 
209
214
  # or sweep specifications (length × #geos × significance) and recommend one:
210
215
  grid = design.recommend(test_lengths=[4, 6, 8, 12], n_geos_options=[3, 5, 10, 20],
@@ -243,6 +248,18 @@ with an SC in-space placebo p-value:
243
248
 
244
249
  ![test evaluation](assets/geo_evaluate.png)
245
250
 
251
+ And the **effect over time** — the pointwise effect across the full timeline
252
+ (pre-period included, so you can see it sit flat in the noise band before the test
253
+ and break out after) plus the running cumulative incremental, each as a point
254
+ estimate with a confidence band:
255
+
256
+ ![effect over time](assets/geo_effect_over_time.png)
257
+
258
+ **Pin in / drop markets.** `select_markets`/`recommend` take `include=[…]`
259
+ (force must-treat markets into every candidate) and `exclude=[…]` (drop markets
260
+ entirely — never treated, never a control). `exclude` is also accepted by
261
+ `power`, `diagnose`, and `evaluate` to keep a market out of the donor pool.
262
+
246
263
  **Messy DataFrame? No problem.** `from_long` coerces real-world data: outcome
247
264
  strings → numeric (with a clear error on genuinely non-numeric values), dates
248
265
  (string or unsorted) → chronological columns, locations → market names, duplicate
@@ -34,7 +34,10 @@ pub struct MarketCandidate {
34
34
  pub struct SelectConfig {
35
35
  /// Units eligible to be treated (e.g. markets you could actually run in).
36
36
  pub eligible: Vec<usize>,
37
- /// Maximum number of treated markets in a candidate set.
37
+ /// Units **forced into every** candidate treatment set (must-treat markets).
38
+ /// The search fills the remaining slots from `eligible`. Empty = no forcing.
39
+ pub include: Vec<usize>,
40
+ /// Maximum number of treated markets in a candidate set (counts `include`).
38
41
  pub max_treated: usize,
39
42
  pub test_len: usize,
40
43
  /// The lift you care about detecting (fraction, e.g. 0.05 = 5%).
@@ -95,46 +98,91 @@ pub fn evaluate(y: &Mat, treated: &[usize], cfg: &SelectConfig) -> MarketCandida
95
98
  }
96
99
  }
97
100
 
98
- /// Build the candidate list. With `exact_size = Some(k)`, every candidate has
99
- /// exactly `k` markets; otherwise it's every singleton plus sampled subsets of
100
- /// size 2..=max_treated.
101
+ /// Build the candidate list. Every candidate always contains the forced
102
+ /// `include` markets; the remaining slots are drawn from `eligible` (minus the
103
+ /// forced ones). With `exact_size = Some(k)`, every candidate has exactly `k`
104
+ /// markets total; otherwise it's the forced set plus each single extra market
105
+ /// plus sampled larger subsets up to `max_treated`.
101
106
  fn candidate_sets(cfg: &SelectConfig) -> Vec<Vec<usize>> {
102
107
  let mut rng = Xoshiro256pp::seed_from_u64(cfg.seed);
103
- let mut seen = std::collections::HashSet::new();
108
+ let mut seen: std::collections::HashSet<Vec<usize>> = std::collections::HashSet::new();
104
109
  let mut sets: Vec<Vec<usize>> = Vec::new();
105
110
 
106
- if let Some(k) = cfg.exact_size {
107
- let k = k.min(cfg.eligible.len()).max(1);
108
- if k == 1 {
109
- return cfg.eligible.iter().map(|&u| vec![u]).collect();
111
+ // Forced (must-treat) markets, de-duplicated, and the pool of extra picks.
112
+ let mut forced: Vec<usize> = cfg.include.clone();
113
+ forced.sort_unstable();
114
+ forced.dedup();
115
+ let forced_set: std::collections::HashSet<usize> = forced.iter().copied().collect();
116
+ let extra_pool: Vec<usize> = cfg
117
+ .eligible
118
+ .iter()
119
+ .copied()
120
+ .filter(|u| !forced_set.contains(u))
121
+ .collect();
122
+
123
+ if let Some(k0) = cfg.exact_size {
124
+ let k = k0.max(1);
125
+ let need = k.saturating_sub(forced.len());
126
+ if need == 0 {
127
+ // The forced set already fills the requested size.
128
+ if !forced.is_empty() {
129
+ sets.push(forced.clone());
130
+ }
131
+ return sets;
132
+ }
133
+ if need == 1 {
134
+ // Deterministic: forced + each eligible single (preserves the old
135
+ // "all singletons" behavior when nothing is forced and k == 1).
136
+ for &u in &extra_pool {
137
+ let mut pick = forced.clone();
138
+ pick.push(u);
139
+ pick.sort_unstable();
140
+ if seen.insert(pick.clone()) {
141
+ sets.push(pick);
142
+ }
143
+ }
144
+ return sets;
110
145
  }
111
146
  let mut attempts = 0;
112
147
  while sets.len() < cfg.n_candidates && attempts < cfg.n_candidates * 40 {
113
148
  attempts += 1;
114
- let mut pool = cfg.eligible.clone();
149
+ let mut pool = extra_pool.clone();
115
150
  rng.shuffle(&mut pool);
116
- let mut pick: Vec<usize> = pool.into_iter().take(k).collect();
151
+ let mut pick: Vec<usize> = forced.clone();
152
+ pick.extend(pool.into_iter().take(need));
117
153
  pick.sort_unstable();
118
- if seen.insert(pick.clone()) {
154
+ if pick.len() == k && seen.insert(pick.clone()) {
119
155
  sets.push(pick);
120
156
  }
121
157
  }
122
158
  return sets;
123
159
  }
124
160
 
125
- // Mixed-size search: all singletons + sampled subsets of size 2..=max_treated.
126
- sets = cfg.eligible.iter().map(|&u| vec![u]).collect();
127
- if cfg.max_treated >= 2 && cfg.eligible.len() >= 2 {
128
- for s in &sets {
129
- seen.insert(s.clone());
161
+ // Mixed-size search. Extra slots available on top of the forced set.
162
+ let budget = cfg.max_treated.saturating_sub(forced.len());
163
+ if !forced.is_empty() {
164
+ seen.insert(forced.clone());
165
+ sets.push(forced.clone());
166
+ }
167
+ if budget >= 1 {
168
+ for &u in &extra_pool {
169
+ let mut pick = forced.clone();
170
+ pick.push(u);
171
+ pick.sort_unstable();
172
+ if seen.insert(pick.clone()) {
173
+ sets.push(pick);
174
+ }
130
175
  }
176
+ }
177
+ if budget >= 2 && extra_pool.len() >= 2 {
131
178
  let mut attempts = 0;
132
179
  while sets.len() < cfg.n_candidates && attempts < cfg.n_candidates * 20 {
133
180
  attempts += 1;
134
- let size = 2 + rng.gen_range(cfg.max_treated - 1); // 2..=max_treated
135
- let mut pool = cfg.eligible.clone();
181
+ let extra = 2 + rng.gen_range(budget - 1); // 2..=budget extra markets
182
+ let mut pool = extra_pool.clone();
136
183
  rng.shuffle(&mut pool);
137
- let mut pick: Vec<usize> = pool.into_iter().take(size).collect();
184
+ let mut pick: Vec<usize> = forced.clone();
185
+ pick.extend(pool.into_iter().take(extra));
138
186
  pick.sort_unstable();
139
187
  if seen.insert(pick.clone()) {
140
188
  sets.push(pick);
@@ -114,6 +114,7 @@ fn market_selection_ranks_candidates() {
114
114
  let y = geo_panel(12, 60, 5);
115
115
  let cfg = SelectConfig {
116
116
  eligible: (0..12).collect(),
117
+ include: vec![],
117
118
  max_treated: 3,
118
119
  test_len: 10,
119
120
  target_lift: 0.10,
@@ -139,6 +140,15 @@ fn market_selection_ranks_candidates() {
139
140
  };
140
141
  let ranked2 = select_markets(&y, &cfg2);
141
142
  assert!(ranked2.iter().all(|c| c.treated.len() == 2));
143
+ // include: market 5 is forced into every candidate set.
144
+ let cfg3 = SelectConfig {
145
+ include: vec![5],
146
+ ..cfg.clone()
147
+ };
148
+ let ranked3 = select_markets(&y, &cfg3);
149
+ assert!(!ranked3.is_empty());
150
+ assert!(ranked3.iter().all(|c| c.treated.contains(&5)));
151
+ assert!(ranked3.iter().all(|c| c.treated.len() <= 3));
142
152
  // Every candidate has a valid holdout and confidence.
143
153
  for c in &ranked {
144
154
  assert!(c.holdout_pct > 0.0 && c.holdout_pct < 1.0);
@@ -169,7 +169,7 @@ pub fn geo_diagnostics(
169
169
 
170
170
  /// Search and rank candidate treatment-market sets.
171
171
  #[pyfunction]
172
- #[pyo3(signature = (y, eligible, max_treated, test_len, target_lift, method="sdid", alpha=0.1, target_power=0.8, min_pre=0, n_candidates=200, seed=0, exact_size=None, lookback=None))]
172
+ #[pyo3(signature = (y, eligible, max_treated, test_len, target_lift, method="sdid", alpha=0.1, target_power=0.8, min_pre=0, n_candidates=200, seed=0, exact_size=None, lookback=None, include=None))]
173
173
  #[allow(clippy::too_many_arguments)]
174
174
  pub fn geo_select(
175
175
  py: Python<'_>,
@@ -186,6 +186,7 @@ pub fn geo_select(
186
186
  seed: u64,
187
187
  exact_size: Option<usize>,
188
188
  lookback: Option<usize>,
189
+ include: Option<Vec<usize>>,
189
190
  ) -> PyResult<Vec<PyMarketCandidate>> {
190
191
  let m = parse_method(method)?;
191
192
  let mat = mat_from_numpy(&y);
@@ -196,6 +197,7 @@ pub fn geo_select(
196
197
  };
197
198
  let cfg = SelectConfig {
198
199
  eligible,
200
+ include: include.unwrap_or_default(),
199
201
  max_treated,
200
202
  test_len,
201
203
  target_lift,
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "panelkit"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "Fast, from-scratch causal-inference estimators for panel/geo experiments (SC, ASC, SDID, DiD, MC-NNM)."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -191,6 +191,7 @@ def geo_select(
191
191
  seed: int = ...,
192
192
  exact_size: Optional[int] = ...,
193
193
  lookback: Optional[int] = ...,
194
+ include: Optional[Sequence[int]] = ...,
194
195
  ) -> list[MarketCandidate]: ...
195
196
  def fit_callaway_py(
196
197
  y: npt.NDArray[np.float64],
@@ -52,6 +52,26 @@ def _ensemble_weight_arg(spec):
52
52
  return w
53
53
 
54
54
 
55
+ def _placebo_paths(pre_gaps, length, block_len, n_reps, seed):
56
+ """Moving-block bootstrap of the (centered) pre-period residuals into placebo
57
+ paths of ``length`` periods. Resampling whole blocks preserves the residual
58
+ autocorrelation, so the resulting CI bands are more conservative than an iid
59
+ normal approximation. Returns an ``(n_reps, length)`` array (empty if no
60
+ pre-period or zero length)."""
61
+ g = np.asarray(pre_gaps, dtype=float)
62
+ m = len(g)
63
+ if m == 0 or length <= 0 or n_reps <= 0:
64
+ return np.empty((0, max(length, 0)))
65
+ g = g - g.mean() # null is "no effect" → center the residuals
66
+ rng = np.random.default_rng(int(seed))
67
+ bl = max(1, min(int(block_len), m))
68
+ n_blocks = int(np.ceil(length / bl))
69
+ starts = rng.integers(0, m, size=(n_reps, n_blocks))
70
+ idx = (starts[:, :, None] + np.arange(bl)[None, None, :]) % m # circular blocks
71
+ paths = g[idx].reshape(n_reps, n_blocks * bl)[:, :length]
72
+ return paths
73
+
74
+
55
75
  class _PowerReport:
56
76
  """Result of a power analysis across methods, with a report and plots."""
57
77
 
@@ -352,6 +372,22 @@ class GeoDesign:
352
372
  out.append(self._index[m])
353
373
  return out
354
374
 
375
+ def _names_of(self, markets) -> list:
376
+ """Resolve markets (names or indices) to their string names."""
377
+ return [self.names[i] for i in self._resolve(markets)]
378
+
379
+ def _without(self, exclude):
380
+ """Return ``(sub_design, excluded_name_set)`` with the excluded markets
381
+ dropped entirely (so they're neither treated nor used as controls). Names
382
+ are preserved, so callers can pass markets to the sub-design by name."""
383
+ ex = set(self._names_of(exclude)) if exclude else set()
384
+ if not ex:
385
+ return self, ex
386
+ keep = [i for i in range(self.n) if self.names[i] not in ex]
387
+ if not keep:
388
+ raise ValueError("exclude removes every market — nothing left to analyze")
389
+ return GeoDesign(self.Y[keep], names=[self.names[i] for i in keep]), ex
390
+
355
391
  def power(
356
392
  self,
357
393
  treated,
@@ -364,6 +400,7 @@ class GeoDesign:
364
400
  lookback: int | None = None,
365
401
  ensemble: bool = True,
366
402
  ensemble_weights="auto",
403
+ exclude=None,
367
404
  ) -> _PowerReport:
368
405
  """Power analysis for a specified treated-market set across methods.
369
406
 
@@ -376,7 +413,20 @@ class GeoDesign:
376
413
  power reflects the averaged estimator, which is usually steadier than any
377
414
  one method). ``ensemble_weights`` is ``"auto"`` (data-driven inverse-variance
378
415
  weighting from each method's historical-null spread), ``"equal"``, or a dict
379
- like ``{"SC": 0.5, "ASC": 0.2, "SDID": 0.3}``."""
416
+ like ``{"SC": 0.5, "ASC": 0.2, "SDID": 0.3}``.
417
+
418
+ ``exclude`` drops markets entirely (e.g. contaminated or untrustworthy
419
+ ones) so they're never used as donors/controls."""
420
+ if exclude:
421
+ sub, ex = self._without(exclude)
422
+ tnames = self._names_of(treated)
423
+ bad = [n for n in tnames if n in ex]
424
+ if bad:
425
+ raise ValueError(f"treated markets were also excluded: {bad}")
426
+ return sub.power(tnames, test_len, lifts=lifts, methods=methods, alpha=alpha,
427
+ target_power=target_power, recommended=recommended,
428
+ lookback=lookback, ensemble=ensemble,
429
+ ensemble_weights=ensemble_weights)
380
430
  idx = self._resolve(treated)
381
431
  names = [self.names[i] for i in idx]
382
432
  lifts = list(_DEFAULT_LIFTS if lifts is None else lifts)
@@ -398,13 +448,21 @@ class GeoDesign:
398
448
  rec = recommended if recommended in results else list(results)[0]
399
449
  return _PowerReport(self, idx, names, test_len, results, diag, rec, alpha, target_power)
400
450
 
401
- def diagnose(self, treated, test_len: int) -> "_DiagnosticsReport":
451
+ def diagnose(self, treated, test_len: int, exclude=None) -> "_DiagnosticsReport":
402
452
  """Real-world guardrails for a treated-market set: pre-period fit,
403
453
  seasonality, holdout, stability, and warnings — with a visual.
404
454
 
405
455
  Returns a report with ``.summary()`` and ``.plot(path)`` (the guardrails
406
456
  figure: treated-vs-synthetic pre-fit, seasonality ACF, holdout share, and
407
- a scorecard listing any warnings)."""
457
+ a scorecard listing any warnings). ``exclude`` drops markets from the
458
+ control pool entirely."""
459
+ if exclude:
460
+ sub, ex = self._without(exclude)
461
+ tnames = self._names_of(treated)
462
+ bad = [n for n in tnames if n in ex]
463
+ if bad:
464
+ raise ValueError(f"treated markets were also excluded: {bad}")
465
+ return sub.diagnose(tnames, test_len)
408
466
  idx = self._resolve(treated)
409
467
  names = [self.names[i] for i in idx]
410
468
  t0 = self.t - int(test_len)
@@ -431,18 +489,46 @@ class GeoDesign:
431
489
  top: int = 10,
432
490
  exact_size: int | None = None,
433
491
  lookback: int | None = None,
492
+ include=None,
493
+ exclude=None,
434
494
  ) -> list:
435
495
  """Search candidate treatment-market sets and return the top ranked.
436
496
 
437
497
  ``exact_size=k`` restricts the search to sets of exactly ``k`` markets
438
498
  (otherwise sizes 1..``max_treated`` are considered). ``lookback=k`` powers
439
- over the most-recent ``k`` historical windows."""
499
+ over the most-recent ``k`` historical windows.
500
+
501
+ ``include`` forces specific markets into **every** candidate treatment set
502
+ (must-treat markets); the search fills the remaining slots from
503
+ ``eligible``. ``exclude`` drops markets entirely — they're never treated
504
+ and never used as controls."""
505
+ if exclude:
506
+ sub, ex = self._without(exclude)
507
+ elig_names = self._names_of(eligible) if eligible is not None else None
508
+ if elig_names is not None:
509
+ elig_names = [n for n in elig_names if n not in ex]
510
+ inc_names = self._names_of(include) if include else None
511
+ if inc_names is not None:
512
+ bad = [n for n in inc_names if n in ex]
513
+ if bad:
514
+ raise ValueError(f"markets in both include and exclude: {bad}")
515
+ return sub.select_markets(
516
+ test_len, target_lift, max_treated, eligible=elig_names, method=method,
517
+ alpha=alpha, target_power=target_power, n_candidates=n_candidates,
518
+ seed=seed, top=top, exact_size=exact_size, lookback=lookback,
519
+ include=inc_names, exclude=None)
520
+
440
521
  elig = self._resolve(eligible) if eligible is not None else list(range(self.n))
522
+ inc = sorted(set(self._resolve(include))) if include else []
523
+ if len(inc) > int(max_treated):
524
+ raise ValueError(f"include has {len(inc)} markets but max_treated="
525
+ f"{max_treated}; raise max_treated or include fewer")
441
526
  ranked = _panelkit.geo_select(
442
527
  self.Y, elig, int(max_treated), int(test_len), float(target_lift),
443
528
  method.lower(), alpha, target_power, 0, int(n_candidates), int(seed),
444
529
  None if exact_size is None else int(exact_size),
445
530
  None if lookback is None else int(lookback),
531
+ inc or None,
446
532
  )
447
533
  out = []
448
534
  for c in ranked[:top]:
@@ -470,6 +556,8 @@ class GeoDesign:
470
556
  seed: int = 0,
471
557
  min_confidence: float = 60.0,
472
558
  lookback: int | None = None,
559
+ include=None,
560
+ exclude=None,
473
561
  ) -> "_ScenarioGrid":
474
562
  """Sweep designs across **specifications** — test length × number of geos
475
563
  × significance level (alpha) — and recommend the best.
@@ -477,7 +565,9 @@ class GeoDesign:
477
565
  For each (alpha, test_len, n_geos) cell it searches for the best set of
478
566
  exactly ``n_geos`` treatment markets and records its MDE, power, holdout,
479
567
  and confidence. Returns a :class:`_ScenarioGrid` with a recommendation,
480
- a plain-English summary, and a tradeoffs figure.
568
+ a plain-English summary, and a tradeoffs figure. ``include`` forces
569
+ must-treat markets into every candidate; ``exclude`` drops markets
570
+ entirely.
481
571
  """
482
572
  rows = []
483
573
  for alpha in alphas:
@@ -488,6 +578,7 @@ class GeoDesign:
488
578
  eligible=eligible, method=method, alpha=alpha,
489
579
  target_power=target_power, n_candidates=n_candidates,
490
580
  seed=seed, top=1, exact_size=ng, lookback=lookback,
581
+ include=include, exclude=exclude,
491
582
  )
492
583
  best = ranked[0] if ranked else None
493
584
  if best is None:
@@ -613,6 +704,7 @@ class GeoDesign:
613
704
  n_boot: int = 2000,
614
705
  block_len: int = 4,
615
706
  seed: int = 0,
707
+ exclude=None,
616
708
  ) -> "_EvalReport":
617
709
  """Estimate the realized effect of a geo test that has **already run**.
618
710
 
@@ -643,8 +735,17 @@ class GeoDesign:
643
735
  -------
644
736
  _EvalReport
645
737
  With ``.summary()``, ``.plot(path)``, per-method results, and the
646
- ensemble point estimate / interval / lift.
738
+ ensemble point estimate / interval / lift. ``exclude`` drops markets
739
+ from the control pool entirely.
647
740
  """
741
+ if exclude:
742
+ sub, ex = self._without(exclude)
743
+ tnames = self._names_of(treated)
744
+ bad = [n for n in tnames if n in ex]
745
+ if bad:
746
+ raise ValueError(f"treated markets were also excluded: {bad}")
747
+ return sub.evaluate(tnames, treat_start, methods=methods, weights=weights,
748
+ level=level, n_boot=n_boot, block_len=block_len, seed=seed)
648
749
  idx = self._resolve(treated)
649
750
  names = [self.names[i] for i in idx]
650
751
  t0 = int(treat_start)
@@ -661,6 +762,7 @@ class GeoDesign:
661
762
  "ASC": lambda: _panelkit.fit_asc(self.Y, idx, t0, 0.0, None),
662
763
  "SDID": lambda: _panelkit.fit_sdid(self.Y, idx, t0, 1.0),
663
764
  }
765
+ treated_series = self.Y[idx].mean(axis=0)
664
766
  per = {}
665
767
  for m in methods:
666
768
  fit = fitters[m]()
@@ -671,8 +773,21 @@ class GeoDesign:
671
773
  se, lo, hi = _panelkit.bootstrap_mean(
672
774
  att_path.tolist(), "stationary", int(block_len), int(n_boot),
673
775
  int(seed), float(level))
776
+ # Full-timeline counterfactual via donor weights (exact for SC; the
777
+ # dominant term for ASC/SDID). Center on the pre-period so the gap
778
+ # reflects FIT, not a level offset — SDID is level-agnostic (matches
779
+ # trends, not levels), so its donor-weighted series sits at a constant
780
+ # offset that would otherwise look like a non-zero pre-period.
781
+ dids = np.asarray(fit.donor_ids, dtype=int)
782
+ ws = np.asarray(fit.weights, dtype=float)
783
+ if dids.size:
784
+ full_cf = self.Y[dids].T @ ws
785
+ full_cf = full_cf + (treated_series[:t0].mean() - full_cf[:t0].mean())
786
+ else:
787
+ full_cf = np.full(self.t, np.nan)
674
788
  per[m] = {
675
789
  "att": att, "att_path": att_path, "counterfactual": cf,
790
+ "full_cf": full_cf,
676
791
  "cf_mean": cf_mean, "lift": att / cf_mean if cf_mean else float("nan"),
677
792
  "se": se, "att_lo": lo, "att_hi": hi,
678
793
  "lift_lo": lo / cf_mean if cf_mean else float("nan"),
@@ -719,16 +834,57 @@ class GeoDesign:
719
834
  "weights": wmap,
720
835
  }
721
836
 
722
- # Significance: SC in-space placebo p-value, plus a full SC counterfactual
723
- # (donor-weight reconstruction) for the timeline plot.
837
+ # Significance: SC in-space placebo p-value.
724
838
  sc = _panelkit.fit_sc(self.Y, idx, t0, 0.0, True, level)
725
839
  p_value = sc.p_value
726
- donors = np.asarray(sc.donor_ids, dtype=int)
727
- w_sc = np.asarray(sc.weights, dtype=float)
728
- full_cf = (self.Y[donors].T @ w_sc) if donors.size else np.full(self.t, np.nan)
729
- treated_series = self.Y[idx].mean(axis=0)
840
+
841
+ # Full-timeline ensemble counterfactual + gap path (pre-period shows fit,
842
+ # post-period uses the exact ensemble effect).
843
+ ens_full_cf = sum(wmap[m] * per[m]["full_cf"] for m in order)
844
+ full_gap = treated_series - ens_full_cf
845
+ full_gap[t0:] = ens_path # exact ensemble post effect
846
+ counterfactual = treated_series - full_gap # consistent everywhere
847
+ pre_gaps = full_gap[:t0]
848
+ sigma_pre = float(np.std(pre_gaps, ddof=1)) if t0 > 1 else float(np.std(pre_gaps))
849
+
850
+ # CI bands from a MOVING-BLOCK BOOTSTRAP of the pre-period residuals.
851
+ # Blocks preserve autocorrelation, so the bands are more conservative than
852
+ # an iid normal approximation — especially the cumulative band, whose
853
+ # spread grows faster than sqrt(k) under positive autocorrelation.
854
+ post_len = self.t - t0
855
+ a = (1.0 - float(level)) / 2.0
856
+ paths = _placebo_paths(pre_gaps, post_len, int(block_len), int(n_boot), int(seed))
857
+ if paths.size:
858
+ point_lo = np.quantile(paths, a, axis=0)
859
+ point_hi = np.quantile(paths, 1.0 - a, axis=0)
860
+ point_hw = float(np.quantile(np.abs(paths), float(level))) # symmetric, full-timeline
861
+ cum_paths = np.cumsum(paths, axis=1)
862
+ cum_band_lo = np.quantile(cum_paths, a, axis=0)
863
+ cum_band_hi = np.quantile(cum_paths, 1.0 - a, axis=0)
864
+ else:
865
+ point_lo = point_hi = np.zeros(post_len)
866
+ point_hw = 0.0
867
+ cum_band_lo = cum_band_hi = np.zeros(post_len)
868
+
869
+ ens_post = ens_path
870
+ run = np.cumsum(ens_post)
871
+ cum_curve = run * n_treated
872
+ cum_lo_curve = (run + cum_band_lo) * n_treated
873
+ cum_hi_curve = (run + cum_band_hi) * n_treated
874
+
875
+ ensemble["sigma_pre"] = sigma_pre
876
+ ensemble["full_gap"] = full_gap
877
+ ensemble["point_hw"] = point_hw # constant pointwise half-width
878
+ ensemble["point_lo"] = ens_post + point_lo # per-period CI on the effect
879
+ ensemble["point_hi"] = ens_post + point_hi
880
+ ensemble["cum_curve"] = cum_curve # cumulative incremental path
881
+ ensemble["cum_lo_curve"] = cum_lo_curve
882
+ ensemble["cum_hi_curve"] = cum_hi_curve
883
+ ensemble["cum_lo"] = float(cum_lo_curve[-1]) if post_len else float("nan")
884
+ ensemble["cum_hi"] = float(cum_hi_curve[-1]) if post_len else float("nan")
885
+
730
886
  return _EvalReport(names, t0, n_treated, per, ensemble, p_value, level,
731
- treated_series, full_cf)
887
+ treated_series, counterfactual)
732
888
 
733
889
 
734
890
  class _ScenarioGrid:
@@ -924,15 +1080,26 @@ class _EvalReport:
924
1080
  "interval includes zero.")
925
1081
  lines.append(f"Headline (ensemble) : {100*e['lift']:+.2f}% lift, "
926
1082
  f"{e['cumulative']:,.0f} cumulative incremental")
1083
+ if "cum_lo" in e:
1084
+ lines.append(f"Cumulative {cl}% CI : "
1085
+ f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}] "
1086
+ f"(moving-block bootstrap, block_len-aware)")
927
1087
  lines.append(verdict)
928
1088
  lines.append("=" * 66)
929
1089
  return "\n".join(lines)
930
1090
 
931
1091
  def plot(self, path: str | None = None):
932
- """Render the evaluation figure (observed vs counterfactual, effect path,
933
- and a lift-by-method bar). Returns the matplotlib Figure."""
1092
+ """Render the evaluation figure (observed vs counterfactual, effect path
1093
+ with CI band, and a lift-by-method bar). Returns the matplotlib Figure."""
934
1094
  return _plot_eval(self, path)
935
1095
 
1096
+ def plot_effect_over_time(self, path: str | None = None):
1097
+ """Render the effect-over-time figure: the **pointwise** effect across the
1098
+ full timeline (pre-period included, as a placebo check) and the running
1099
+ **cumulative** incremental, each as a point estimate with a confidence
1100
+ band. Returns the matplotlib Figure."""
1101
+ return _plot_eval_timeline(self, path)
1102
+
936
1103
  def __repr__(self):
937
1104
  sig = "sig" if self.significant else "ns"
938
1105
  return (f"EvalReport(lift={100*self.lift:+.2f}%, "
@@ -1362,13 +1529,18 @@ def _plot_eval(rep: "_EvalReport", path):
1362
1529
  ax.grid(True, alpha=0.25)
1363
1530
  ax.legend(loc="best", framealpha=0.9, fontsize=9)
1364
1531
 
1365
- # ---- B: effect path over the post-period (ensemble + per method). ----
1532
+ # ---- B: effect path over the post-period (ensemble + per method) + CI band.
1366
1533
  axb = fig.add_subplot(gs[1, 0])
1367
1534
  for m, r in rep.per.items():
1368
1535
  axb.plot(post, r["att_path"], color=_METHOD_COLORS.get(m, _PK_GREY),
1369
1536
  lw=1.3, alpha=0.7, label=m)
1370
- axb.plot(post, rep.ensemble["att_path"], color=_PK_PURPLE, lw=2.6,
1371
- label="ENSEMBLE")
1537
+ ens_post = rep.ensemble["att_path"]
1538
+ p_lo = rep.ensemble.get("point_lo")
1539
+ p_hi = rep.ensemble.get("point_hi")
1540
+ if p_lo is not None:
1541
+ axb.fill_between(post, p_lo, p_hi, color=_PK_PURPLE, alpha=0.18,
1542
+ label=f"ensemble {int(round(100*rep.level))}% band")
1543
+ axb.plot(post, ens_post, color=_PK_PURPLE, lw=2.6, label="ENSEMBLE")
1372
1544
  axb.axhline(0, color="#111827", lw=1.0)
1373
1545
  axb.set_title("Effect over time (per-period ATT)", fontweight="bold")
1374
1546
  axb.set_xlabel("period")
@@ -1405,3 +1577,82 @@ def _plot_eval(rep: "_EvalReport", path):
1405
1577
  if path:
1406
1578
  fig.savefig(path, dpi=150, bbox_inches="tight")
1407
1579
  return fig
1580
+
1581
+
1582
+ def _plot_eval_timeline(rep: "_EvalReport", path):
1583
+ """Pointwise + cumulative effect over the full timeline, with CI bands.
1584
+
1585
+ Bands come from a moving-block bootstrap of the pre-period residuals (so they
1586
+ capture autocorrelation): the pointwise band is the per-period placebo spread
1587
+ around the estimate; the cumulative band grows with horizon as the bootstrap
1588
+ placebo cumulative-sums spread out."""
1589
+ _, plt = _require_mpl()
1590
+ import numpy as _np
1591
+ from matplotlib.gridspec import GridSpec
1592
+
1593
+ T = len(rep.treated_series)
1594
+ t0 = rep.t0
1595
+ e = rep.ensemble
1596
+ x = _np.arange(T)
1597
+ seg = x[t0:]
1598
+ gap = _np.asarray(e["full_gap"], dtype=float)
1599
+ hw = e.get("point_hw", 0.0)
1600
+ cl = int(round(100 * rep.level))
1601
+
1602
+ plt.rcParams.update({"font.size": 11, "axes.titlesize": 12})
1603
+ fig = plt.figure(figsize=(12, 7.8))
1604
+ fig.patch.set_facecolor("white")
1605
+ gs = GridSpec(2, 1, figure=fig, height_ratios=[1.0, 1.0], hspace=0.32)
1606
+
1607
+ # ---- Top: pointwise effect (treated − counterfactual), full timeline. ----
1608
+ ax = fig.add_subplot(gs[0])
1609
+ ax.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
1610
+ # Constant placebo band across the whole timeline (the pre-period sits inside
1611
+ # it as a fit/placebo check); the per-period CI on the post effect is shown
1612
+ # as a tighter band around the estimate.
1613
+ ax.fill_between(x, gap - hw, gap + hw, color=_PK_PURPLE, alpha=0.12,
1614
+ label=f"{cl}% placebo band")
1615
+ ax.fill_between(seg, e["point_lo"], e["point_hi"], color=_PK_PURPLE, alpha=0.22)
1616
+ ax.plot(x, gap, color=_PK_PURPLE, lw=2.0, label="pointwise effect")
1617
+ ax.axhline(0, color="#111827", lw=1.0)
1618
+ ax.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
1619
+ ax.annotate("pre-period (placebo)", (t0 / 2, ax.get_ylim()[1]), ha="center",
1620
+ va="top", color="#6b7280", fontsize=9)
1621
+ ax.annotate("test window", (t0 + (T - t0) / 2, ax.get_ylim()[1]), ha="center",
1622
+ va="top", color="#6b21a8", fontsize=9)
1623
+ ax.set_title("Pointwise effect over time (treated − counterfactual)",
1624
+ fontweight="bold")
1625
+ ax.set_xlabel("period")
1626
+ ax.set_ylabel("per-period effect")
1627
+ ax.grid(True, alpha=0.25)
1628
+ ax.legend(loc="upper left", framealpha=0.9, fontsize=9)
1629
+
1630
+ # ---- Bottom: cumulative incremental over the test window (×n_treated). ----
1631
+ axc = fig.add_subplot(gs[1])
1632
+ cum = e["cum_curve"]
1633
+ axc.axvspan(-0.5, t0 - 0.5, color="#f3f4f6", alpha=0.8)
1634
+ axc.fill_between(seg, e["cum_lo_curve"], e["cum_hi_curve"], color=_PK_GREEN,
1635
+ alpha=0.15, label=f"{cl}% band (block bootstrap)")
1636
+ axc.plot(seg, cum, color=_PK_GREEN, lw=2.4, label="cumulative incremental")
1637
+ axc.axhline(0, color="#111827", lw=1.0)
1638
+ axc.axvline(t0 - 0.5, color="#374151", lw=1.2, ls=":")
1639
+ final = cum[-1]
1640
+ axc.annotate(f"{final:,.0f}\n[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}]",
1641
+ (T - 1, final), textcoords="offset points", xytext=(-6, 0),
1642
+ ha="right", va="center", fontweight="bold", color="#065f46", fontsize=9)
1643
+ axc.set_title("Cumulative incremental effect over the test window",
1644
+ fontweight="bold")
1645
+ axc.set_xlabel("period")
1646
+ axc.set_ylabel("cumulative incremental")
1647
+ axc.set_xlim(-0.5, T - 0.5)
1648
+ axc.grid(True, alpha=0.25)
1649
+ axc.legend(loc="upper left", framealpha=0.9, fontsize=9)
1650
+
1651
+ fig.suptitle(f"panelkit · effect over time — ensemble "
1652
+ f"{100*rep.ensemble['lift']:+.2f}% lift, "
1653
+ f"{rep.ensemble['cumulative']:,.0f} cumulative "
1654
+ f"[{e['cum_lo']:,.0f}, {e['cum_hi']:,.0f}]",
1655
+ fontsize=14, fontweight="bold", x=0.012, ha="left")
1656
+ if path:
1657
+ fig.savefig(path, dpi=150, bbox_inches="tight")
1658
+ return fig
File without changes
File without changes
File without changes
File without changes
File without changes