kalshi-trading-bot-cli 2.1.6 → 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kalshi-trading-bot-cli",
3
- "version": "2.1.6",
3
+ "version": "2.1.8",
4
4
  "description": "Kalshi Trading Bot CLI - AI-powered prediction market terminal.",
5
5
  "license": "MIT",
6
6
  "author": "Octagon AI, Inc.",
@@ -1,9 +1,92 @@
1
1
  import type { Database } from 'bun:sqlite';
2
2
  import { callKalshiApi } from '../tools/kalshi/api.js';
3
3
  import type { KalshiMarket } from '../tools/kalshi/types.js';
4
+ import { fetchAllOctagonEvents } from '../scan/octagon-events-api.js';
4
5
 
5
6
  const CONCURRENCY = 10;
6
7
 
8
/**
 * Where the backtest universe is sourced from:
 * `'api'` is Octagon's covered-events list, `'local'` is the local
 * `octagon_reports` log.
 */
export type UniverseSource = 'api' | 'local';

/** One event in the backtest universe. */
export interface UniverseEntry {
  /** Event ticker identifying the event. */
  event_ticker: string;
  /** Category label for the event, or `null` when none is known. */
  category: string | null;
}

/** A resolved backtest universe plus where it came from. */
export interface Universe {
  /** The events to be scored. */
  events: UniverseEntry[];
  /** Which source produced `events`. */
  source: UniverseSource;
  /** Human-readable one-line summary of the universe. */
  description: string;
}
21
+
22
/**
 * Resolve the backtest universe, i.e. the set of events that get scored.
 *
 * Sources:
 * - `api` (default): every event returned by `fetchAllOctagonEvents()`.
 *   Systematic and reproducible across machines because it does not depend
 *   on what this install analyzed in the past. `hasHistory=true` is
 *   deliberately NOT passed: a prior audit showed that flag silently dropped
 *   373 of 662 events. Events with no usable snapshot are filtered out
 *   downstream (selectSnapshotByDate returns null and they are skipped).
 * - `local`: legacy behavior — rows from the local `octagon_reports` log.
 *   Reflects past usage of this machine rather than a defined universe;
 *   useful for offline runs and for comparing against historical backtests.
 *
 * KNOWN LIMITATION: the API returns *today's* covered universe, not the
 * universe as of the entry date. Events dropped from coverage mid-window
 * vanish from the backtest (survivorship at the universe level). A true
 * point-in-time universe requires `events?as_of=<date>` upstream.
 *
 * @param db   Local database; only queried when the source is `local`.
 * @param opts Optional `source` (defaults to `'api'`) and `category`. For the
 *             API source the category is a case-insensitive substring match
 *             against each event's category; for the local source it is
 *             handed to `buildEventQuery`.
 * @returns The events, the source actually used, and a one-line description.
 */
export async function resolveUniverse(
  db: Database,
  opts?: { source?: UniverseSource; category?: string },
): Promise<Universe> {
  const source = opts?.source ?? 'api';

  // Legacy local path: read whatever this machine has logged.
  if (source !== 'api') {
    const { query, params } = buildEventQuery('', opts?.category);
    const rows = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
    const localEvents = rows.map(({ event_ticker, category }) => ({ event_ticker, category }));
    return {
      events: localEvents,
      source,
      description: `${rows.length} events from local octagon_reports (NON-SYSTEMATIC — reflects past usage of this machine)`,
    };
  }

  // Systematic path: Octagon's covered-events list.
  const covered = await fetchAllOctagonEvents();
  const entries: UniverseEntry[] = covered.map((e) => ({
    event_ticker: e.event_ticker,
    category: e.series_category ?? null,
  }));

  const wanted = opts?.category;
  let events = entries;
  if (wanted) {
    const needle = wanted.toLowerCase();
    // Entries without a category never match a category filter.
    events = entries.filter((entry) => entry.category?.toLowerCase().includes(needle));
  }

  return {
    events,
    source,
    description: `${events.length} events from Octagon API (systematic universe)`,
  };
}
72
+
73
/**
 * Fetch the Kalshi markets for every event in the universe exactly once.
 *
 * The resulting map can be handed to both discoverSettledMarkets and
 * discoverOpenMarkets (via their `payloads` option) so that each of them
 * reads the shared payload instead of re-fetching it per event.
 *
 * @param universe Events whose market payloads should be loaded.
 * @returns Map from `event_ticker` to that event's markets.
 */
export async function fetchEventPayloads(
  universe: UniverseEntry[],
): Promise<Map<string, KalshiMarket[]>> {
  const payloadsByTicker = new Map<string, KalshiMarket[]>();

  const loadOne = async ({ event_ticker }: UniverseEntry): Promise<void> => {
    const markets = await fetchEventMarkets(event_ticker);
    payloadsByTicker.set(event_ticker, markets);
  };

  await parallelMap(universe, loadOne, CONCURRENCY);
  return payloadsByTicker;
}
89
+
7
90
  export interface SettledMarket {
8
91
  ticker: string;
9
92
  event_ticker: string;
@@ -103,13 +186,23 @@ export async function parallelMap<T, R>(
103
186
  */
104
187
  export async function discoverSettledMarkets(
105
188
  db: Database,
106
- opts?: { category?: string },
189
+ opts?: {
190
+ category?: string;
191
+ /** Pre-resolved universe + payloads (Phase 4 path). When omitted, falls back to the legacy local SQL path. */
192
+ universe?: Universe;
193
+ payloads?: Map<string, KalshiMarket[]>;
194
+ },
107
195
  ): Promise<SettledMarket[]> {
108
- const { query, params } = buildEventQuery('', opts?.category);
109
- const events = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
196
+ let events: Array<{ event_ticker: string; category: string | null }>;
197
+ if (opts?.universe) {
198
+ events = opts.universe.events;
199
+ } else {
200
+ const { query, params } = buildEventQuery('', opts?.category);
201
+ events = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
202
+ }
110
203
 
111
204
  const batchResults = await parallelMap(events, async ({ event_ticker, category: cat }) => {
112
- const markets = await fetchEventMarkets(event_ticker);
205
+ const markets = opts?.payloads?.get(event_ticker) ?? await fetchEventMarkets(event_ticker);
113
206
  const settled: SettledMarket[] = [];
114
207
 
115
208
  for (const m of markets) {
@@ -137,13 +230,22 @@ export async function discoverSettledMarkets(
137
230
  */
138
231
  export async function discoverOpenMarkets(
139
232
  db: Database,
140
- opts?: { category?: string },
233
+ opts?: {
234
+ category?: string;
235
+ universe?: Universe;
236
+ payloads?: Map<string, KalshiMarket[]>;
237
+ },
141
238
  ): Promise<OpenMarket[]> {
142
- const { query: q2, params: p2 } = buildEventQuery('', opts?.category);
143
- const events2 = db.query(q2).all(p2) as Array<{ event_ticker: string; category: string | null }>;
239
+ let events2: Array<{ event_ticker: string; category: string | null }>;
240
+ if (opts?.universe) {
241
+ events2 = opts.universe.events;
242
+ } else {
243
+ const { query: q2, params: p2 } = buildEventQuery('', opts?.category);
244
+ events2 = db.query(q2).all(p2) as Array<{ event_ticker: string; category: string | null }>;
245
+ }
144
246
 
145
247
  const batchResults = await parallelMap(events2, async ({ event_ticker, category: cat }) => {
146
- const markets = await fetchEventMarkets(event_ticker);
248
+ const markets = opts?.payloads?.get(event_ticker) ?? await fetchEventMarkets(event_ticker);
147
249
  const open: OpenMarket[] = [];
148
250
 
149
251
  for (const m of markets) {
@@ -1,4 +1,4 @@
1
- import type { ScoredSignal, BacktestResult } from './types.js';
1
+ import type { ScoredSignal, BacktestResult, LegMetrics } from './types.js';
2
2
 
3
3
  /**
4
4
  * Skill score: how much better Octagon is vs the market as a forecaster.
@@ -44,6 +44,68 @@ export function bootstrapCI(
44
44
  return [stats[lo], stats[hi]];
45
45
  }
46
46
 
47
/**
 * Cluster bootstrap — resamples GROUPS with replacement, not individual rows.
 *
 * Use when the unit of risk is the group, not the row. Kalshi events are
 * multi-outcome: a single "Will the Fed cut N times?" event has a ladder of
 * contracts that all settle together, so several rows can reflect *one*
 * underlying outcome. A row-level bootstrap treats those rows as independent
 * and its interval narrows like 1/√(row count); the effective sample size is
 * closer to the event count, so the honest interval is wider.
 *
 * Each iteration draws `groups.length` groups with replacement, concatenates
 * their indices, and applies `statFn` to the pooled index list. The indices
 * inside a group need not be contiguous — they are opaque references into
 * whatever per-row data `statFn` closes over.
 *
 * @param groups     Index lists, one per cluster.
 * @param statFn     Statistic evaluated on the pooled indices of one resample.
 * @param iterations Number of resamples; must be a finite integer > 0.
 * @param alpha      Two-sided significance level in (0, 1); 0.05 → 95% CI.
 * @param rng        Source of uniform randoms in [0, 1). Defaults to
 *                   `Math.random`; inject a seeded generator for
 *                   reproducible intervals.
 * @returns `[lower, upper]` percentile bounds, or `[0, 0]` when `groups` is empty.
 * @throws Error when `iterations` or `alpha` is out of range (only checked
 *         when `groups` is non-empty, matching the original early return).
 */
export function clusterBootstrapCI(
  groups: number[][],
  statFn: (sampleIndices: number[]) => number,
  iterations = 10_000,
  alpha = 0.05,
  rng: () => number = Math.random,
): [number, number] {
  if (groups.length === 0) return [0, 0];
  if (!Number.isFinite(iterations) || !Number.isInteger(iterations) || iterations <= 0) {
    throw new Error(`clusterBootstrapCI: iterations must be a finite integer > 0, got ${iterations}`);
  }
  if (!Number.isFinite(alpha) || alpha <= 0 || alpha >= 1) {
    throw new Error(`clusterBootstrapCI: alpha must be a finite number in (0, 1), got ${alpha}`);
  }

  const groupCount = groups.length;
  const stats: number[] = [];
  for (let i = 0; i < iterations; i++) {
    const pooled: number[] = [];
    for (let j = 0; j < groupCount; j++) {
      // Clamp so an rng that returns exactly 1 (or slightly out of range)
      // still selects a valid group.
      const pick = Math.max(0, Math.min(groupCount - 1, Math.floor(rng() * groupCount)));
      // Push element-wise: `push(...group)` passes every index as a call
      // argument and can overflow the engine's argument limit on big groups.
      for (const idx of groups[pick]) pooled.push(idx);
    }
    // All drawn groups were empty: nothing to evaluate, record a neutral 0.
    stats.push(pooled.length === 0 ? 0 : statFn(pooled));
  }

  stats.sort((a, b) => a - b);
  const last = stats.length - 1;
  const lo = Math.min(Math.max(0, Math.floor((alpha / 2) * stats.length)), last);
  const hi = Math.min(Math.max(0, Math.floor((1 - alpha / 2) * stats.length)), last);
  return [stats[lo], stats[hi]];
}
93
+
94
/**
 * Bucket item positions by `event_ticker`.
 *
 * Produces one index list per distinct event, in order of each event's first
 * appearance; indices inside a list are ascending. The result feeds
 * clusterBootstrapCI when signals are correlated within an event
 * (multi-outcome ladders, mutually-exclusive option sets).
 *
 * @param items Rows carrying an `event_ticker`.
 * @returns Index lists, one per event.
 */
function groupIndicesByEvent<T extends { event_ticker: string }>(items: T[]): number[][] {
  const buckets = items.reduce((acc, item, position) => {
    const existing = acc.get(item.event_ticker);
    if (existing === undefined) {
      acc.set(item.event_ticker, [position]);
    } else {
      existing.push(position);
    }
    return acc;
  }, new Map<string, number[]>());
  return Array.from(buckets.values());
}
108
+
47
109
  /**
48
110
  * Compute Brier score: ((forecast/100) - (outcome/100))²
49
111
  * Both forecast and outcome are on 0-100 scale.
@@ -52,10 +114,162 @@ function brier(forecast: number, outcome: number): number {
52
114
  return ((forecast / 100) - (outcome / 100)) ** 2;
53
115
  }
54
116
 
117
/**
 * Entry-price bands (in cents) used by the within-band skill calculation.
 * Bands are half-open `[lo, hi)` so adjacent bands never overlap, except the
 * final band, which is closed at `hi` so that its labelled upper bound (95¢)
 * is actually included.
 */
const PRICE_BANDS: Array<{ label: string; lo: number; hi: number }> = [
  { label: '5-20¢', lo: 5, hi: 20 },
  { label: '20-40¢', lo: 20, hi: 40 },
  { label: '40-60¢', lo: 40, hi: 60 },
  { label: '60-80¢', lo: 60, hi: 80 },
  { label: '80-95¢', lo: 80, hi: 95 },
];

/**
 * Compute zero-skill baselines on the same post-filter universe as the model.
 *
 * Why: Kalshi events are multi-outcome — most resolved contracts settle NO
 * because each event has one YES outcome and many NOs, so a model that
 * consistently picks NO gets a high hit rate by structure alone. The
 * always-NO / always-YES baselines expose that structural tilt.
 *
 * Within-band skill: the model's NO-bet ROI minus the always-NO ROI, computed
 * inside each entry-price band and then weighted by the model's NO-side
 * capital in that band. This controls for the entry-price mix.
 *
 * Notes:
 * - A signal counts as a model NO bet when `edge_pp < 0`; its `pnl` and
 *   `capital` fields are used as-is.
 * - Bands with no signals are omitted from the breakdown. Bands with signals
 *   but no model NO bets are reported with `model_no_roi` 0 (so their
 *   `skill_delta_pp` is just minus the baseline) and carry zero weight in
 *   the aggregate.
 * - Signals whose entry price falls outside every band still count toward
 *   the universe-wide always-NO / always-YES figures.
 *
 * @param signals Scored signals (prices on a 0-100 scale).
 * @returns Universe-wide baselines plus the per-band breakdown.
 */
function computeBaselines(signals: ScoredSignal[]): BacktestResult['baselines'] {
  // P&L of buying NO at entry and marking it at the current/settled price.
  const noLeg = (s: ScoredSignal): { pnl: number; capital: number; hit: boolean } => {
    const capital = (100 - s.market_then) / 100;
    const settlement = 100 - s.market_now;
    const pnl = (settlement - (100 - s.market_then)) / 100;
    return { pnl, capital, hit: s.market_now < s.market_then };
  };
  // Same for buying YES.
  const yesLeg = (s: ScoredSignal): { pnl: number; capital: number; hit: boolean } => {
    const capital = s.market_then / 100;
    const pnl = (s.market_now - s.market_then) / 100;
    return { pnl, capital, hit: s.market_now > s.market_then };
  };

  const lastBand = PRICE_BANDS.length - 1;
  // Index of the band containing `price`, or -1 when it lies outside all bands.
  const bandIndexFor = (price: number): number =>
    PRICE_BANDS.findIndex((b, i) =>
      price >= b.lo && (i === lastBand ? price <= b.hi : price < b.hi),
    );

  const perBand = PRICE_BANDS.map(() => ({
    modelPnl: 0, modelCap: 0, modelN: 0, noPnl: 0, noCap: 0, n: 0,
  }));

  let noP = 0, noC = 0, noHits = 0;
  let yesP = 0, yesC = 0, yesHits = 0;

  // Single pass: universe-wide totals and per-band accumulators together.
  for (const s of signals) {
    const no = noLeg(s);
    noP += no.pnl; noC += no.capital; if (no.hit) noHits++;
    const yes = yesLeg(s);
    yesP += yes.pnl; yesC += yes.capital; if (yes.hit) yesHits++;

    const bandIdx = bandIndexFor(s.market_then);
    if (bandIdx < 0) continue;
    const acc = perBand[bandIdx];
    acc.n++;
    acc.noPnl += no.pnl;
    acc.noCap += no.capital;
    if (s.edge_pp < 0) {
      acc.modelPnl += s.pnl;
      acc.modelCap += s.capital;
      acc.modelN++;
    }
  }

  const breakdown: BacktestResult['baselines']['within_band_breakdown'] = [];
  let weightedDeltaNumer = 0;
  let weightedDeltaDenom = 0;
  PRICE_BANDS.forEach((band, i) => {
    const acc = perBand[i];
    if (acc.n === 0) return;
    const modelRoi = acc.modelCap > 0 ? acc.modelPnl / acc.modelCap : 0;
    const baselineRoi = acc.noCap > 0 ? acc.noPnl / acc.noCap : 0;
    const deltaPp = (modelRoi - baselineRoi) * 100;
    breakdown.push({
      band: band.label,
      model_no_roi: modelRoi,
      always_no_roi: baselineRoi,
      skill_delta_pp: deltaPp,
      n_model: acc.modelN,
      n_universe: acc.n,
    });
    weightedDeltaNumer += deltaPp * acc.modelCap;
    weightedDeltaDenom += acc.modelCap;
  });

  const total = signals.length;
  return {
    always_no_roi: noC > 0 ? noP / noC : 0,
    always_no_hit_rate: total > 0 ? noHits / total : 0,
    always_yes_roi: yesC > 0 ? yesP / yesC : 0,
    always_yes_hit_rate: total > 0 ? yesHits / total : 0,
    within_band_skill_pp: weightedDeltaDenom > 0 ? weightedDeltaNumer / weightedDeltaDenom : 0,
    within_band_breakdown: breakdown,
  };
}
214
+
215
/**
 * Scorecard for a single leg (e.g. only resolved or only unresolved signals).
 *
 * Only "edge" signals are scored: those with a non-zero `edge_pp` whose
 * magnitude is at least `minEdgePp`. A signal is a hit when the market moved
 * in the direction of the edge (up for positive edge, down for negative).
 * ROI is capital-weighted: summed `pnl` divided by summed `capital`.
 * The hit-rate interval is an event-clustered bootstrap.
 *
 * @param signals   Signals belonging to this leg.
 * @param minEdgePp Minimum absolute edge (percentage points) to count.
 * @returns Counts, hit rate with CI, P&L, ROI and capital for the leg.
 */
function computeLegMetrics(signals: ScoredSignal[], minEdgePp: number): LegMetrics {
  const qualifying = signals.filter((s) => s.edge_pp !== 0 && Math.abs(s.edge_pp) >= minEdgePp);
  const count = qualifying.length;

  // 1 when the market moved the way the edge pointed, else 0.
  const outcomeOf = (s: ScoredSignal): number => {
    const movedWithEdge = s.edge_pp > 0
      ? s.market_now > s.market_then
      : s.market_now < s.market_then;
    return movedWithEdge ? 1 : 0;
  };
  const outcomes = qualifying.map(outcomeOf);

  let hitCount = 0;
  for (const o of outcomes) hitCount += o;

  const clusters = groupIndicesByEvent(qualifying);
  let ci: [number, number] = [0, 0];
  if (count > 0) {
    ci = clusterBootstrapCI(clusters, (sample) => {
      if (sample.length === 0) return 0;
      let total = 0;
      for (const idx of sample) total += outcomes[idx];
      return total / sample.length;
    });
  }

  let pnlTotal = 0;
  let capitalTotal = 0;
  for (const s of qualifying) pnlTotal += s.pnl;
  for (const s of qualifying) capitalTotal += s.capital;

  return {
    edge_signals: count,
    edge_hit_rate: count > 0 ? hitCount / count : 0,
    hit_rate_ci: ci,
    flat_bet_pnl: pnlTotal,
    flat_bet_roi: capitalTotal > 0 ? pnlTotal / capitalTotal : 0,
    total_capital: capitalTotal,
  };
}
250
+
251
/**
 * All-zero leg scorecard, used for both legs when there are no signals.
 * NOTE: this is a single shared object (including its `hit_rate_ci` tuple);
 * consumers should treat it as read-only.
 */
const EMPTY_LEG: LegMetrics = {
  edge_signals: 0,
  edge_hit_rate: 0,
  hit_rate_ci: [0, 0],
  flat_bet_pnl: 0,
  flat_bet_roi: 0,
  total_capital: 0,
};

/**
 * All-zero baselines with an empty per-band breakdown, used when there are
 * no signals. Shared by reference — treat as read-only.
 */
const EMPTY_BASELINES: BacktestResult['baselines'] = {
  always_no_roi: 0,
  always_no_hit_rate: 0,
  always_yes_roi: 0,
  always_yes_hit_rate: 0,
  within_band_skill_pp: 0,
  within_band_breakdown: [],
};
268
+
55
269
  /**
56
270
  * Compute all backtest metrics from a unified list of scored signals.
57
271
  */
58
- export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<BacktestResult, 'subscription_notice'> {
272
+ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<BacktestResult, 'subscription_notice' | 'signals_dropped_no_volume' | 'universe_source' | 'universe_size' | 'universe_description' | 'fee_model' | 'flat_bet_pnl_net' | 'flat_bet_roi_net'> {
59
273
  const n = signals.length;
60
274
  if (n === 0) {
61
275
  return {
@@ -75,6 +289,9 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
75
289
  flat_bet_roi: 0,
76
290
  total_capital: 0,
77
291
  signals: [],
292
+ baselines: EMPTY_BASELINES,
293
+ resolved_metrics: EMPTY_LEG,
294
+ unresolved_metrics: EMPTY_LEG,
78
295
  };
79
296
  }
80
297
 
@@ -84,10 +301,14 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
84
301
  const brierOctagon = brierOctagonScores.reduce((a, b) => a + b, 0) / n;
85
302
  const brierMarket = brierMarketScores.reduce((a, b) => a + b, 0) / n;
86
303
 
87
- // Skill score with bootstrap CI resample both
304
+ // Skill score with EVENT-CLUSTERED bootstrap CI. Why clustered: multi-
305
+ // outcome events (Fed-cut ladders, election option sets, price strikes)
306
+ // settle as a block — N contracts from one event aren't N independent
307
+ // observations. Row-level bootstrap shrinks the CI with sqrt(N rows)
308
+ // when the right denominator is sqrt(N events).
88
309
  const skillScore = computeSkillScore(brierOctagon, brierMarket);
89
- const indices = signals.map((_, i) => i);
90
- const skillCI = bootstrapCI(indices, (sample) => {
310
+ const eventGroups = groupIndicesByEvent(signals);
311
+ const skillCI = clusterBootstrapCI(eventGroups, (sample) => {
91
312
  let sumOctagon = 0;
92
313
  let sumMarket = 0;
93
314
  for (const idx of sample) {
@@ -112,13 +333,16 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
112
333
  });
113
334
  const hitRate = edgeCount > 0 ? hits.length / edgeCount : 0;
114
335
 
115
- // Bootstrap hit rate CI
336
+ // Event-clustered hit rate CI on the EDGE signals only.
116
337
  const hitRateData = edgeSignals.map(s => {
117
338
  if (s.edge_pp > 0) return s.market_now > s.market_then ? 1 : 0;
118
339
  return s.market_now < s.market_then ? 1 : 0;
119
340
  });
120
- const hitRateCI = bootstrapCI(hitRateData, (sample) => {
121
- return sample.reduce((a, b) => a + b, 0) / sample.length;
341
+ const edgeEventGroups = groupIndicesByEvent(edgeSignals);
342
+ const hitRateCI = clusterBootstrapCI(edgeEventGroups, (sample) => {
343
+ let sum = 0;
344
+ for (const idx of sample) sum += hitRateData[idx];
345
+ return sample.length > 0 ? sum / sample.length : 0;
122
346
  });
123
347
 
124
348
  // P&L and capital-weighted ROI (matches Supabase methodology):
@@ -161,5 +385,8 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
161
385
  flat_bet_roi: roi,
162
386
  total_capital: totalCapital,
163
387
  signals,
388
+ baselines: computeBaselines(signals),
389
+ resolved_metrics: computeLegMetrics(signals.filter((s) => s.resolved), minEdgePp),
390
+ unresolved_metrics: computeLegMetrics(signals.filter((s) => !s.resolved), minEdgePp),
164
391
  };
165
392
  }
@@ -5,6 +5,16 @@ export interface FormatOpts {
5
5
  minEdge?: number; // 0-1 scale, default 0.005 (0.5pp)
6
6
  }
7
7
 
8
/**
 * Format a 0-1 ROI as a signed percentage string with one decimal,
 * e.g. `0.1234` → `+12.3%`, `-0.05` → `-5.0%`.
 *
 * Values that round to zero are always shown as `+0.0%`: `toFixed` keeps the
 * sign of a tiny negative number, which would otherwise print `-0.0%`.
 *
 * @param roi Return on investment as a fraction (1 = 100%).
 * @returns Signed percentage text.
 */
function fmtRoi(roi: number): string {
  const digits = (roi * 100).toFixed(1);
  if (digits === '-0.0') return '+0.0%';
  return `${roi >= 0 ? '+' : ''}${digits}%`;
}
12
+
13
/**
 * Format a percentage-point delta with an explicit sign and one decimal,
 * e.g. `1.24` → `+1.2pp`, `-3.56` → `-3.6pp`.
 *
 * Values that round to zero are always shown as `+0.0pp`: `toFixed` keeps
 * the sign of a tiny negative number, which would otherwise print `-0.0pp`.
 *
 * @param pp Delta in percentage points.
 * @returns Signed text with a `pp` suffix.
 */
function fmtPp(pp: number): string {
  const digits = pp.toFixed(1);
  if (digits === '-0.0') return '+0.0pp';
  return `${pp >= 0 ? '+' : ''}${digits}pp`;
}
17
+
8
18
  /**
9
19
  * Format complete backtest result for terminal display.
10
20
  */
@@ -17,6 +27,20 @@ export function formatBacktestHuman(result: BacktestResult, opts?: FormatOpts):
17
27
 
18
28
  const lines: string[] = [];
19
29
  lines.push(`Octagon Backtest — ${result.days}-day lookback (${fromStr} – ${toStr})`);
30
+ lines.push(`Universe: ${result.universe_description}`);
31
+ let feeHeader: string;
32
+ switch (result.fee_model) {
33
+ case 'none':
34
+ feeHeader = 'none — output is GROSS (pre-fee)';
35
+ break;
36
+ case 'taker':
37
+ feeHeader = 'taker (entries charged Kalshi taker fee = 0.07·p·(1−p))';
38
+ break;
39
+ case 'maker':
40
+ feeHeader = 'maker (free-entry execution assumption — net P&L equals gross)';
41
+ break;
42
+ }
43
+ lines.push(`Fee model: ${feeHeader}`);
20
44
  lines.push('══════════════════════════════════════════════════════════');
21
45
  lines.push('');
22
46
 
@@ -47,9 +71,73 @@ export function formatBacktestHuman(result: BacktestResult, opts?: FormatOpts):
47
71
  // lines.push('');
48
72
  lines.push(` Edge signals ${result.edge_signals} (min edge: ${minEdgePp}pp)`);
49
73
  if (result.edge_signals > 0) {
50
- lines.push(` Hit rate ${(result.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%]`);
51
- lines.push(` Flat-bet P&L ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (ROI: ${result.flat_bet_roi >= 0 ? '+' : ''}${(result.flat_bet_roi * 100).toFixed(1)}%)`);
52
- lines.push(` Capital deployed $${result.total_capital.toFixed(2)} (capital-weighted ROI)`);
74
+ // Resolved settles at 0/100 — realized. Unresolved is marked to the
75
+ // current Kalshi price — paper P&L that can reverse. Splitting them
76
+ // makes it visible when one leg is carrying a weak other.
77
+ const r = result.resolved_metrics;
78
+ const u = result.unresolved_metrics;
79
+ if (r.edge_signals > 0) {
80
+ lines.push('');
81
+ lines.push(' RESOLVED (realized P&L)');
82
+ lines.push(` Hit rate ${(r.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(r.hit_rate_ci[0] * 100).toFixed(1)}% to ${(r.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered] n=${r.edge_signals}`);
83
+ lines.push(` Flat-bet P&L ${fmtRoi(r.flat_bet_roi)} ROI (${r.flat_bet_pnl >= 0 ? '+' : ''}$${r.flat_bet_pnl.toFixed(2)} on $${r.total_capital.toFixed(2)} capital)`);
84
+ }
85
+ if (u.edge_signals > 0) {
86
+ lines.push('');
87
+ lines.push(' UNRESOLVED (mark-to-market — paper P&L)');
88
+ lines.push(` Directional drift ${(u.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(u.hit_rate_ci[0] * 100).toFixed(1)}% to ${(u.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered] n=${u.edge_signals}`);
89
+ lines.push(` M2M P&L ${fmtRoi(u.flat_bet_roi)} ROI (${u.flat_bet_pnl >= 0 ? '+' : ''}$${u.flat_bet_pnl.toFixed(2)} on $${u.total_capital.toFixed(2)} capital)`);
90
+ }
91
+ if (r.edge_signals > 0 && u.edge_signals > 0) {
92
+ lines.push('');
93
+ lines.push(' COMBINED (both legs blended — interpret with care)');
94
+ lines.push(` Hit rate ${(result.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered]`);
95
+ lines.push(` Flat-bet P&L ${fmtRoi(result.flat_bet_roi)} ROI (${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} on $${result.total_capital.toFixed(2)} capital)`);
96
+ }
97
+ // Fee drag — show only when --fees is on so existing output is unchanged.
98
+ if (result.fee_model !== 'none' && result.flat_bet_pnl !== result.flat_bet_pnl_net) {
99
+ const feeDrag = result.flat_bet_pnl - result.flat_bet_pnl_net;
100
+ lines.push('');
101
+ lines.push(` Fees applied (${result.fee_model})`);
102
+ lines.push(` Gross P&L ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (${fmtRoi(result.flat_bet_roi)} ROI)`);
103
+ lines.push(` Fee drag -$${feeDrag.toFixed(2)}`);
104
+ lines.push(` Net P&L ${result.flat_bet_pnl_net >= 0 ? '+' : ''}$${result.flat_bet_pnl_net.toFixed(2)} (${fmtRoi(result.flat_bet_roi_net)} ROI)`);
105
+ } else if (r.edge_signals === 0 && u.edge_signals === 0) {
106
+ // No edge signals on either leg — fall back to the old single-line view.
107
+ lines.push(` Hit rate ${(result.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%]`);
108
+ lines.push(` Flat-bet P&L ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (ROI: ${fmtRoi(result.flat_bet_roi)})`);
109
+ }
110
+ }
111
+
112
+ // ─── Zero-skill baselines ─────────────────────────────────────────────
113
+ // The headline ROI / hit rate can look strong purely from the universe's
114
+ // structural NO tilt (multi-outcome events resolve mostly NO). These two
115
+ // baselines run the same post-filter universe under zero-skill strategies
116
+ // so the user can see whether the model adds anything.
117
+ const b = result.baselines;
118
+ if (result.signals.length > 0) {
119
+ lines.push('');
120
+ lines.push(' Zero-skill baselines (same universe, no model):');
121
+ lines.push(` Always-NO ROI ${fmtRoi(b.always_no_roi)} hit rate ${(b.always_no_hit_rate * 100).toFixed(1)}%`);
122
+ lines.push(` Always-YES ROI ${fmtRoi(b.always_yes_roi)} hit rate ${(b.always_yes_hit_rate * 100).toFixed(1)}%`);
123
+ lines.push(` Within-band skill ${fmtPp(b.within_band_skill_pp)} (model NO-ROI minus always-NO ROI, capital-weighted across entry-price bands)`);
124
+ // Per-band breakdown when at least one band has model bets
125
+ if (b.within_band_breakdown.some((r) => r.n_model > 0)) {
126
+ lines.push('');
127
+ lines.push(' Per-band skill breakdown:');
128
+ lines.push(` ${'Band'.padEnd(8)} ${'Model NO ROI'.padStart(13)} ${'Always-NO ROI'.padStart(14)} ${'Delta'.padStart(9)} ${'n_model'.padStart(7)} ${'n_total'.padStart(7)}`);
129
+ for (const row of b.within_band_breakdown) {
130
+ if (row.n_universe === 0) continue;
131
+ const delta = `${row.skill_delta_pp >= 0 ? '+' : ''}${row.skill_delta_pp.toFixed(1)}pp`;
132
+ lines.push(` ${row.band.padEnd(8)} ${fmtRoi(row.model_no_roi).padStart(13)} ${fmtRoi(row.always_no_roi).padStart(14)} ${delta.padStart(9)} ${String(row.n_model).padStart(7)} ${String(row.n_universe).padStart(7)}`);
133
+ }
134
+ }
135
+ }
136
+
137
+ // Coverage cost of the strict (no lifetime-volume look-ahead) volume gate.
138
+ if (result.signals_dropped_no_volume > 0) {
139
+ lines.push('');
140
+ lines.push(` Signals dropped: ${result.signals_dropped_no_volume} (no per-contract volume in Octagon snapshot; lifetime-volume fallback removed to avoid look-ahead bias)`);
53
141
  }
54
142
 
55
143
  // Resolved detail table