kalshi-trading-bot-cli 2.1.7 → 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kalshi-trading-bot-cli",
3
- "version": "2.1.7",
3
+ "version": "2.1.8",
4
4
  "description": "Kalshi Trading Bot CLI - AI-powered prediction market terminal.",
5
5
  "license": "MIT",
6
6
  "author": "Octagon AI, Inc.",
@@ -1,9 +1,92 @@
1
1
  import type { Database } from 'bun:sqlite';
2
2
  import { callKalshiApi } from '../tools/kalshi/api.js';
3
3
  import type { KalshiMarket } from '../tools/kalshi/types.js';
4
+ import { fetchAllOctagonEvents } from '../scan/octagon-events-api.js';
4
5
 
5
6
  const CONCURRENCY = 10;
6
7
 
8
/**
 * Origin of the event set a backtest runs over.
 *
 * - `'api'`   — events listed by the Octagon events API.
 * - `'local'` — events recorded in the local `octagon_reports` log.
 */
export type UniverseSource = 'local' | 'api';
10
+
11
/** A single event that belongs to the backtest universe. */
export interface UniverseEntry {
  /** Ticker identifying the event. */
  event_ticker: string;
  /** Category label attached to the event, or `null` when none is available. */
  category: null | string;
}
15
+
16
/** The resolved backtest universe together with its provenance. */
export interface Universe {
  /** Every event included in the universe. */
  events: UniverseEntry[];
  /** Which source the events were pulled from. */
  source: UniverseSource;
  /** Human-readable one-line summary of the universe (size and origin). */
  description: string;
}
21
+
22
/**
 * Build the backtest universe, i.e. the set of events that will be scored.
 *
 * `opts.source` selects where the events come from:
 *
 * - `'api'` (default): every event returned by `fetchAllOctagonEvents()`.
 *   This is systematic and reproducible across machines because it does not
 *   depend on what this particular install analyzed in the past. The call is
 *   made without `hasHistory=true` on purpose: a prior audit showed that
 *   flag silently dropped 373 of 662 events. Events with no usable snapshot
 *   are filtered later in the pipeline (selectSnapshotByDate returns null
 *   and they are skipped cheaply).
 *
 * - `'local'`: legacy behavior. Events are read from the local
 *   `octagon_reports` log via `buildEventQuery`. That reflects past usage of
 *   this machine rather than a defined universe; it remains useful for
 *   offline runs and for comparing against historical backtests.
 *
 * `opts.category`, when non-empty, narrows the universe. On the API path it
 * is a case-insensitive substring match against each event's category, and
 * events without a category are excluded. On the local path it is forwarded
 * to `buildEventQuery`.
 *
 * KNOWN LIMITATION: the API returns *today's* covered universe, not the
 * universe as of the entry date. Events dropped from coverage mid-window
 * vanish from the backtest — survivorship at the universe level. A true
 * point-in-time universe requires `events?as_of=<date>` upstream.
 *
 * @param db   SQLite handle; only queried on the `'local'` path.
 * @param opts Optional source selector and category filter.
 * @returns The selected events plus their source and a display description.
 */
export async function resolveUniverse(
  db: Database,
  opts?: { source?: UniverseSource; category?: string },
): Promise<Universe> {
  const source = opts?.source ?? 'api';
  const categoryFilter = opts?.category;

  if (source !== 'api') {
    // Legacy local path
    const { query, params } = buildEventQuery('', categoryFilter);
    const localRows = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
    const localEvents = localRows.map((row) => ({
      event_ticker: row.event_ticker,
      category: row.category,
    }));
    return {
      events: localEvents,
      source,
      description: `${localRows.length} events from local octagon_reports (NON-SYSTEMATIC — reflects past usage of this machine)`,
    };
  }

  const fetched = await fetchAllOctagonEvents();
  // An empty or missing category means "no filter".
  const needle = categoryFilter ? categoryFilter.toLowerCase() : undefined;

  const events: UniverseEntry[] = [];
  for (const apiEvent of fetched) {
    const category = apiEvent.series_category ?? null;
    // When filtering, events with no category never match.
    if (needle !== undefined && !category?.toLowerCase().includes(needle)) {
      continue;
    }
    events.push({ event_ticker: apiEvent.event_ticker, category });
  }

  return {
    events,
    source,
    description: `${events.length} events from Octagon API (systematic universe)`,
  };
}
72
+
73
/**
 * Load the Kalshi markets for every event in the universe exactly once.
 *
 * The result is keyed by `event_ticker`, so discoverSettledMarkets and
 * discoverOpenMarkets can both read from the same map instead of each
 * fetching every event payload again — halving the Kalshi call count.
 *
 * @param universe Events whose markets should be fetched.
 * @returns Map from event ticker to that event's markets.
 */
export async function fetchEventPayloads(
  universe: UniverseEntry[],
): Promise<Map<string, KalshiMarket[]>> {
  const payloadsByTicker = new Map<string, KalshiMarket[]>();

  const loadOne = async ({ event_ticker }: UniverseEntry): Promise<void> => {
    const eventMarkets = await fetchEventMarkets(event_ticker);
    payloadsByTicker.set(event_ticker, eventMarkets);
  };

  // Fetches run through parallelMap with the module-wide CONCURRENCY setting.
  await parallelMap(universe, loadOne, CONCURRENCY);
  return payloadsByTicker;
}
89
+
7
90
  export interface SettledMarket {
8
91
  ticker: string;
9
92
  event_ticker: string;
@@ -103,13 +186,23 @@ export async function parallelMap<T, R>(
103
186
  */
104
187
  export async function discoverSettledMarkets(
105
188
  db: Database,
106
- opts?: { category?: string },
189
+ opts?: {
190
+ category?: string;
191
+ /** Pre-resolved universe + payloads (Phase 4 path). When omitted, falls back to the legacy local SQL path. */
192
+ universe?: Universe;
193
+ payloads?: Map<string, KalshiMarket[]>;
194
+ },
107
195
  ): Promise<SettledMarket[]> {
108
- const { query, params } = buildEventQuery('', opts?.category);
109
- const events = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
196
+ let events: Array<{ event_ticker: string; category: string | null }>;
197
+ if (opts?.universe) {
198
+ events = opts.universe.events;
199
+ } else {
200
+ const { query, params } = buildEventQuery('', opts?.category);
201
+ events = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
202
+ }
110
203
 
111
204
  const batchResults = await parallelMap(events, async ({ event_ticker, category: cat }) => {
112
- const markets = await fetchEventMarkets(event_ticker);
205
+ const markets = opts?.payloads?.get(event_ticker) ?? await fetchEventMarkets(event_ticker);
113
206
  const settled: SettledMarket[] = [];
114
207
 
115
208
  for (const m of markets) {
@@ -137,13 +230,22 @@ export async function discoverSettledMarkets(
137
230
  */
138
231
  export async function discoverOpenMarkets(
139
232
  db: Database,
140
- opts?: { category?: string },
233
+ opts?: {
234
+ category?: string;
235
+ universe?: Universe;
236
+ payloads?: Map<string, KalshiMarket[]>;
237
+ },
141
238
  ): Promise<OpenMarket[]> {
142
- const { query: q2, params: p2 } = buildEventQuery('', opts?.category);
143
- const events2 = db.query(q2).all(p2) as Array<{ event_ticker: string; category: string | null }>;
239
+ let events2: Array<{ event_ticker: string; category: string | null }>;
240
+ if (opts?.universe) {
241
+ events2 = opts.universe.events;
242
+ } else {
243
+ const { query: q2, params: p2 } = buildEventQuery('', opts?.category);
244
+ events2 = db.query(q2).all(p2) as Array<{ event_ticker: string; category: string | null }>;
245
+ }
144
246
 
145
247
  const batchResults = await parallelMap(events2, async ({ event_ticker, category: cat }) => {
146
- const markets = await fetchEventMarkets(event_ticker);
248
+ const markets = opts?.payloads?.get(event_ticker) ?? await fetchEventMarkets(event_ticker);
147
249
  const open: OpenMarket[] = [];
148
250
 
149
251
  for (const m of markets) {
@@ -1,4 +1,4 @@
1
- import type { ScoredSignal, BacktestResult } from './types.js';
1
+ import type { ScoredSignal, BacktestResult, LegMetrics } from './types.js';
2
2
 
3
3
  /**
4
4
  * Skill score: how much better Octagon is vs the market as a forecaster.
@@ -44,6 +44,68 @@ export function bootstrapCI(
44
44
  return [stats[lo], stats[hi]];
45
45
  }
46
46
 
47
/**
 * Cluster bootstrap — resamples GROUPS with replacement, not individual rows.
 *
 * Use when the unit of risk is the group, not the row. In our case Kalshi
 * events are multi-outcome: a single "Will the Fed cut N times?" event has
 * a ladder of NO contracts that all settle together. If the model bets NO
 * on five rungs and the Fed cuts once, all five settle NO simultaneously
 * — that's *one* underlying outcome contributing five rows.
 *
 * A row-level bootstrap treats those rows as independent, so its interval
 * narrows with the row count. The effective sample size is closer to the
 * event count, so the honest interval is wider.
 *
 * Each entry of `groups` is the list of row indices belonging to one
 * cluster (the indices need not be contiguous; they refer to per-row data
 * captured in a closure by `statFn`). Every iteration draws `groups.length`
 * groups with replacement, concatenates their indices, and applies `statFn`
 * to the pooled sample. An iteration whose pooled sample is empty records a
 * statistic of 0 without calling `statFn`.
 *
 * @param groups     Row indices per cluster. An empty list returns `[0, 0]`.
 * @param statFn     Statistic evaluated on each pooled index sample.
 * @param iterations Number of bootstrap resamples (positive integer).
 * @param alpha      Two-sided significance level in (0, 1); 0.05 → 95% CI.
 * @returns `[lower, upper]` percentile bounds of the bootstrap distribution.
 * @throws Error if `iterations` is not a finite positive integer or `alpha`
 *         is not a finite number strictly between 0 and 1 (checked only
 *         when `groups` is non-empty).
 */
export function clusterBootstrapCI(
  groups: number[][],
  statFn: (sampleIndices: number[]) => number,
  iterations = 10_000,
  alpha = 0.05,
): [number, number] {
  if (groups.length === 0) return [0, 0];
  if (!Number.isFinite(iterations) || !Number.isInteger(iterations) || iterations <= 0) {
    throw new Error(`clusterBootstrapCI: iterations must be a finite integer > 0, got ${iterations}`);
  }
  if (!Number.isFinite(alpha) || alpha <= 0 || alpha >= 1) {
    throw new Error(`clusterBootstrapCI: alpha must be a finite number in (0, 1), got ${alpha}`);
  }

  const stats: number[] = [];
  for (let i = 0; i < iterations; i++) {
    const pooled: number[] = [];
    for (let j = 0; j < groups.length; j++) {
      const g = groups[Math.floor(Math.random() * groups.length)];
      // Append element by element. `pooled.push(...g)` passes the whole
      // cluster as call arguments and throws a RangeError once a cluster
      // exceeds the engine's argument-count limit.
      for (const idx of g) pooled.push(idx);
    }
    if (pooled.length === 0) { stats.push(0); continue; }
    stats.push(statFn(pooled));
  }

  stats.sort((a, b) => a - b);
  // Percentile positions, clamped into the valid index range.
  const lo = Math.min(Math.max(0, Math.floor((alpha / 2) * stats.length)), stats.length - 1);
  const hi = Math.min(Math.max(0, Math.floor((1 - alpha / 2) * stats.length)), stats.length - 1);
  return [stats[lo], stats[hi]];
}
93
+
94
/**
 * Partition item positions by `event_ticker`.
 *
 * Returns one index list per distinct event, ordered by the event's first
 * appearance in `items`; indices inside each list are ascending. The output
 * feeds clusterBootstrapCI when signals are correlated within an event
 * (multi-outcome ladders, mutually-exclusive option sets).
 */
function groupIndicesByEvent<T extends { event_ticker: string }>(items: T[]): number[][] {
  const buckets = new Map<string, number[]>();
  for (let position = 0; position < items.length; position++) {
    const ticker = items[position].event_ticker;
    const bucket = buckets.get(ticker);
    if (bucket === undefined) {
      buckets.set(ticker, [position]);
    } else {
      bucket.push(position);
    }
  }
  return Array.from(buckets.values());
}
108
+
47
109
  /**
48
110
  * Compute Brier score: ((forecast/100) - (outcome/100))²
49
111
  * Both forecast and outcome are on 0-100 scale.
@@ -52,10 +114,162 @@ function brier(forecast: number, outcome: number): number {
52
114
  return ((forecast / 100) - (outcome / 100)) ** 2;
53
115
  }
54
116
 
117
/**
 * Entry-price bands used by the within-band skill calculation.
 * Bounds are in cents on the 0-100 scale; `lo` is inclusive, `hi` exclusive.
 */
const PRICE_BANDS: Array<{ label: string; lo: number; hi: number }> = [
  { label: '5-20¢', lo: 5, hi: 20 },
  { label: '20-40¢', lo: 20, hi: 40 },
  { label: '40-60¢', lo: 40, hi: 60 },
  { label: '60-80¢', lo: 60, hi: 80 },
  { label: '80-95¢', lo: 80, hi: 95 },
];

/**
 * Zero-skill baselines evaluated on the same post-filter signals as the model.
 *
 * Why: Kalshi events are multi-outcome — most resolved contracts settle NO
 * because each event has one YES outcome and many NOs, so a model that
 * always picks NO posts a high hit rate by structure alone. The always-NO
 * baseline removes that structural tilt so any remaining selection skill is
 * visible.
 *
 * Within-band skill is the model's NO-bet ROI minus the always-NO ROI,
 * computed inside each entry-price band and then weighted across bands by
 * the capital the model put into NO bets. Comparing inside a band also
 * controls for the entry-price mix: a model that only bets cheap longshots
 * can look great against a universe-wide always-NO baseline yet mediocre
 * against the same price band.
 *
 * Signals whose entry price falls outside every band are ignored by the
 * band calculation but still count toward the universe-wide baselines.
 *
 * @param signals Scored signals (prices on the 0-100 scale).
 * @returns Universe-wide always-NO / always-YES ROI and hit rate, the
 *          capital-weighted within-band skill delta (percentage points),
 *          and one breakdown row per non-empty band.
 */
function computeBaselines(signals: ScoredSignal[]): BacktestResult['baselines'] {
  // P&L and capital of buying NO at entry and marking it at market_now.
  const takeNo = (sig: ScoredSignal): { pnl: number; capital: number; hit: boolean } => {
    const capital = (100 - sig.market_then) / 100;
    const settlement = 100 - sig.market_now;
    const pnl = (settlement - (100 - sig.market_then)) / 100;
    return { pnl, capital, hit: sig.market_now < sig.market_then };
  };
  // Same for buying YES at entry.
  const takeYes = (sig: ScoredSignal): { pnl: number; capital: number; hit: boolean } => {
    const capital = sig.market_then / 100;
    const pnl = (sig.market_now - sig.market_then) / 100;
    return { pnl, capital, hit: sig.market_now > sig.market_then };
  };

  // Universe-wide totals on the same post-filter rows.
  let noPnlTotal = 0;
  let noCapitalTotal = 0;
  let noHitCount = 0;
  let yesPnlTotal = 0;
  let yesCapitalTotal = 0;
  let yesHitCount = 0;
  for (const sig of signals) {
    const asNo = takeNo(sig);
    noPnlTotal += asNo.pnl;
    noCapitalTotal += asNo.capital;
    if (asNo.hit) noHitCount++;

    const asYes = takeYes(sig);
    yesPnlTotal += asYes.pnl;
    yesCapitalTotal += asYes.capital;
    if (asYes.hit) yesHitCount++;
  }
  const total = signals.length;
  const alwaysNoRoi = noCapitalTotal > 0 ? noPnlTotal / noCapitalTotal : 0;
  const alwaysYesRoi = yesCapitalTotal > 0 ? yesPnlTotal / yesCapitalTotal : 0;
  const alwaysNoHitRate = total > 0 ? noHitCount / total : 0;
  const alwaysYesHitRate = total > 0 ? yesHitCount / total : 0;

  // Per-band comparison of the model's NO bets against always-NO, then a
  // capital-weighted average of the per-band deltas.
  const breakdown: BacktestResult['baselines']['within_band_breakdown'] = [];
  let weightedDeltaSum = 0;
  let weightSum = 0;
  for (const { label, lo, hi } of PRICE_BANDS) {
    let universeCount = 0;
    let modelPnl = 0;
    let modelCapital = 0;
    let modelCount = 0;
    let baselinePnl = 0;
    let baselineCapital = 0;

    for (const sig of signals) {
      if (!(sig.market_then >= lo && sig.market_then < hi)) continue;
      universeCount++;

      const asNo = takeNo(sig);
      baselinePnl += asNo.pnl;
      baselineCapital += asNo.capital;

      // A model NO bet is a signal with negative edge; its own pnl and
      // capital fields are used for the model side.
      if (sig.edge_pp < 0) {
        modelPnl += sig.pnl;
        modelCapital += sig.capital;
        modelCount++;
      }
    }
    if (universeCount === 0) continue;

    const modelRoi = modelCapital > 0 ? modelPnl / modelCapital : 0;
    const baselineRoi = baselineCapital > 0 ? baselinePnl / baselineCapital : 0;
    const deltaPp = (modelRoi - baselineRoi) * 100;
    breakdown.push({
      band: label,
      model_no_roi: modelRoi,
      always_no_roi: baselineRoi,
      skill_delta_pp: deltaPp,
      n_model: modelCount,
      n_universe: universeCount,
    });
    // Bands where the model placed no NO bets carry zero weight.
    weightedDeltaSum += deltaPp * modelCapital;
    weightSum += modelCapital;
  }
  const withinBandSkillPp = weightSum > 0 ? weightedDeltaSum / weightSum : 0;

  return {
    always_no_roi: alwaysNoRoi,
    always_no_hit_rate: alwaysNoHitRate,
    always_yes_roi: alwaysYesRoi,
    always_yes_hit_rate: alwaysYesHitRate,
    within_band_skill_pp: withinBandSkillPp,
    within_band_breakdown: breakdown,
  };
}
214
+
215
/**
 * Scorecard for a single leg (the resolved-only or unresolved-only subset).
 *
 * Only signals with a non-zero edge of at least `minEdgePp` in absolute
 * value are counted. A signal is a hit when the price moved in the
 * direction of its edge: up for a positive edge, down for a negative one.
 * ROI is capital-weighted (total P&L divided by total capital), and the
 * hit-rate interval comes from an event-clustered bootstrap.
 *
 * @param signals   Signals belonging to this leg.
 * @param minEdgePp Minimum absolute edge, in percentage points, to count.
 * @returns Counts, hit rate with CI, P&L, ROI and capital for the leg.
 */
function computeLegMetrics(signals: ScoredSignal[], minEdgePp: number): LegMetrics {
  const qualifying = signals.filter((s) => s.edge_pp !== 0 && Math.abs(s.edge_pp) >= minEdgePp);
  const qualifyingCount = qualifying.length;

  // One pass: per-signal hit flag plus running P&L and capital totals.
  const hitFlags: number[] = [];
  let hitCount = 0;
  let pnlTotal = 0;
  let capitalTotal = 0;
  for (const sig of qualifying) {
    const movedWithEdge = sig.edge_pp > 0
      ? sig.market_now > sig.market_then
      : sig.market_now < sig.market_then;
    hitFlags.push(movedWithEdge ? 1 : 0);
    if (movedWithEdge) hitCount++;
    pnlTotal += sig.pnl;
    capitalTotal += sig.capital;
  }

  let hitRateCI: [number, number] = [0, 0];
  if (qualifyingCount > 0) {
    const clusters = groupIndicesByEvent(qualifying);
    hitRateCI = clusterBootstrapCI(clusters, (sample) => {
      let flagged = 0;
      for (const idx of sample) flagged += hitFlags[idx];
      return sample.length > 0 ? flagged / sample.length : 0;
    });
  }

  return {
    edge_signals: qualifyingCount,
    edge_hit_rate: qualifyingCount > 0 ? hitCount / qualifyingCount : 0,
    hit_rate_ci: hitRateCI,
    flat_bet_pnl: pnlTotal,
    flat_bet_roi: capitalTotal > 0 ? pnlTotal / capitalTotal : 0,
    total_capital: capitalTotal,
  };
}
250
+
251
+ const EMPTY_LEG: LegMetrics = {
252
+ edge_signals: 0,
253
+ edge_hit_rate: 0,
254
+ hit_rate_ci: [0, 0],
255
+ flat_bet_pnl: 0,
256
+ flat_bet_roi: 0,
257
+ total_capital: 0,
258
+ };
259
+
260
+ const EMPTY_BASELINES: BacktestResult['baselines'] = {
261
+ always_no_roi: 0,
262
+ always_no_hit_rate: 0,
263
+ always_yes_roi: 0,
264
+ always_yes_hit_rate: 0,
265
+ within_band_skill_pp: 0,
266
+ within_band_breakdown: [],
267
+ };
268
+
55
269
  /**
56
270
  * Compute all backtest metrics from a unified list of scored signals.
57
271
  */
58
- export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<BacktestResult, 'subscription_notice'> {
272
+ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<BacktestResult, 'subscription_notice' | 'signals_dropped_no_volume' | 'universe_source' | 'universe_size' | 'universe_description' | 'fee_model' | 'flat_bet_pnl_net' | 'flat_bet_roi_net'> {
59
273
  const n = signals.length;
60
274
  if (n === 0) {
61
275
  return {
@@ -75,6 +289,9 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
75
289
  flat_bet_roi: 0,
76
290
  total_capital: 0,
77
291
  signals: [],
292
+ baselines: EMPTY_BASELINES,
293
+ resolved_metrics: EMPTY_LEG,
294
+ unresolved_metrics: EMPTY_LEG,
78
295
  };
79
296
  }
80
297
 
@@ -84,10 +301,14 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
84
301
  const brierOctagon = brierOctagonScores.reduce((a, b) => a + b, 0) / n;
85
302
  const brierMarket = brierMarketScores.reduce((a, b) => a + b, 0) / n;
86
303
 
87
- // Skill score with bootstrap CI resample both
304
+ // Skill score with EVENT-CLUSTERED bootstrap CI. Why clustered: multi-
305
+ // outcome events (Fed-cut ladders, election option sets, price strikes)
306
+ // settle as a block — N contracts from one event aren't N independent
307
+ // observations. Row-level bootstrap shrinks the CI with sqrt(N rows)
308
+ // when the right denominator is sqrt(N events).
88
309
  const skillScore = computeSkillScore(brierOctagon, brierMarket);
89
- const indices = signals.map((_, i) => i);
90
- const skillCI = bootstrapCI(indices, (sample) => {
310
+ const eventGroups = groupIndicesByEvent(signals);
311
+ const skillCI = clusterBootstrapCI(eventGroups, (sample) => {
91
312
  let sumOctagon = 0;
92
313
  let sumMarket = 0;
93
314
  for (const idx of sample) {
@@ -112,13 +333,16 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
112
333
  });
113
334
  const hitRate = edgeCount > 0 ? hits.length / edgeCount : 0;
114
335
 
115
- // Bootstrap hit rate CI
336
+ // Event-clustered hit rate CI on the EDGE signals only.
116
337
  const hitRateData = edgeSignals.map(s => {
117
338
  if (s.edge_pp > 0) return s.market_now > s.market_then ? 1 : 0;
118
339
  return s.market_now < s.market_then ? 1 : 0;
119
340
  });
120
- const hitRateCI = bootstrapCI(hitRateData, (sample) => {
121
- return sample.reduce((a, b) => a + b, 0) / sample.length;
341
+ const edgeEventGroups = groupIndicesByEvent(edgeSignals);
342
+ const hitRateCI = clusterBootstrapCI(edgeEventGroups, (sample) => {
343
+ let sum = 0;
344
+ for (const idx of sample) sum += hitRateData[idx];
345
+ return sample.length > 0 ? sum / sample.length : 0;
122
346
  });
123
347
 
124
348
  // P&L and capital-weighted ROI (matches Supabase methodology):
@@ -161,5 +385,8 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
161
385
  flat_bet_roi: roi,
162
386
  total_capital: totalCapital,
163
387
  signals,
388
+ baselines: computeBaselines(signals),
389
+ resolved_metrics: computeLegMetrics(signals.filter((s) => s.resolved), minEdgePp),
390
+ unresolved_metrics: computeLegMetrics(signals.filter((s) => !s.resolved), minEdgePp),
164
391
  };
165
392
  }
@@ -5,6 +5,16 @@ export interface FormatOpts {
5
5
  minEdge?: number; // 0-1 scale, default 0.005 (0.5pp)
6
6
  }
7
7
 
8
/**
 * Render a fractional ROI (0.123 → 12.3%) as a percentage with one decimal.
 * Values that are zero or positive get a leading `+`.
 */
function fmtRoi(roi: number): string {
  const percent = (roi * 100).toFixed(1);
  if (roi >= 0) {
    return `+${percent}%`;
  }
  return `${percent}%`;
}
12
+
13
/**
 * Render a percentage-point delta with one decimal and a `pp` suffix.
 * Values that are zero or positive get a leading `+`.
 */
function fmtPp(pp: number): string {
  const magnitude = pp.toFixed(1);
  if (pp >= 0) {
    return `+${magnitude}pp`;
  }
  return `${magnitude}pp`;
}
17
+
8
18
  /**
9
19
  * Format complete backtest result for terminal display.
10
20
  */
@@ -17,6 +27,20 @@ export function formatBacktestHuman(result: BacktestResult, opts?: FormatOpts):
17
27
 
18
28
  const lines: string[] = [];
19
29
  lines.push(`Octagon Backtest — ${result.days}-day lookback (${fromStr} – ${toStr})`);
30
+ lines.push(`Universe: ${result.universe_description}`);
31
+ let feeHeader: string;
32
+ switch (result.fee_model) {
33
+ case 'none':
34
+ feeHeader = 'none — output is GROSS (pre-fee)';
35
+ break;
36
+ case 'taker':
37
+ feeHeader = 'taker (entries charged Kalshi taker fee = 0.07·p·(1−p))';
38
+ break;
39
+ case 'maker':
40
+ feeHeader = 'maker (free-entry execution assumption — net P&L equals gross)';
41
+ break;
42
+ }
43
+ lines.push(`Fee model: ${feeHeader}`);
20
44
  lines.push('══════════════════════════════════════════════════════════');
21
45
  lines.push('');
22
46
 
@@ -47,9 +71,73 @@ export function formatBacktestHuman(result: BacktestResult, opts?: FormatOpts):
47
71
  // lines.push('');
48
72
  lines.push(` Edge signals ${result.edge_signals} (min edge: ${minEdgePp}pp)`);
49
73
  if (result.edge_signals > 0) {
50
- lines.push(` Hit rate ${(result.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%]`);
51
- lines.push(` Flat-bet P&L ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (ROI: ${result.flat_bet_roi >= 0 ? '+' : ''}${(result.flat_bet_roi * 100).toFixed(1)}%)`);
52
- lines.push(` Capital deployed $${result.total_capital.toFixed(2)} (capital-weighted ROI)`);
74
+ // Resolved settles at 0/100 — realized. Unresolved is marked to the
75
+ // current Kalshi price — paper P&L that can reverse. Splitting them
76
+ // makes it visible when one leg is carrying the other, weaker one.
77
+ const r = result.resolved_metrics;
78
+ const u = result.unresolved_metrics;
79
+ if (r.edge_signals > 0) {
80
+ lines.push('');
81
+ lines.push(' RESOLVED (realized P&L)');
82
+ lines.push(` Hit rate ${(r.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(r.hit_rate_ci[0] * 100).toFixed(1)}% to ${(r.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered] n=${r.edge_signals}`);
83
+ lines.push(` Flat-bet P&L ${fmtRoi(r.flat_bet_roi)} ROI (${r.flat_bet_pnl >= 0 ? '+' : ''}$${r.flat_bet_pnl.toFixed(2)} on $${r.total_capital.toFixed(2)} capital)`);
84
+ }
85
+ if (u.edge_signals > 0) {
86
+ lines.push('');
87
+ lines.push(' UNRESOLVED (mark-to-market — paper P&L)');
88
+ lines.push(` Directional drift ${(u.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(u.hit_rate_ci[0] * 100).toFixed(1)}% to ${(u.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered] n=${u.edge_signals}`);
89
+ lines.push(` M2M P&L ${fmtRoi(u.flat_bet_roi)} ROI (${u.flat_bet_pnl >= 0 ? '+' : ''}$${u.flat_bet_pnl.toFixed(2)} on $${u.total_capital.toFixed(2)} capital)`);
90
+ }
91
+ if (r.edge_signals > 0 && u.edge_signals > 0) {
92
+ lines.push('');
93
+ lines.push(' COMBINED (both legs blended — interpret with care)');
94
+ lines.push(` Hit rate ${(result.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered]`);
95
+ lines.push(` Flat-bet P&L ${fmtRoi(result.flat_bet_roi)} ROI (${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} on $${result.total_capital.toFixed(2)} capital)`);
96
+ }
97
+ // Fee drag — show only when --fees is on so existing output is unchanged.
98
+ if (result.fee_model !== 'none' && result.flat_bet_pnl !== result.flat_bet_pnl_net) {
99
+ const feeDrag = result.flat_bet_pnl - result.flat_bet_pnl_net;
100
+ lines.push('');
101
+ lines.push(` Fees applied (${result.fee_model})`);
102
+ lines.push(` Gross P&L ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (${fmtRoi(result.flat_bet_roi)} ROI)`);
103
+ lines.push(` Fee drag -$${feeDrag.toFixed(2)}`);
104
+ lines.push(` Net P&L ${result.flat_bet_pnl_net >= 0 ? '+' : ''}$${result.flat_bet_pnl_net.toFixed(2)} (${fmtRoi(result.flat_bet_roi_net)} ROI)`);
105
+ } else if (r.edge_signals === 0 && u.edge_signals === 0) {
106
+ // No edge signals on either leg — fall back to the old single-line view.
107
+ lines.push(` Hit rate ${(result.edge_hit_rate * 100).toFixed(1)}% [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%]`);
108
+ lines.push(` Flat-bet P&L ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (ROI: ${fmtRoi(result.flat_bet_roi)})`);
109
+ }
110
+ }
111
+
112
+ // ─── Zero-skill baselines ─────────────────────────────────────────────
113
+ // The headline ROI / hit rate can look strong purely from the universe's
114
+ // structural NO tilt (multi-outcome events resolve mostly NO). These two
115
+ // baselines run the same post-filter universe under zero-skill strategies
116
+ // so the user can see whether the model adds anything.
117
+ const b = result.baselines;
118
+ if (result.signals.length > 0) {
119
+ lines.push('');
120
+ lines.push(' Zero-skill baselines (same universe, no model):');
121
+ lines.push(` Always-NO ROI ${fmtRoi(b.always_no_roi)} hit rate ${(b.always_no_hit_rate * 100).toFixed(1)}%`);
122
+ lines.push(` Always-YES ROI ${fmtRoi(b.always_yes_roi)} hit rate ${(b.always_yes_hit_rate * 100).toFixed(1)}%`);
123
+ lines.push(` Within-band skill ${fmtPp(b.within_band_skill_pp)} (model NO-ROI minus always-NO ROI, capital-weighted across entry-price bands)`);
124
+ // Per-band breakdown when at least one band has model bets
125
+ if (b.within_band_breakdown.some((r) => r.n_model > 0)) {
126
+ lines.push('');
127
+ lines.push(' Per-band skill breakdown:');
128
+ lines.push(` ${'Band'.padEnd(8)} ${'Model NO ROI'.padStart(13)} ${'Always-NO ROI'.padStart(14)} ${'Delta'.padStart(9)} ${'n_model'.padStart(7)} ${'n_total'.padStart(7)}`);
129
+ for (const row of b.within_band_breakdown) {
130
+ if (row.n_universe === 0) continue;
131
+ const delta = `${row.skill_delta_pp >= 0 ? '+' : ''}${row.skill_delta_pp.toFixed(1)}pp`;
132
+ lines.push(` ${row.band.padEnd(8)} ${fmtRoi(row.model_no_roi).padStart(13)} ${fmtRoi(row.always_no_roi).padStart(14)} ${delta.padStart(9)} ${String(row.n_model).padStart(7)} ${String(row.n_universe).padStart(7)}`);
133
+ }
134
+ }
135
+ }
136
+
137
+ // Coverage cost of the strict (no lifetime-volume look-ahead) volume gate.
138
+ if (result.signals_dropped_no_volume > 0) {
139
+ lines.push('');
140
+ lines.push(` Signals dropped: ${result.signals_dropped_no_volume} (no per-contract volume in Octagon snapshot; lifetime-volume fallback removed to avoid look-ahead bias)`);
53
141
  }
54
142
 
55
143
  // Resolved detail table
@@ -5,6 +5,10 @@ export interface BacktestOpts {
5
5
  category?: string;
6
6
  minEdge: number; // fractional (0-1 scale), converted to pp by caller (e.g., 0.005 → 0.5pp)
7
7
  exportPath?: string;
8
+ /** Where the universe is sourced from. Default 'api'. */
9
+ universe?: 'api' | 'local';
10
+ /** Fee model for net P&L. Default 'none' — output is gross. */
11
+ fees?: 'none' | 'taker' | 'maker';
8
12
  }
9
13
 
10
14
  /** A single scored market signal — unified type for both resolved and unresolved. */
@@ -16,7 +20,12 @@ export interface ScoredSignal {
16
20
  market_then: number; // 0-100 (Kalshi trading price N days ago, from Octagon snapshot)
17
21
  market_now: number; // 0-100 (settlement for resolved, current price for unresolved)
18
22
  resolved: boolean;
19
- edge_pp: number; // model_prob - market_then
23
+ /**
24
+ * Raw, unrounded edge in percentage points: model_prob − market_then.
25
+ * Filtering on |edge| should always use this value; display layers
26
+ * round to 0.1pp or 1pp as appropriate.
27
+ */
28
+ edge_pp: number;
20
29
  pnl: number; // computed P&L for this signal ($ per $1 face value)
21
30
  capital: number; // $ capital deployed per $1 face value: kp/100 for YES edges, (100-kp)/100 for NO edges
22
31
  edge_bucket: string; // absolute-edge bucket label e.g. "0-5%", "5-10%", ..., "90%+"
@@ -24,6 +33,19 @@ export interface ScoredSignal {
24
33
  close_time: string;
25
34
  }
26
35
 
36
/**
 * Scorecard for one leg of the backtest, computed only on that leg's
 * signals: realized P&L for the resolved leg, mark-to-market P&L for the
 * unresolved leg.
 */
export interface LegMetrics {
  /** Number of signals in the leg that passed the minimum-edge filter. */
  edge_signals: number;
  /** Share of those signals whose price moved in the direction of the edge (0-1). */
  edge_hit_rate: number;
  /** `[lower, upper]` confidence bounds for `edge_hit_rate`. */
  hit_rate_ci: [number, number];
  /** Sum of per-signal P&L across the edge signals. */
  flat_bet_pnl: number;
  /** Capital-weighted ROI: `flat_bet_pnl / total_capital` (0 when no capital). */
  flat_bet_roi: number;
  /** Sum of per-signal capital across the edge signals. */
  total_capital: number;
}
48
+
27
49
  export interface BacktestResult {
28
50
  verdict: { summary: string; significant: boolean; profitable: boolean };
29
51
  days: number;
@@ -41,5 +63,74 @@ export interface BacktestResult {
41
63
  flat_bet_roi: number; // capital-weighted: sum(pnl) / sum(capital) across edge signals
42
64
  total_capital: number; // sum of capital across edge signals (ROI denominator)
43
65
  signals: ScoredSignal[];
66
+ /**
67
+ * Count of candidate signals dropped because the Octagon snapshot had no
68
+ * per-contract volume (older snapshots predate the per-contract field).
69
+ * We deliberately do NOT fall back to Kalshi lifetime volume — that
70
+ * would be a look-ahead bias (lifetime includes post-entry trading).
71
+ * Surfaced so users can see the coverage cost of the strict gate.
72
+ */
73
+ signals_dropped_no_volume: number;
74
+ /**
75
+ * Provenance for the universe — printed in the scorecard header so users
76
+ * (and downstream JSON consumers) can see whether the backtest ran over
77
+ * the systematic Octagon-API universe or the legacy local-DB universe.
78
+ */
79
+ universe_source: 'api' | 'local';
80
+ universe_size: number;
81
+ universe_description: string;
82
+ /**
83
+ * Fee model applied to the P&L. 'none' means the reported P&L is gross
84
+ * (no fees, no spreads). 'taker' charges the Kalshi taker fee per entry.
85
+ * 'maker' assumes free entry. Default 'none' so existing output is
86
+ * unchanged — opt in with --fees taker.
87
+ */
88
+ fee_model: 'none' | 'taker' | 'maker';
89
+ /** P&L net of fees when fee_model != 'none', else equal to flat_bet_pnl. */
90
+ flat_bet_pnl_net: number;
91
+ flat_bet_roi_net: number;
92
+ /**
93
+ * Sub-scorecards computed on the resolved and unresolved legs separately.
94
+ * Resolved settles at 0/100 — realized outcomes. Unresolved is marked to
95
+ * an arbitrary "now" price and may reverse before settlement. Blending
96
+ * them in the top-level fields can hide cases where the paper P&L
97
+ * inflates a weak realized result.
98
+ *
99
+ * The blended top-level fields (`edge_hit_rate`, `flat_bet_roi`, etc.)
100
+ * are kept for backward compatibility with existing consumers.
101
+ */
102
+ resolved_metrics: LegMetrics;
103
+ unresolved_metrics: LegMetrics;
104
+ /**
105
+ * Zero-skill baseline ROIs on the same post-filter universe. Always-NO is
106
+ * the relevant null because Kalshi's universe is structurally NO-heavy:
107
+ * multi-outcome events have one YES and many NOs. A model that consistently
108
+ * beats always-NO has selection skill; one that doesn't is mostly
109
+ * harvesting the favorite-longshot tilt.
110
+ */
111
+ baselines: {
112
+ always_no_roi: number;
113
+ always_no_hit_rate: number;
114
+ always_yes_roi: number;
115
+ always_yes_hit_rate: number;
116
+ /**
117
+ * Model NO-bet ROI minus always-NO ROI, computed in entry-price bands
118
+ * (5-20, 20-40, 40-60, 60-80, 80-95) and capital-weighted across bands.
119
+ * This is the honest "within-band skill" delta: it controls for both
120
+ * the structural NO tilt AND the entry-price mix.
121
+ */
122
+ within_band_skill_pp: number;
123
+ /**
124
+ * Per-band breakdown so users can see where the skill (if any) comes from.
125
+ */
126
+ within_band_breakdown: Array<{
127
+ band: string; // e.g. "20-40¢"
128
+ model_no_roi: number; // model NO-bet ROI in this band
129
+ always_no_roi: number; // always-NO ROI in this band
130
+ skill_delta_pp: number; // (model - baseline) × 100, percentage points
131
+ n_model: number; // count of model NO bets in this band
132
+ n_universe: number; // count of all-NO universe contracts in this band
133
+ }>;
134
+ };
44
135
  subscription_notice?: string;
45
136
  }
@@ -115,12 +115,43 @@ function getVolume(m: KalshiMarket): number {
115
115
  return m.volume || 0;
116
116
  }
117
117
 
118
+ /**
119
+ * Normalize user input into a canonical Kalshi ticker.
120
+ *
121
+ * Accepts any of:
122
+ * - Bare ticker, any case: `kxmeasles-26`, `KXMEASLES-26`, `KxMeAsLeS-26`
123
+ * - Kalshi URL: `https://kalshi.com/markets/kxmeasles/measles-cases/kxmeasles-26`
124
+ * - URL without protocol: `kalshi.com/markets/kxmeasles-26`
125
+ * - URL with query / fragment: `…/kxmeasles-26?ref=foo#yes`
126
+ *
127
+ * Strategy: detect URL-shaped input, extract the last non-empty path segment
128
+ * (which by Kalshi convention is the ticker), then uppercase. Bare tickers
129
+ * are simply uppercased. Kalshi's path is case-sensitive — without this
130
+ * `/markets/kxmeasles-26` 404s even though the ticker exists.
131
+ */
132
+ export function normalizeKalshiInput(input: string): string {
133
+ const trimmed = input.trim();
134
+ const looksLikeUrl =
135
+ /^https?:\/\//i.test(trimmed) || /^(www\.)?kalshi\.com\//i.test(trimmed);
136
+ if (looksLikeUrl) {
137
+ const noProto = trimmed
138
+ .replace(/^https?:\/\/[^/]+/i, '')
139
+ .replace(/^(www\.)?kalshi\.com/i, '');
140
+ const path = noProto.replace(/[?#].*$/, '').replace(/\/+$/, '');
141
+ const segments = path.split('/').filter(Boolean);
142
+ const last = segments[segments.length - 1] ?? '';
143
+ if (last) return last.toUpperCase();
144
+ }
145
+ return trimmed.toUpperCase();
146
+ }
147
+
118
148
  /**
119
149
  * Resolve a user-provided ticker to a market ticker.
120
- * Accepts: market ticker, event ticker, or series ticker.
150
+ * Accepts: market ticker, event ticker, series ticker, or Kalshi URL.
121
151
  * Returns the resolved KalshiMarket (picking the most active open market for events/series).
122
152
  */
123
- export async function resolveMarket(input: string): Promise<KalshiMarket> {
153
+ export async function resolveMarket(rawInput: string): Promise<KalshiMarket> {
154
+ const input = normalizeKalshiInput(rawInput);
124
155
  // 1. Try as a market ticker first
125
156
  try {
126
157
  const res = await callKalshiApi('GET', `/markets/${input}`);
@@ -178,7 +209,7 @@ export async function resolveMarket(input: string): Promise<KalshiMarket> {
178
209
  if (!(err instanceof KalshiApiError && err.statusCode === 404)) throw err;
179
210
  }
180
211
 
181
- throw new Error(`Could not find a market for "${input}". Try a full market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), or series ticker (e.g. KXBTC).`);
212
+ throw new Error(`Could not find a market for "${rawInput}" (normalized to "${input}"). Try a market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), series ticker (e.g. KXBTC), or a Kalshi URL like https://kalshi.com/markets/<series>/<slug>/<event>.`);
182
213
  }
183
214
 
184
215
  export async function handleAnalyze(
@@ -390,11 +421,41 @@ export async function handleAnalyze(
390
421
  // This is the "Refreshed" date — what bumps when --refresh runs.
391
422
  // modelRunAt = Octagon's analysis_last_updated (when their model last
392
423
  // scored this event). Independent of our cache.
393
- const refreshedAt = latestDbReport
394
- ? new Date(latestDbReport.fetched_at * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
424
+ //
425
+ // Load timestamps from a single coherent source: the row identified by
426
+ // report.reportId is the exact row used for THIS analysis. The previous
427
+ // implementation mixed fields from market-keyed and event-keyed rows
428
+ // (different captured runs), so refreshedAt and modelRunAt could refer
429
+ // to different snapshots.
430
+ //
431
+ // If the primary row doesn't carry analysis_last_updated (fetchReport
432
+ // path doesn't expose it), fall back to the latest event-keyed prefetch
433
+ // row for that field only — never for fetched_at.
434
+ const primaryRow = report.reportId
435
+ ? db.query(
436
+ `SELECT fetched_at, analysis_last_updated FROM octagon_reports WHERE report_id = $rid`,
437
+ ).get({ $rid: report.reportId }) as
438
+ | { fetched_at: number; analysis_last_updated: string | null }
439
+ | undefined
440
+ : undefined;
441
+ let fetchedAtEpoch = primaryRow?.fetched_at ?? null;
442
+ let analysisLastUpdated = primaryRow?.analysis_last_updated ?? null;
443
+ if ((!fetchedAtEpoch || !analysisLastUpdated) && eventTicker && eventTicker !== resolvedTicker) {
444
+ const eventRow = db.query(
445
+ `SELECT fetched_at, analysis_last_updated FROM octagon_reports
446
+ WHERE event_ticker = $et AND variant_used = 'events-api'
447
+ ORDER BY fetched_at DESC LIMIT 1`,
448
+ ).get({ $et: eventTicker }) as { fetched_at: number; analysis_last_updated: string | null } | undefined;
449
+ if (eventRow) {
450
+ fetchedAtEpoch = fetchedAtEpoch ?? eventRow.fetched_at;
451
+ analysisLastUpdated = analysisLastUpdated ?? eventRow.analysis_last_updated;
452
+ }
453
+ }
454
+ const refreshedAt = fetchedAtEpoch
455
+ ? new Date(fetchedAtEpoch * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
395
456
  : null;
396
- const modelRunAt = latestDbReport?.analysis_last_updated
397
- ? latestDbReport.analysis_last_updated.replace('T', ' ').slice(0, 16) + ' UTC'
457
+ const modelRunAt = analysisLastUpdated
458
+ ? analysisLastUpdated.replace('T', ' ').slice(0, 16) + ' UTC'
398
459
  : null;
399
460
 
400
461
  // hasModel + canComputeEdge were computed earlier (above Kelly/signal),
@@ -403,12 +464,13 @@ export async function handleAnalyze(
403
464
 
404
465
  // staleUpstream = user asked for --refresh but Octagon's upstream model run
405
466
  // timestamp didn't move. Cache fetch time bumped, but the underlying report
406
- // body is the same one Octagon previously generated. The user wanted fresh
407
- // analysis; they got an unchanged stale one.
467
+ // body is the same one Octagon previously generated. Compare against the
468
+ // same coherent source we used for modelRunAt above — otherwise we could
469
+ // false-positive on staleness when the two lookups disagreed.
408
470
  const staleUpstream = refresh
409
471
  && preRefreshAnalysis != null
410
- && latestDbReport?.analysis_last_updated != null
411
- && preRefreshAnalysis === latestDbReport.analysis_last_updated;
472
+ && analysisLastUpdated != null
473
+ && preRefreshAnalysis === analysisLastUpdated;
412
474
 
413
475
  // Null out trading-side fields when the underlying inputs are unavailable.
414
476
  // JSON consumers previously saw modelProb: 0.5 / marketProb: 0.5 / edge: 0
@@ -2,7 +2,7 @@ import type { ParsedArgs } from './parse-args.js';
2
2
  import type { CLIResponse } from './json.js';
3
3
  import { wrapSuccess } from './json.js';
4
4
  import { getDb } from '../db/index.js';
5
- import { discoverSettledMarkets, discoverOpenMarkets, parallelMap } from '../backtest/discovery.js';
5
+ import { discoverSettledMarkets, discoverOpenMarkets, parallelMap, resolveUniverse, fetchEventPayloads } from '../backtest/discovery.js';
6
6
  import { fetchAndCacheHistory, selectSnapshotByDate, SubscriptionRequiredError, type OutcomeProbability } from '../backtest/fetcher.js';
7
7
  import { computeMetrics } from '../backtest/metrics.js';
8
8
  import type { BacktestResult, ScoredSignal } from '../backtest/types.js';
@@ -37,21 +37,24 @@ function edgeBucketLabel(edgePp: number): string {
37
37
  }
38
38
 
39
39
  /**
40
- * Return the tradeable volume for a contract.
41
- * Prefers per-contract volume fields from the Octagon snapshot (as the
42
- * Supabase methodology does); falls back to Kalshi lifetime volume for
43
- * older cached snapshots that pre-date the API's per-contract volume.
40
+ * Return the tradeable volume for a contract, measured AT SNAPSHOT TIME.
41
+ *
42
+ * Returns null when the Octagon snapshot has no per-contract volume — older
43
+ * snapshots predate the per-contract field. We deliberately do NOT fall back
44
+ * to Kalshi LIFETIME volume here: lifetime volume includes trading that
45
+ * happened *after* the entry date, so a contract with zero liquidity at
46
+ * entry that later became active would silently pass the tradeability gate
47
+ * retroactively (look-ahead bias on the tradeable filter).
48
+ *
49
+ * Callers should skip the signal when this returns null and count it as
50
+ * "dropped via no per-contract volume" so the coverage cost is visible.
44
51
  */
45
- function contractVolume(
46
- perContract: OutcomeProbability | null,
47
- fallbackLifetimeVolume: number,
48
- ): number {
49
- if (perContract) {
50
- const v = typeof perContract.volume === 'number' ? perContract.volume : null;
51
- const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
52
- if (v !== null || v24 !== null) return Math.max(v ?? 0, v24 ?? 0);
53
- }
54
- return fallbackLifetimeVolume;
52
+ function contractVolume(perContract: OutcomeProbability | null): number | null {
53
+ if (!perContract) return null;
54
+ const v = typeof perContract.volume === 'number' ? perContract.volume : null;
55
+ const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
56
+ if (v === null && v24 === null) return null;
57
+ return Math.max(v ?? 0, v24 ?? 0);
55
58
  }
56
59
 
57
60
  export { formatBacktestHuman };
@@ -74,11 +77,26 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
74
77
 
75
78
  const signals: ScoredSignal[] = [];
76
79
  let subscriptionNotice: string | undefined;
80
+ // Counter for signals dropped because the Octagon snapshot had no
81
+ // per-contract volume. Surfaced in the result so users can see how much
82
+ // coverage the strict (no lifetime-volume look-ahead) gate cost them.
83
+ let signalsDroppedNoVolume = 0;
84
+
85
+ // ─── UNIVERSE RESOLUTION (Phase 4, Issue 7) ────────────────────────────
86
+ // Resolve once and share the Kalshi event-payload map between both legs
87
+ // so we fetch each event payload only once instead of twice. The
88
+ // payloads map is built lazily — we only fetch when at least one leg
89
+ // will use it.
90
+ const universeSource = args.backtestUniverse ?? 'api';
91
+ const universe = await resolveUniverse(db, { source: universeSource, category: args.category });
92
+ // Fetch all event payloads once and share between legs. parallelMap
93
+ // caps concurrency so this doesn't hammer Kalshi.
94
+ const payloads = await fetchEventPayloads(universe.events);
77
95
 
78
96
  // ─── RESOLVED: settled markets with historical Octagon snapshots ────────
79
97
  if (!args.unresolved) {
80
98
  try {
81
- const settled = await discoverSettledMarkets(db, { category: args.category });
99
+ const settled = await discoverSettledMarkets(db, { universe, payloads, category: args.category });
82
100
 
83
101
  if (settled.length > 0) {
84
102
  // Group by event_ticker to batch history fetches
@@ -115,12 +133,17 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
115
133
  const marketThen = perMarket.market_probability;
116
134
  if (!Number.isFinite(modelProb) || !Number.isFinite(marketThen)) continue;
117
135
  const marketNow = m.result === 'yes' ? 100 : 0;
118
- const edgePp = Math.round((modelProb - marketThen) * 10) / 10;
136
+ // Unrounded edge: filtering happens downstream against the
137
+ // raw value. Display layer rounds for presentation. Rounding
138
+ // here makes the minEdge filter asymmetric (0.449 rounds to 0.4
139
+ // and is excluded; 0.451 rounds to 0.5 and is included).
140
+ const edgePp = modelProb - marketThen;
119
141
 
120
- // Tradeable filter — per-contract volume from the Octagon snapshot
121
- // (matches Supabase methodology); falls back to Kalshi lifetime
122
- // volume for pre-API-change cached snapshots.
123
- const vol = contractVolume(perMarket, m.volume);
142
+ // Tradeable filter — per-contract volume from the Octagon
143
+ // snapshot only (no Kalshi lifetime-volume fallback, which would
144
+ // be a look-ahead since lifetime includes post-entry trading).
145
+ const vol = contractVolume(perMarket);
146
+ if (vol === null) { signalsDroppedNoVolume++; continue; }
124
147
  if (vol < minVolume) continue;
125
148
  // Price is marketThen (the price you'd transact at for a resolved bet).
126
149
  if (marketThen < minPrice || marketThen > maxPrice) continue;
@@ -137,8 +160,13 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
137
160
  pnl = (marketThen - marketNow) / 100;
138
161
  capital = (100 - marketThen) / 100;
139
162
  } else {
140
- // Zero edge: capital still reflects the tradeable side implied by sign
141
- // (use YES side so divide-by-zero checks don't fire on 0-edge signals).
163
+ // Zero edge: model and market agree exactly. Such signals are
164
+ // excluded from edge metrics (metrics.ts filters edge_pp != 0
165
+ // && |edge_pp| >= minEdgePp) but kept in `signals` so the CSV
166
+ // export retains a complete picture of what was scored. We
167
+ // assign YES-side capital so divide-by-zero checks don't fire
168
+ // — the capital field is consulted only when computing ROI on
169
+ // the edge subset, where these rows aren't present.
142
170
  capital = marketThen / 100;
143
171
  }
144
172
  if (capital <= 0) continue;
@@ -175,7 +203,7 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
175
203
  // ─── UNRESOLVED: open markets with current Kalshi prices ───────────────
176
204
  if (!args.resolved) {
177
205
  try {
178
- const openMarkets = await discoverOpenMarkets(db, { category: args.category });
206
+ const openMarkets = await discoverOpenMarkets(db, { universe, payloads, category: args.category });
179
207
 
180
208
  // Group by event_ticker to batch history fetches (same as resolved path).
181
209
  const openByEvent = new Map<string, typeof openMarkets>();
@@ -207,13 +235,21 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
207
235
  const confidenceScore = snap.confidence_score ?? 0;
208
236
 
209
237
  const marketNow = m.market_prob * 100; // current Kalshi price (0-100)
210
- const edgePp = Math.round((modelProb - marketThen) * 10) / 10;
238
+ // Unrounded edge — see resolved-leg comment above.
239
+ const edgePp = modelProb - marketThen;
211
240
 
212
- // Tradeable filter — per-contract volume from the Octagon snapshot.
213
- const vol = contractVolume(perMarket, m.volume);
241
+ // Tradeable filter — per-contract volume from the Octagon snapshot
242
+ // only (no Kalshi lifetime-volume fallback, see contractVolume).
243
+ const vol = contractVolume(perMarket);
244
+ if (vol === null) { signalsDroppedNoVolume++; continue; }
214
245
  if (vol < minVolume) continue;
215
- // Price is marketNow (the current transactable price for an open position).
216
- if (marketNow < minPrice || marketNow > maxPrice) continue;
246
+ // Filter on the ENTRY price (marketThen), not the current mark
247
+ // (marketNow). Filtering on marketNow conditions the sample on
248
+ // the outcome: positions that collapsed below minPrice or ran
249
+ // above maxPrice get silently dropped *after* we observe the
250
+ // move. That truncates both tails of the P&L distribution and
251
+ // is a look-ahead bias. Matches the resolved leg above.
252
+ if (marketThen < minPrice || marketThen > maxPrice) continue;
217
253
 
218
254
  // M2M P&L and capital per $1 face value.
219
255
  let pnl = 0;
@@ -262,9 +298,34 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
262
298
  // ─── COMPUTE METRICS ───────────────────────────────────────────────────
263
299
  const metrics = computeMetrics(signals, minEdgePp);
264
300
 
301
+ // Fee model — defaults to 'none' so existing output is unchanged. With
302
+ // --fees taker we apply Kalshi's taker formula: 0.07 × p × (1−p) per
303
+ // entry, where p is the entry probability for the side we took. Maker
304
+ // execution assumes zero entry fee.
305
+ // We compute on the EDGE signals only (same population as flat_bet_pnl).
306
+ const feeModel = args.backtestFees ?? 'none';
307
+ let feeDrag = 0;
308
+ if (feeModel === 'taker') {
309
+ for (const s of signals) {
310
+ if (s.edge_pp === 0 || Math.abs(s.edge_pp) < minEdgePp) continue;
311
+ // Entry probability on the side we took (YES on positive edge, NO on negative).
312
+ const p = (s.edge_pp > 0 ? s.market_then : (100 - s.market_then)) / 100;
313
+ feeDrag += 0.07 * p * (1 - p);
314
+ }
315
+ }
316
+ const flatBetPnlNet = metrics.flat_bet_pnl - feeDrag;
317
+ const flatBetRoiNet = metrics.total_capital > 0 ? flatBetPnlNet / metrics.total_capital : 0;
318
+
265
319
  const result: BacktestResult = {
266
320
  ...metrics,
267
321
  days,
322
+ signals_dropped_no_volume: signalsDroppedNoVolume,
323
+ universe_source: universe.source,
324
+ universe_size: universe.events.length,
325
+ universe_description: universe.description,
326
+ fee_model: feeModel,
327
+ flat_bet_pnl_net: flatBetPnlNet,
328
+ flat_bet_roi_net: flatBetRoiNet,
268
329
  subscription_notice: subscriptionNotice,
269
330
  };
270
331
 
@@ -120,6 +120,10 @@ ${p}backtest --category crypto Filter by category
120
120
  ${p}backtest --min-edge 10 Stricter edge threshold in pp (default 0.5pp)
121
121
  ${p}backtest --min-volume 10 Per-contract volume gate (default 1)
122
122
  ${p}backtest --min-price 5 --max-price 95 Tradeable price band 0-100 (defaults: 5 / 95)
123
+ ${p}backtest --universe api Systematic Octagon-API universe (default; reproducible across machines)
124
+ ${p}backtest --universe local Legacy local octagon_reports universe (offline, NON-SYSTEMATIC)
125
+ ${p}backtest --fees taker Apply Kalshi taker fee (0.07·p·(1−p) per entry); default 'none' = gross
126
+ ${p}backtest --fees maker Maker execution (free entry)
123
127
  ${p}backtest --export results.csv Per-market detail CSV
124
128
  ${p}backtest --json Machine-readable output
125
129
 
@@ -127,8 +131,9 @@ Looks back N days, compares what the model said then to where the market is now.
127
131
  Resolved markets: scored against Kalshi settlement (0 or 100).
128
132
  Unresolved markets: mark-to-market vs current Kalshi trading price.
129
133
  Per-contract entry: mp/kp come from the per-contract outcome_probabilities on the
130
- Octagon snapshot (no event-level fallback). Volume gate uses per-contract volume
131
- from the snapshot when available, else current Kalshi lifetime volume.
134
+ Octagon snapshot (no event-level fallback). Volume gate requires per-contract
135
+ volume from the snapshot; signals without it are dropped (the legacy fallback
136
+ to Kalshi lifetime volume was a look-ahead and has been removed).
132
137
  ROI is capital-weighted: sum(pnl) / sum(capital) across edge signals, where capital
133
138
  is kp/100 for YES edges and (100-kp)/100 for NO edges (matches Supabase methodology).`,
134
139
 
@@ -467,7 +472,8 @@ System:
467
472
 
468
473
  Flags: --json, --refresh, --performance, --dry-run, --verbose
469
474
  Backtest flags: --days, --max-age, --resolved, --unresolved, --category, --min-edge,
470
- --min-volume, --min-price, --max-price, --export
475
+ --min-volume, --min-price, --max-price, --export,
476
+ --universe api|local (default api), --fees none|taker|maker (default none)
471
477
  Run "kalshi help <command>" for detailed usage.`;
472
478
  }
473
479
 
@@ -157,6 +157,14 @@ export async function handleSlashCommand(input: string): Promise<CommandResult |
157
157
  else if (a === '--min-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.minPrice = v; }
158
158
  else if (a === '--max-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.maxPrice = v; }
159
159
  else if (a === '--export') { const v = args[++i]; if (v) btArgs.exportPath = v; }
160
+ else if (a === '--universe') { const v = args[++i]; if (v === 'api' || v === 'local') btArgs.backtestUniverse = v; }
161
+ else if (a === '--fees') { const v = args[++i]; if (v === 'none' || v === 'taker' || v === 'maker') btArgs.backtestFees = v; }
162
+ }
163
+ // Mirror parse-args' mutual-exclusion check — the slash parser above
164
+ // accepts both flags independently, which would put btArgs in a
165
+ // conflicting state before handleBacktest could see it.
166
+ if (btArgs.resolved && btArgs.unresolved) {
167
+ return { output: 'Error: --resolved and --unresolved cannot be used together.' };
160
168
  }
161
169
  const mode = btArgs.resolved ? 'resolved markets' : btArgs.unresolved ? 'open markets' : 'resolved + open markets';
162
170
  const daysLabel = btArgs.days ?? 15;
@@ -40,6 +40,10 @@ export interface ParsedArgs {
40
40
  category?: string;
41
41
  limit?: number;
42
42
  exportPath?: string;
43
+ /** Backtest universe source — 'api' (default) or 'local'. */
44
+ backtestUniverse?: 'api' | 'local';
45
+ /** Backtest fee model — 'none' (default), 'taker', or 'maker'. */
46
+ backtestFees?: 'none' | 'taker' | 'maker';
43
47
  minVolume?: number;
44
48
  minPrice?: number;
45
49
  maxPrice?: number;
@@ -98,6 +102,8 @@ export function parseArgs(argv: string[] = process.argv.slice(2)): ParsedArgs {
98
102
  let category: string | undefined;
99
103
  let limit: number | undefined;
100
104
  let exportPath: string | undefined;
105
+ let backtestUniverse: 'api' | 'local' | undefined;
106
+ let backtestFees: 'none' | 'taker' | 'maker' | undefined;
101
107
  let maxAge: number | undefined;
102
108
  let minVolume: number | undefined;
103
109
  let minPrice: number | undefined;
@@ -237,6 +243,22 @@ export function parseArgs(argv: string[] = process.argv.slice(2)): ParsedArgs {
237
243
  } else if (arg === '--export') {
238
244
  const val = argv[++i];
239
245
  if (val != null) { exportPath = val; } else { parseErrors.push('--export requires a value'); }
246
+ } else if (arg === '--universe') {
247
+ if (i + 1 >= argv.length) {
248
+ parseErrors.push('--universe requires a value (expected "api" or "local")');
249
+ } else {
250
+ const val = argv[++i];
251
+ if (val === 'api' || val === 'local') { backtestUniverse = val; }
252
+ else { parseErrors.push(`Invalid --universe value: "${val}" (expected "api" or "local")`); }
253
+ }
254
+ } else if (arg === '--fees') {
255
+ if (i + 1 >= argv.length) {
256
+ parseErrors.push('--fees requires a value (expected "none", "taker", or "maker")');
257
+ } else {
258
+ const val = argv[++i];
259
+ if (val === 'none' || val === 'taker' || val === 'maker') { backtestFees = val; }
260
+ else { parseErrors.push(`Invalid --fees value: "${val}" (expected "none", "taker", or "maker")`); }
261
+ }
240
262
  } else if (arg === '--max-age') {
241
263
  const raw = argv[++i];
242
264
  if (raw != null) {
@@ -431,7 +453,7 @@ export function parseArgs(argv: string[] = process.argv.slice(2)): ParsedArgs {
431
453
  return {
432
454
  subcommand, positionalArgs, json, theme, ticker, interval, since, minConfidence, minEdge, side,
433
455
  live, refresh, report, dryRun, verbose, performance, resolved, unresolved, days, maxAge, category,
434
- limit, exportPath, minVolume, minPrice, maxPrice,
456
+ limit, exportPath, backtestUniverse, backtestFees, minVolume, minPrice, maxPrice,
435
457
  topK, behavioral, ranked, labelContains, closeBefore, windowDays, correlationInterval, timeframe,
436
458
  weights, bankroll, kellyMultiplier, n, maxPerCluster, maxCorrelation, minReturn, seriesTicker,
437
459
  sortBy, probabilities, tickers, query, showCluster, aggregateBy, activeOnly,