kalshi-trading-bot-cli 2.1.6 → 2.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/backtest/discovery.ts +110 -8
- package/src/backtest/metrics.ts +235 -8
- package/src/backtest/renderer.ts +91 -3
- package/src/backtest/types.ts +92 -1
- package/src/commands/analyze.ts +140 -39
- package/src/commands/backtest.ts +90 -29
- package/src/commands/help.ts +9 -3
- package/src/commands/index.ts +8 -0
- package/src/commands/parse-args.ts +23 -1
- package/src/commands/review.ts +28 -17
- package/src/scan/octagon-client.ts +130 -2
- package/src/tools/v2/portfolio-review.ts +1 -1
package/src/backtest/types.ts
CHANGED
|
@@ -5,6 +5,10 @@ export interface BacktestOpts {
|
|
|
5
5
|
category?: string;
|
|
6
6
|
minEdge: number; // fractional (0-1 scale), converted to pp by caller (e.g., 0.005 → 0.5pp)
|
|
7
7
|
exportPath?: string;
|
|
8
|
+
/** Where the universe is sourced from. Default 'api'. */
|
|
9
|
+
universe?: 'api' | 'local';
|
|
10
|
+
/** Fee model for net P&L. Default 'none' — output is gross. */
|
|
11
|
+
fees?: 'none' | 'taker' | 'maker';
|
|
8
12
|
}
|
|
9
13
|
|
|
10
14
|
/** A single scored market signal — unified type for both resolved and unresolved. */
|
|
@@ -16,7 +20,12 @@ export interface ScoredSignal {
|
|
|
16
20
|
market_then: number; // 0-100 (Kalshi trading price N days ago, from Octagon snapshot)
|
|
17
21
|
market_now: number; // 0-100 (settlement for resolved, current price for unresolved)
|
|
18
22
|
resolved: boolean;
|
|
19
|
-
|
|
23
|
+
/**
|
|
24
|
+
* Raw, unrounded edge in percentage points: model_prob − market_then.
|
|
25
|
+
* Filtering on |edge| should always use this value; display layers
|
|
26
|
+
* round to 0.1pp or 1pp as appropriate.
|
|
27
|
+
*/
|
|
28
|
+
edge_pp: number;
|
|
20
29
|
pnl: number; // computed P&L for this signal ($ per $1 face value)
|
|
21
30
|
capital: number; // $ capital deployed per $1 face value: kp/100 for YES edges, (100-kp)/100 for NO edges
|
|
22
31
|
edge_bucket: string; // absolute-edge bucket label e.g. "0-5%", "5-10%", ..., "90%+"
|
|
@@ -24,6 +33,19 @@ export interface ScoredSignal {
|
|
|
24
33
|
close_time: string;
|
|
25
34
|
}
|
|
26
35
|
|
|
36
|
+
/**
|
|
37
|
+
* Per-leg scorecard: realized P&L on the resolved leg, mark-to-market on the
|
|
38
|
+
* unresolved leg. Computed on the leg's subset of signals.
|
|
39
|
+
*/
|
|
40
|
+
export interface LegMetrics {
|
|
41
|
+
edge_signals: number; // presumably the number of this leg's signals that pass the edge filter — confirm in metrics.ts
|
|
42
|
+
edge_hit_rate: number; // presumably the winning share of those edge signals; scale (0-1 vs 0-100) is not visible here — TODO confirm
|
|
43
|
+
hit_rate_ci: [number, number]; // two-element tuple, presumably [lower, upper] bounds around edge_hit_rate; interval method/level not visible here
|
|
44
|
+
flat_bet_pnl: number; // presumably summed per-signal pnl over this leg's edge signals — verify against metrics.ts
|
|
45
|
+
flat_bet_roi: number; // presumably flat_bet_pnl / total_capital for this leg, mirroring BacktestResult.flat_bet_roi — TODO confirm
|
|
46
|
+
total_capital: number; // presumably summed capital over this leg's edge signals (ROI denominator), mirroring BacktestResult.total_capital
|
|
47
|
+
}
|
|
48
|
+
|
|
27
49
|
export interface BacktestResult {
|
|
28
50
|
verdict: { summary: string; significant: boolean; profitable: boolean };
|
|
29
51
|
days: number;
|
|
@@ -41,5 +63,74 @@ export interface BacktestResult {
|
|
|
41
63
|
flat_bet_roi: number; // capital-weighted: sum(pnl) / sum(capital) across edge signals
|
|
42
64
|
total_capital: number; // sum of capital across edge signals (ROI denominator)
|
|
43
65
|
signals: ScoredSignal[];
|
|
66
|
+
/**
|
|
67
|
+
* Count of candidate signals dropped because the Octagon snapshot had no
|
|
68
|
+
* per-contract volume (older snapshots predate the per-contract field).
|
|
69
|
+
* We deliberately do NOT fall back to Kalshi lifetime volume — that
|
|
70
|
+
* would be a look-ahead bias (lifetime includes post-entry trading).
|
|
71
|
+
* Surfaced so users can see the coverage cost of the strict gate.
|
|
72
|
+
*/
|
|
73
|
+
signals_dropped_no_volume: number;
|
|
74
|
+
/**
|
|
75
|
+
* Provenance for the universe — printed in the scorecard header so users
|
|
76
|
+
* (and downstream JSON consumers) can see whether the backtest ran over
|
|
77
|
+
* the systematic Octagon-API universe or the legacy local-DB universe.
|
|
78
|
+
*/
|
|
79
|
+
universe_source: 'api' | 'local';
|
|
80
|
+
universe_size: number;
|
|
81
|
+
universe_description: string;
|
|
82
|
+
/**
|
|
83
|
+
* Fee model applied to the P&L. 'none' means the reported P&L is gross
|
|
84
|
+
* (no fees, no spreads). 'taker' charges the Kalshi taker fee per entry.
|
|
85
|
+
* 'maker' assumes free entry. Default 'none' so existing output is
|
|
86
|
+
* unchanged — opt in with --fees taker.
|
|
87
|
+
*/
|
|
88
|
+
fee_model: 'none' | 'taker' | 'maker';
|
|
89
|
+
/** P&L net of fees when fee_model != 'none', else equal to flat_bet_pnl. */
|
|
90
|
+
flat_bet_pnl_net: number;
|
|
91
|
+
flat_bet_roi_net: number;
|
|
92
|
+
/**
|
|
93
|
+
* Sub-scorecards computed on the resolved and unresolved legs separately.
|
|
94
|
+
* Resolved settles at 0/100 — realized outcomes. Unresolved is marked to
|
|
95
|
+
* an arbitrary "now" price and may reverse before settlement. Blending
|
|
96
|
+
* them in the top-level fields can hide cases where the paper P&L
|
|
97
|
+
* inflates a weak realized result.
|
|
98
|
+
*
|
|
99
|
+
* The blended top-level fields (`edge_hit_rate`, `flat_bet_roi`, etc.)
|
|
100
|
+
* are kept for backward compatibility with existing consumers.
|
|
101
|
+
*/
|
|
102
|
+
resolved_metrics: LegMetrics;
|
|
103
|
+
unresolved_metrics: LegMetrics;
|
|
104
|
+
/**
|
|
105
|
+
* Zero-skill baseline ROIs on the same post-filter universe. Always-NO is
|
|
106
|
+
* the relevant null because Kalshi's universe is structurally NO-heavy:
|
|
107
|
+
* multi-outcome events have one YES and many NOs. A model that consistently
|
|
108
|
+
* beats always-NO has selection skill; one that doesn't is mostly
|
|
109
|
+
* harvesting the favorite-longshot tilt.
|
|
110
|
+
*/
|
|
111
|
+
baselines: {
|
|
112
|
+
always_no_roi: number;
|
|
113
|
+
always_no_hit_rate: number;
|
|
114
|
+
always_yes_roi: number;
|
|
115
|
+
always_yes_hit_rate: number;
|
|
116
|
+
/**
|
|
117
|
+
* Model NO-bet ROI minus always-NO ROI, computed in entry-price bands
|
|
118
|
+
* (5-20, 20-40, 40-60, 60-80, 80-95) and capital-weighted across bands.
|
|
119
|
+
* This is the honest "within-band skill" delta: it controls for both
|
|
120
|
+
* the structural NO tilt AND the entry-price mix.
|
|
121
|
+
*/
|
|
122
|
+
within_band_skill_pp: number;
|
|
123
|
+
/**
|
|
124
|
+
* Per-band breakdown so users can see where the skill (if any) comes from.
|
|
125
|
+
*/
|
|
126
|
+
within_band_breakdown: Array<{
|
|
127
|
+
band: string; // e.g. "20-40¢"
|
|
128
|
+
model_no_roi: number; // model NO-bet ROI in this band
|
|
129
|
+
always_no_roi: number; // always-NO ROI in this band
|
|
130
|
+
skill_delta_pp: number; // (model - baseline) × 100, percentage points
|
|
131
|
+
n_model: number; // count of model NO bets in this band
|
|
132
|
+
n_universe: number; // count of all-NO universe contracts in this band
|
|
133
|
+
}>;
|
|
134
|
+
};
|
|
44
135
|
subscription_notice?: string;
|
|
45
136
|
}
|
package/src/commands/analyze.ts
CHANGED
|
@@ -35,12 +35,25 @@ export interface AnalyzeData {
|
|
|
35
35
|
* cache time but didn't get a newer underlying report from Octagon.
|
|
36
36
|
*/
|
|
37
37
|
staleUpstream: boolean;
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
/**
|
|
39
|
+
* Octagon's model probability for this market. null when hasModel is
|
|
40
|
+
* false — we deliberately do NOT emit the 0.5 placeholder fallback to
|
|
41
|
+
* JSON consumers. Always check hasModel before reading this field.
|
|
42
|
+
*/
|
|
43
|
+
modelProb: number | null;
|
|
44
|
+
/**
|
|
45
|
+
* Last traded market probability. null when hasMarketPrice is false.
|
|
46
|
+
* Always check hasMarketPrice before reading.
|
|
47
|
+
*/
|
|
48
|
+
marketProb: number | null;
|
|
49
|
+
/** modelProb − marketProb. null when either input is unavailable. */
|
|
50
|
+
edge: number | null;
|
|
51
|
+
/** Pretty-printed edge ("+14pp"). null when edge is null. */
|
|
52
|
+
edgePp: string | null;
|
|
53
|
+
/** "very_high" | "high" | "moderate" | "low" — null when edge is null. */
|
|
54
|
+
confidence: string | null;
|
|
55
|
+
/** "underpriced" | "overpriced" | "fair_value" — null when edge is null. */
|
|
56
|
+
mispricingSignal: string | null;
|
|
44
57
|
signal: string;
|
|
45
58
|
drivers: PriceDriver[];
|
|
46
59
|
catalysts: Catalyst[];
|
|
@@ -102,12 +115,43 @@ function getVolume(m: KalshiMarket): number {
|
|
|
102
115
|
return m.volume || 0;
|
|
103
116
|
}
|
|
104
117
|
|
|
118
|
+
/**
 * Turn whatever the user typed into an upper-cased Kalshi ticker.
 *
 * Recognised input shapes:
 *   - a bare ticker in any letter case (`kxmeasles-26`, `KxMeAsLeS-26`);
 *   - a URL with an http(s) scheme (the host itself is not checked);
 *   - a scheme-less `kalshi.com/…` or `www.kalshi.com/…` URL;
 *   - either URL form followed by a query string and/or a fragment.
 *
 * For URL-shaped input the last non-empty path segment is taken to be the
 * ticker (Kalshi's URL convention). The result is always upper-cased because
 * the Kalshi market path is case-sensitive: `/markets/kxmeasles-26` 404s even
 * though `KXMEASLES-26` exists.
 *
 * @param input Raw user input; leading/trailing whitespace is ignored.
 * @returns The upper-cased ticker. When the input is URL-shaped but carries
 *   no path segment, the whole trimmed input is returned upper-cased.
 */
export function normalizeKalshiInput(input: string): string {
  const raw = input.trim();

  const hasScheme = /^https?:\/\//i.test(raw);
  const hasBareKalshiHost = /^(www\.)?kalshi\.com\//i.test(raw);
  if (!hasScheme && !hasBareKalshiHost) {
    // Not URL-shaped: treat the whole thing as a ticker.
    return raw.toUpperCase();
  }

  // Drop "scheme://host" first, then a scheme-less kalshi.com host prefix.
  const withoutOrigin = raw
    .replace(/^https?:\/\/[^/]+/i, '')
    .replace(/^(www\.)?kalshi\.com/i, '');
  // Cut the query string / fragment, then any trailing slashes.
  const pathOnly = withoutOrigin.replace(/[?#].*$/, '').replace(/\/+$/, '');

  const ticker = pathOnly
    .split('/')
    .filter((segment) => segment.length > 0)
    .pop();

  // A URL with no path segment falls back to the upper-cased raw input.
  return ticker ? ticker.toUpperCase() : raw.toUpperCase();
}
|
|
147
|
+
|
|
105
148
|
/**
|
|
106
149
|
* Resolve a user-provided ticker to a market ticker.
|
|
107
|
-
* Accepts: market ticker, event ticker, or
|
|
150
|
+
* Accepts: market ticker, event ticker, series ticker, or Kalshi URL.
|
|
108
151
|
* Returns the resolved KalshiMarket (picking the most active open market for events/series).
|
|
109
152
|
*/
|
|
110
|
-
export async function resolveMarket(
|
|
153
|
+
export async function resolveMarket(rawInput: string): Promise<KalshiMarket> {
|
|
154
|
+
const input = normalizeKalshiInput(rawInput);
|
|
111
155
|
// 1. Try as a market ticker first
|
|
112
156
|
try {
|
|
113
157
|
const res = await callKalshiApi('GET', `/markets/${input}`);
|
|
@@ -165,7 +209,7 @@ export async function resolveMarket(input: string): Promise<KalshiMarket> {
|
|
|
165
209
|
if (!(err instanceof KalshiApiError && err.statusCode === 404)) throw err;
|
|
166
210
|
}
|
|
167
211
|
|
|
168
|
-
throw new Error(`Could not find a market for "${input}". Try a
|
|
212
|
+
throw new Error(`Could not find a market for "${rawInput}" (normalized to "${input}"). Try a market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), series ticker (e.g. KXBTC), or a Kalshi URL like https://kalshi.com/markets/<series>/<slug>/<event>.`);
|
|
169
213
|
}
|
|
170
214
|
|
|
171
215
|
export async function handleAnalyze(
|
|
@@ -228,6 +272,16 @@ export async function handleAnalyze(
|
|
|
228
272
|
const latestDbReport = getLatestReport(db, resolvedTicker);
|
|
229
273
|
const reportAge = latestDbReport ? formatAge(latestDbReport.fetched_at) : null;
|
|
230
274
|
|
|
275
|
+
// Decide trading-side gating BEFORE running edge / Kelly / signal math.
|
|
276
|
+
// hasModel uses report.modelProb directly (snapshot.modelProb is just
|
|
277
|
+
// propagated unchanged from computeEdge — verified in edge-computer.ts:38).
|
|
278
|
+
// canComputeEdge is the contract: any trading decision (signal, Kelly,
|
|
279
|
+
// mispricing) must check it first. Otherwise we'd build a "BUY YES @ $X"
|
|
280
|
+
// recommendation from a 0.5 placeholder modelProb on uncovered events.
|
|
281
|
+
const hasModel = !report.cacheMiss && Number.isFinite(report.modelProb)
|
|
282
|
+
&& !(report.modelProb === 0.5 && report.drivers.length === 0 && report.catalysts.length === 0);
|
|
283
|
+
const canComputeEdge = hasModel && hasMarketPrice;
|
|
284
|
+
|
|
231
285
|
const snapshot = edgeComputer.computeEdge(resolvedTicker, report, marketProb);
|
|
232
286
|
|
|
233
287
|
// Persist edge
|
|
@@ -264,7 +318,9 @@ export async function handleAnalyze(
|
|
|
264
318
|
liquidityAdjusted: false,
|
|
265
319
|
};
|
|
266
320
|
let kelly: KellyResult;
|
|
267
|
-
if (!
|
|
321
|
+
if (!canComputeEdge) {
|
|
322
|
+
// Either no model coverage or no last_price → any sizing computed from
|
|
323
|
+
// a placeholder modelProb / marketProb would be meaningless.
|
|
268
324
|
kelly = emptyKelly;
|
|
269
325
|
} else {
|
|
270
326
|
try {
|
|
@@ -316,9 +372,16 @@ export async function handleAnalyze(
|
|
|
316
372
|
const entryPrice = (snapshot.edge > 0 ? yesAsk : noAsk);
|
|
317
373
|
|
|
318
374
|
let signal: string;
|
|
319
|
-
if (!
|
|
320
|
-
//
|
|
321
|
-
|
|
375
|
+
if (!canComputeEdge) {
|
|
376
|
+
// Any actionable signal needs both a real model probability and a real
|
|
377
|
+
// last_price. Spell out which one is missing so the user / bot knows
|
|
378
|
+
// why we're not making a recommendation.
|
|
379
|
+
const reason = !hasModel && !hasMarketPrice
|
|
380
|
+
? 'no Octagon model coverage and no last traded price'
|
|
381
|
+
: !hasModel
|
|
382
|
+
? 'no Octagon model coverage for this market'
|
|
383
|
+
: 'market has no last traded price';
|
|
384
|
+
signal = `no signal (${reason})`;
|
|
322
385
|
} else if (existingPosition) {
|
|
323
386
|
const holdDir = existingPosition.direction.toUpperCase();
|
|
324
387
|
const edgeReversed =
|
|
@@ -358,28 +421,62 @@ export async function handleAnalyze(
|
|
|
358
421
|
// This is the "Refreshed" date — what bumps when --refresh runs.
|
|
359
422
|
// modelRunAt = Octagon's analysis_last_updated (when their model last
|
|
360
423
|
// scored this event). Independent of our cache.
|
|
361
|
-
|
|
362
|
-
|
|
424
|
+
//
|
|
425
|
+
// Load timestamps from a single coherent source — the row identified by
|
|
426
|
+
// report.reportId is the exact row used for THIS analysis. The previous
|
|
427
|
+
// implementation mixed fields from market-keyed and event-keyed rows
|
|
428
|
+
// (different captured runs), so refreshedAt and modelRunAt could refer
|
|
429
|
+
// to different snapshots.
|
|
430
|
+
//
|
|
431
|
+
// If the primary row doesn't carry analysis_last_updated (fetchReport
|
|
432
|
+
// path doesn't expose it), fall back to the latest event-keyed prefetch
|
|
433
|
+
// row for that field only; fetched_at is borrowed from it only when the primary row supplied none.
|
|
434
|
+
const primaryRow = report.reportId
|
|
435
|
+
? db.query(
|
|
436
|
+
`SELECT fetched_at, analysis_last_updated FROM octagon_reports WHERE report_id = $rid`,
|
|
437
|
+
).get({ $rid: report.reportId }) as
|
|
438
|
+
| { fetched_at: number; analysis_last_updated: string | null }
|
|
439
|
+
| undefined
|
|
440
|
+
: undefined;
|
|
441
|
+
let fetchedAtEpoch = primaryRow?.fetched_at ?? null;
|
|
442
|
+
let analysisLastUpdated = primaryRow?.analysis_last_updated ?? null;
|
|
443
|
+
if ((!fetchedAtEpoch || !analysisLastUpdated) && eventTicker && eventTicker !== resolvedTicker) {
|
|
444
|
+
const eventRow = db.query(
|
|
445
|
+
`SELECT fetched_at, analysis_last_updated FROM octagon_reports
|
|
446
|
+
WHERE event_ticker = $et AND variant_used = 'events-api'
|
|
447
|
+
ORDER BY fetched_at DESC LIMIT 1`,
|
|
448
|
+
).get({ $et: eventTicker }) as { fetched_at: number; analysis_last_updated: string | null } | undefined;
|
|
449
|
+
if (eventRow) {
|
|
450
|
+
fetchedAtEpoch = fetchedAtEpoch ?? eventRow.fetched_at;
|
|
451
|
+
analysisLastUpdated = analysisLastUpdated ?? eventRow.analysis_last_updated;
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
const refreshedAt = fetchedAtEpoch
|
|
455
|
+
? new Date(fetchedAtEpoch * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
|
|
363
456
|
: null;
|
|
364
|
-
const modelRunAt =
|
|
365
|
-
?
|
|
457
|
+
const modelRunAt = analysisLastUpdated
|
|
458
|
+
? analysisLastUpdated.replace('T', ' ').slice(0, 16) + ' UTC'
|
|
366
459
|
: null;
|
|
367
460
|
|
|
368
|
-
// hasModel
|
|
369
|
-
//
|
|
370
|
-
//
|
|
371
|
-
const hasModel = !report.cacheMiss && Number.isFinite(snapshot.modelProb)
|
|
372
|
-
&& !(snapshot.modelProb === 0.5 && report.drivers.length === 0 && report.catalysts.length === 0);
|
|
461
|
+
// hasModel + canComputeEdge were computed earlier (above Kelly/signal),
|
|
462
|
+
// so trading-side math never reads a placeholder edge. See top of
|
|
463
|
+
// handleAnalyze for the contract.
|
|
373
464
|
|
|
374
465
|
// staleUpstream = user asked for --refresh but Octagon's upstream model run
|
|
375
466
|
// timestamp didn't move. Cache fetch time bumped, but the underlying report
|
|
376
|
-
// body is the same one Octagon previously generated.
|
|
377
|
-
//
|
|
467
|
+
// body is the same one Octagon previously generated. Compare against the
|
|
468
|
+
// same coherent source we used for modelRunAt above — otherwise we could
|
|
469
|
+
// false-positive on staleness when the two lookups disagreed.
|
|
378
470
|
const staleUpstream = refresh
|
|
379
471
|
&& preRefreshAnalysis != null
|
|
380
|
-
&&
|
|
381
|
-
&& preRefreshAnalysis ===
|
|
382
|
-
|
|
472
|
+
&& analysisLastUpdated != null
|
|
473
|
+
&& preRefreshAnalysis === analysisLastUpdated;
|
|
474
|
+
|
|
475
|
+
// Null out trading-side fields when the underlying inputs are unavailable.
|
|
476
|
+
// JSON consumers previously saw modelProb: 0.5 / marketProb: 0.5 / edge: 0
|
|
477
|
+
// on degraded paths and treated them as real predictions. The hasModel and
|
|
478
|
+
// hasMarketPrice flags are the source of truth — fields here mirror them.
|
|
479
|
+
// (canComputeEdge was already evaluated at the top of the function.)
|
|
383
480
|
return {
|
|
384
481
|
ticker: resolvedTicker,
|
|
385
482
|
eventTicker,
|
|
@@ -390,12 +487,12 @@ export async function handleAnalyze(
|
|
|
390
487
|
staleUpstream,
|
|
391
488
|
hasModel,
|
|
392
489
|
hasMarketPrice,
|
|
393
|
-
modelProb: snapshot.modelProb,
|
|
394
|
-
marketProb,
|
|
395
|
-
edge: snapshot.edge,
|
|
396
|
-
edgePp,
|
|
397
|
-
confidence: snapshot.confidence,
|
|
398
|
-
mispricingSignal,
|
|
490
|
+
modelProb: hasModel ? snapshot.modelProb : null,
|
|
491
|
+
marketProb: hasMarketPrice ? marketProb : null,
|
|
492
|
+
edge: canComputeEdge ? snapshot.edge : null,
|
|
493
|
+
edgePp: canComputeEdge ? edgePp : null,
|
|
494
|
+
confidence: canComputeEdge ? snapshot.confidence : null,
|
|
495
|
+
mispricingSignal: canComputeEdge ? mispricingSignal : null,
|
|
399
496
|
signal,
|
|
400
497
|
drivers: snapshot.drivers,
|
|
401
498
|
catalysts: snapshot.catalysts,
|
|
@@ -437,17 +534,17 @@ export function formatAnalyzeHuman(data: AnalyzeData): string {
|
|
|
437
534
|
// hasMarketPrice=false → Kalshi market has no last_price → Market Prob shows "--"
|
|
438
535
|
// Edge needs both. Either being false means edge/confidence/mispricing
|
|
439
536
|
// render "--" — we never show a number derived from a placeholder.
|
|
440
|
-
const modelStr = data.hasModel
|
|
537
|
+
const modelStr = data.hasModel && data.modelProb != null
|
|
441
538
|
? `${(data.modelProb * 100).toFixed(1)}%`
|
|
442
539
|
: `-- (no Octagon model coverage for this market)`;
|
|
443
|
-
const marketStr = data.hasMarketPrice
|
|
540
|
+
const marketStr = data.hasMarketPrice && data.marketProb != null
|
|
444
541
|
? `${(data.marketProb * 100).toFixed(1)}%`
|
|
445
542
|
: `-- (no last traded price — market hasn't traded yet)`;
|
|
446
|
-
const canComputeEdge = data.hasModel && data.hasMarketPrice;
|
|
543
|
+
const canComputeEdge = data.hasModel && data.hasMarketPrice && data.edge != null;
|
|
447
544
|
lines.push(` Model Prob: ${modelStr}`);
|
|
448
545
|
lines.push(` Market Prob: ${marketStr}`);
|
|
449
546
|
if (canComputeEdge) {
|
|
450
|
-
lines.push(` Edge: ${data.edgePp} (${(data.edge * 100).toFixed(1)}%)`);
|
|
547
|
+
lines.push(` Edge: ${data.edgePp} (${(data.edge! * 100).toFixed(1)}%)`);
|
|
451
548
|
lines.push(` Confidence: ${data.confidence}`);
|
|
452
549
|
lines.push(` Mispricing: ${data.mispricingSignal}`);
|
|
453
550
|
} else {
|
|
@@ -624,8 +721,12 @@ export async function promptAnalyzeActions(data: AnalyzeData): Promise<void> {
|
|
|
624
721
|
// Close position: sell what we hold
|
|
625
722
|
const sellSide = data.existingPosition.direction;
|
|
626
723
|
const sellSize = data.existingPosition.size;
|
|
724
|
+
// marketProb is guaranteed when isSell is reachable (we got a SELL
|
|
725
|
+
// recommendation, which requires a price), but type system can't
|
|
726
|
+
// see that — fall back to 50 if data was tampered with.
|
|
727
|
+
const mp = data.marketProb ?? 0.5;
|
|
627
728
|
const closePrice = data.closePriceCents ?? Math.round(
|
|
628
|
-
(sellSide === 'yes' ?
|
|
729
|
+
(sellSide === 'yes' ? mp : 1 - mp) * 100
|
|
629
730
|
);
|
|
630
731
|
|
|
631
732
|
console.log(` Signal: SELL ${sellSize} ${sellSide.toUpperCase()} @ ${closePrice}¢ (close position)`);
|
|
@@ -701,7 +802,7 @@ export async function promptAnalyzeActions(data: AnalyzeData): Promise<void> {
|
|
|
701
802
|
break;
|
|
702
803
|
}
|
|
703
804
|
|
|
704
|
-
const side = data.edge > 0 ? 'yes' : 'no';
|
|
805
|
+
const side = (data.edge ?? 0) > 0 ? 'yes' : 'no';
|
|
705
806
|
const price = data.kelly.entryPriceCents;
|
|
706
807
|
console.log(` Signal: BUY ${data.kelly.contracts} ${side.toUpperCase()} @ ${price}¢`);
|
|
707
808
|
const confirm = await ask(' Execute? [y/n] ');
|
package/src/commands/backtest.ts
CHANGED
|
@@ -2,7 +2,7 @@ import type { ParsedArgs } from './parse-args.js';
|
|
|
2
2
|
import type { CLIResponse } from './json.js';
|
|
3
3
|
import { wrapSuccess } from './json.js';
|
|
4
4
|
import { getDb } from '../db/index.js';
|
|
5
|
-
import { discoverSettledMarkets, discoverOpenMarkets, parallelMap } from '../backtest/discovery.js';
|
|
5
|
+
import { discoverSettledMarkets, discoverOpenMarkets, parallelMap, resolveUniverse, fetchEventPayloads } from '../backtest/discovery.js';
|
|
6
6
|
import { fetchAndCacheHistory, selectSnapshotByDate, SubscriptionRequiredError, type OutcomeProbability } from '../backtest/fetcher.js';
|
|
7
7
|
import { computeMetrics } from '../backtest/metrics.js';
|
|
8
8
|
import type { BacktestResult, ScoredSignal } from '../backtest/types.js';
|
|
@@ -37,21 +37,24 @@ function edgeBucketLabel(edgePp: number): string {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
/**
|
|
40
|
-
* Return the tradeable volume for a contract.
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
*
|
|
40
|
+
* Return the tradeable volume for a contract, measured AT SNAPSHOT TIME.
|
|
41
|
+
*
|
|
42
|
+
* Returns null when the Octagon snapshot has no per-contract volume — older
|
|
43
|
+
* snapshots predate the per-contract field. We deliberately do NOT fall back
|
|
44
|
+
* to Kalshi LIFETIME volume here: lifetime volume includes trading that
|
|
45
|
+
* happened *after* the entry date, so a contract with zero liquidity at
|
|
46
|
+
* entry that later became active would silently pass the tradeability gate
|
|
47
|
+
* retroactively (look-ahead bias on the tradeable filter).
|
|
48
|
+
*
|
|
49
|
+
* Callers should skip the signal when this returns null and count it as
|
|
50
|
+
* "dropped via no per-contract volume" so the coverage cost is visible.
|
|
44
51
|
*/
|
|
45
|
-
function contractVolume(
|
|
46
|
-
perContract
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if (
|
|
50
|
-
|
|
51
|
-
const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
|
|
52
|
-
if (v !== null || v24 !== null) return Math.max(v ?? 0, v24 ?? 0);
|
|
53
|
-
}
|
|
54
|
-
return fallbackLifetimeVolume;
|
|
52
|
+
/**
 * Tradeable volume for one contract as recorded on the Octagon snapshot.
 *
 * @param perContract Per-contract snapshot entry, or null when the snapshot
 *   has no entry for this contract.
 * @returns The larger of `volume` and `volume_24h` (a missing one counts as
 *   0), or null when neither field holds a usable number. Callers treat null
 *   as "no per-contract volume" and drop the signal.
 *
 * Only finite numbers are usable. `typeof NaN === 'number'`, and
 * `Math.max(NaN, x)` is NaN; a NaN result would make the caller's
 * `vol < minVolume` comparison false and let the contract through the
 * tradeability gate. Non-finite values are therefore treated as missing.
 */
function contractVolume(perContract: OutcomeProbability | null): number | null {
  if (!perContract) return null;
  const v =
    typeof perContract.volume === 'number' && Number.isFinite(perContract.volume)
      ? perContract.volume
      : null;
  const v24 =
    typeof perContract.volume_24h === 'number' && Number.isFinite(perContract.volume_24h)
      ? perContract.volume_24h
      : null;
  if (v === null && v24 === null) return null;
  return Math.max(v ?? 0, v24 ?? 0);
}
|
|
56
59
|
|
|
57
60
|
export { formatBacktestHuman };
|
|
@@ -74,11 +77,26 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
|
|
|
74
77
|
|
|
75
78
|
const signals: ScoredSignal[] = [];
|
|
76
79
|
let subscriptionNotice: string | undefined;
|
|
80
|
+
// Counter for signals dropped because the Octagon snapshot had no
|
|
81
|
+
// per-contract volume. Surfaced in the result so users can see how much
|
|
82
|
+
// coverage the strict (no lifetime-volume look-ahead) gate cost them.
|
|
83
|
+
let signalsDroppedNoVolume = 0;
|
|
84
|
+
|
|
85
|
+
// ─── UNIVERSE RESOLUTION (Phase 4, Issue 7) ────────────────────────────
|
|
86
|
+
// Resolve once and share the Kalshi event-payload map between both legs
|
|
87
|
+
// so we fetch each event payload only once instead of twice. The
|
|
88
|
+
// payloads map is built lazily — we only fetch when at least one leg
|
|
89
|
+
// will use it.
|
|
90
|
+
const universeSource = args.backtestUniverse ?? 'api';
|
|
91
|
+
const universe = await resolveUniverse(db, { source: universeSource, category: args.category });
|
|
92
|
+
// Fetch all event payloads once and share between legs. parallelMap
|
|
93
|
+
// caps concurrency so this doesn't hammer Kalshi.
|
|
94
|
+
const payloads = await fetchEventPayloads(universe.events);
|
|
77
95
|
|
|
78
96
|
// ─── RESOLVED: settled markets with historical Octagon snapshots ────────
|
|
79
97
|
if (!args.unresolved) {
|
|
80
98
|
try {
|
|
81
|
-
const settled = await discoverSettledMarkets(db, { category: args.category });
|
|
99
|
+
const settled = await discoverSettledMarkets(db, { universe, payloads, category: args.category });
|
|
82
100
|
|
|
83
101
|
if (settled.length > 0) {
|
|
84
102
|
// Group by event_ticker to batch history fetches
|
|
@@ -115,12 +133,17 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
|
|
|
115
133
|
const marketThen = perMarket.market_probability;
|
|
116
134
|
if (!Number.isFinite(modelProb) || !Number.isFinite(marketThen)) continue;
|
|
117
135
|
const marketNow = m.result === 'yes' ? 100 : 0;
|
|
118
|
-
|
|
136
|
+
// Unrounded edge — filtering happens downstream against the
|
|
137
|
+
// raw value. Display layer rounds for presentation. Rounding
|
|
138
|
+
// here makes the minEdge filter asymmetric (0.449 rounds to 0.4
|
|
139
|
+
// and is excluded; 0.451 rounds to 0.5 and is included).
|
|
140
|
+
const edgePp = modelProb - marketThen;
|
|
119
141
|
|
|
120
|
-
// Tradeable filter — per-contract volume from the Octagon
|
|
121
|
-
//
|
|
122
|
-
//
|
|
123
|
-
const vol = contractVolume(perMarket
|
|
142
|
+
// Tradeable filter — per-contract volume from the Octagon
|
|
143
|
+
// snapshot only (no Kalshi lifetime-volume fallback, which would
|
|
144
|
+
// be a look-ahead since lifetime includes post-entry trading).
|
|
145
|
+
const vol = contractVolume(perMarket);
|
|
146
|
+
if (vol === null) { signalsDroppedNoVolume++; continue; }
|
|
124
147
|
if (vol < minVolume) continue;
|
|
125
148
|
// Price is marketThen (the price you'd transact at for a resolved bet).
|
|
126
149
|
if (marketThen < minPrice || marketThen > maxPrice) continue;
|
|
@@ -137,8 +160,13 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
|
|
|
137
160
|
pnl = (marketThen - marketNow) / 100;
|
|
138
161
|
capital = (100 - marketThen) / 100;
|
|
139
162
|
} else {
|
|
140
|
-
// Zero edge:
|
|
141
|
-
//
|
|
163
|
+
// Zero edge: model and market agree exactly. Such signals are
|
|
164
|
+
// excluded from edge metrics (metrics.ts filters edge_pp != 0
|
|
165
|
+
// && |edge_pp| >= minEdgePp) but kept in `signals` so the CSV
|
|
166
|
+
// export retains a complete picture of what was scored. We
|
|
167
|
+
// assign YES-side capital so divide-by-zero checks don't fire
|
|
168
|
+
// — the capital field is consulted only when computing ROI on
|
|
169
|
+
// the edge subset, where these rows aren't present.
|
|
142
170
|
capital = marketThen / 100;
|
|
143
171
|
}
|
|
144
172
|
if (capital <= 0) continue;
|
|
@@ -175,7 +203,7 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
|
|
|
175
203
|
// ─── UNRESOLVED: open markets with current Kalshi prices ───────────────
|
|
176
204
|
if (!args.resolved) {
|
|
177
205
|
try {
|
|
178
|
-
const openMarkets = await discoverOpenMarkets(db, { category: args.category });
|
|
206
|
+
const openMarkets = await discoverOpenMarkets(db, { universe, payloads, category: args.category });
|
|
179
207
|
|
|
180
208
|
// Group by event_ticker to batch history fetches (same as resolved path).
|
|
181
209
|
const openByEvent = new Map<string, typeof openMarkets>();
|
|
@@ -207,13 +235,21 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
|
|
|
207
235
|
const confidenceScore = snap.confidence_score ?? 0;
|
|
208
236
|
|
|
209
237
|
const marketNow = m.market_prob * 100; // current Kalshi price (0-100)
|
|
210
|
-
|
|
238
|
+
// Unrounded edge — see resolved-leg comment above.
|
|
239
|
+
const edgePp = modelProb - marketThen;
|
|
211
240
|
|
|
212
|
-
// Tradeable filter — per-contract volume from the Octagon snapshot
|
|
213
|
-
|
|
241
|
+
// Tradeable filter — per-contract volume from the Octagon snapshot
|
|
242
|
+
// only (no Kalshi lifetime-volume fallback, see contractVolume).
|
|
243
|
+
const vol = contractVolume(perMarket);
|
|
244
|
+
if (vol === null) { signalsDroppedNoVolume++; continue; }
|
|
214
245
|
if (vol < minVolume) continue;
|
|
215
|
-
//
|
|
216
|
-
|
|
246
|
+
// Filter on the ENTRY price (marketThen), not the current mark
|
|
247
|
+
// (marketNow). Filtering on marketNow conditions the sample on
|
|
248
|
+
// the outcome: positions that collapsed below minPrice or ran
|
|
249
|
+
// above maxPrice get silently dropped *after* we observe the
|
|
250
|
+
// move. That truncates both tails of the P&L distribution and
|
|
251
|
+
// is a look-ahead bias. Matches the resolved leg above.
|
|
252
|
+
if (marketThen < minPrice || marketThen > maxPrice) continue;
|
|
217
253
|
|
|
218
254
|
// M2M P&L and capital per $1 face value.
|
|
219
255
|
let pnl = 0;
|
|
@@ -262,9 +298,34 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
|
|
|
262
298
|
// ─── COMPUTE METRICS ───────────────────────────────────────────────────
|
|
263
299
|
const metrics = computeMetrics(signals, minEdgePp);
|
|
264
300
|
|
|
301
|
+
// Fee model — defaults to 'none' so existing output is unchanged. With
|
|
302
|
+
// --fees taker we apply Kalshi's taker formula: 0.07 × p × (1−p) per
|
|
303
|
+
// entry, where p is the entry probability for the side we took. Maker
|
|
304
|
+
// execution assumes zero entry fee.
|
|
305
|
+
// We compute on the EDGE signals only (same population as flat_bet_pnl).
|
|
306
|
+
const feeModel = args.backtestFees ?? 'none';
|
|
307
|
+
let feeDrag = 0;
|
|
308
|
+
if (feeModel === 'taker') {
|
|
309
|
+
for (const s of signals) {
|
|
310
|
+
if (s.edge_pp === 0 || Math.abs(s.edge_pp) < minEdgePp) continue;
|
|
311
|
+
// Entry probability on the side we took (YES on positive edge, NO on negative).
|
|
312
|
+
const p = (s.edge_pp > 0 ? s.market_then : (100 - s.market_then)) / 100;
|
|
313
|
+
feeDrag += 0.07 * p * (1 - p);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
const flatBetPnlNet = metrics.flat_bet_pnl - feeDrag;
|
|
317
|
+
const flatBetRoiNet = metrics.total_capital > 0 ? flatBetPnlNet / metrics.total_capital : 0;
|
|
318
|
+
|
|
265
319
|
const result: BacktestResult = {
|
|
266
320
|
...metrics,
|
|
267
321
|
days,
|
|
322
|
+
signals_dropped_no_volume: signalsDroppedNoVolume,
|
|
323
|
+
universe_source: universe.source,
|
|
324
|
+
universe_size: universe.events.length,
|
|
325
|
+
universe_description: universe.description,
|
|
326
|
+
fee_model: feeModel,
|
|
327
|
+
flat_bet_pnl_net: flatBetPnlNet,
|
|
328
|
+
flat_bet_roi_net: flatBetRoiNet,
|
|
268
329
|
subscription_notice: subscriptionNotice,
|
|
269
330
|
};
|
|
270
331
|
|
package/src/commands/help.ts
CHANGED
|
@@ -120,6 +120,10 @@ ${p}backtest --category crypto Filter by category
|
|
|
120
120
|
${p}backtest --min-edge 10 Stricter edge threshold in pp (default 0.5pp)
|
|
121
121
|
${p}backtest --min-volume 10 Per-contract volume gate (default 1)
|
|
122
122
|
${p}backtest --min-price 5 --max-price 95 Tradeable price band 0-100 (defaults: 5 / 95)
|
|
123
|
+
${p}backtest --universe api Systematic Octagon-API universe (default; reproducible across machines)
|
|
124
|
+
${p}backtest --universe local Legacy local octagon_reports universe (offline, NON-SYSTEMATIC)
|
|
125
|
+
${p}backtest --fees taker Apply Kalshi taker fee (0.07·p·(1−p) per entry); default 'none' = gross
|
|
126
|
+
${p}backtest --fees maker Maker execution (free entry)
|
|
123
127
|
${p}backtest --export results.csv Per-market detail CSV
|
|
124
128
|
${p}backtest --json Machine-readable output
|
|
125
129
|
|
|
@@ -127,8 +131,9 @@ Looks back N days, compares what the model said then to where the market is now.
|
|
|
127
131
|
Resolved markets: scored against Kalshi settlement (0 or 100).
|
|
128
132
|
Unresolved markets: mark-to-market vs current Kalshi trading price.
|
|
129
133
|
Per-contract entry: mp/kp come from the per-contract outcome_probabilities on the
|
|
130
|
-
Octagon snapshot (no event-level fallback). Volume gate
|
|
131
|
-
from the snapshot
|
|
134
|
+
Octagon snapshot (no event-level fallback). Volume gate requires per-contract
|
|
135
|
+
volume from the snapshot; signals without it are dropped (the legacy fallback
|
|
136
|
+
to Kalshi lifetime volume was a look-ahead and has been removed).
|
|
132
137
|
ROI is capital-weighted: sum(pnl) / sum(capital) across edge signals, where capital
|
|
133
138
|
is kp/100 for YES edges and (100-kp)/100 for NO edges (matches Supabase methodology).`,
|
|
134
139
|
|
|
@@ -467,7 +472,8 @@ System:
|
|
|
467
472
|
|
|
468
473
|
Flags: --json, --refresh, --performance, --dry-run, --verbose
|
|
469
474
|
Backtest flags: --days, --max-age, --resolved, --unresolved, --category, --min-edge,
|
|
470
|
-
--min-volume, --min-price, --max-price, --export
|
|
475
|
+
--min-volume, --min-price, --max-price, --export,
|
|
476
|
+
--universe api|local (default api), --fees none|taker|maker (default none)
|
|
471
477
|
Run "kalshi help <command>" for detailed usage.`;
|
|
472
478
|
}
|
|
473
479
|
|
package/src/commands/index.ts
CHANGED
|
@@ -157,6 +157,14 @@ export async function handleSlashCommand(input: string): Promise<CommandResult |
|
|
|
157
157
|
else if (a === '--min-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.minPrice = v; }
|
|
158
158
|
else if (a === '--max-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.maxPrice = v; }
|
|
159
159
|
else if (a === '--export') { const v = args[++i]; if (v) btArgs.exportPath = v; }
|
|
160
|
+
else if (a === '--universe') { const v = args[++i]; if (v === 'api' || v === 'local') btArgs.backtestUniverse = v; }
|
|
161
|
+
else if (a === '--fees') { const v = args[++i]; if (v === 'none' || v === 'taker' || v === 'maker') btArgs.backtestFees = v; }
|
|
162
|
+
}
|
|
163
|
+
// Mirror parse-args' mutual-exclusion check — the slash parser above
|
|
164
|
+
// accepts both flags independently, which would put btArgs in a
|
|
165
|
+
// conflicting state before handleBacktest could see it.
|
|
166
|
+
if (btArgs.resolved && btArgs.unresolved) {
|
|
167
|
+
return { output: 'Error: --resolved and --unresolved cannot be used together.' };
|
|
160
168
|
}
|
|
161
169
|
const mode = btArgs.resolved ? 'resolved markets' : btArgs.unresolved ? 'open markets' : 'resolved + open markets';
|
|
162
170
|
const daysLabel = btArgs.days ?? 15;
|