npm - kalshi-trading-bot-cli - Versions diffs - 2.1.6 → 2.1.8 - Mend

kalshi-trading-bot-cli 2.1.6 → 2.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/package.json +1 -1
package/src/backtest/discovery.ts +110 -8
package/src/backtest/metrics.ts +235 -8
package/src/backtest/renderer.ts +91 -3
package/src/backtest/types.ts +92 -1
package/src/commands/analyze.ts +140 -39
package/src/commands/backtest.ts +90 -29
package/src/commands/help.ts +9 -3
package/src/commands/index.ts +8 -0
package/src/commands/parse-args.ts +23 -1
package/src/commands/review.ts +28 -17
package/src/scan/octagon-client.ts +130 -2
package/src/tools/v2/portfolio-review.ts +1 -1

package/src/backtest/types.ts CHANGED Viewed

@@ -5,6 +5,10 @@ export interface BacktestOpts {
   category?: string;
   minEdge: number;            // fractional (0-1 scale), converted to pp by caller (e.g., 0.005 → 0.5pp)
   exportPath?: string;
+  /** Where the universe is sourced from. Default 'api'. */
+  universe?: 'api' | 'local';
+  /** Fee model for net P&L. Default 'none' — output is gross. */
+  fees?: 'none' | 'taker' | 'maker';
 }
 /** A single scored market signal — unified type for both resolved and unresolved. */
@@ -16,7 +20,12 @@ export interface ScoredSignal {
   market_then: number;        // 0-100 (Kalshi trading price N days ago, from Octagon snapshot)
   market_now: number;         // 0-100 (settlement for resolved, current price for unresolved)
   resolved: boolean;
-  edge_pp: number;            // model_prob - market_then
+  /**
+   * Raw, unrounded edge in percentage points: model_prob − market_then.
+   * Filtering on |edge| should always use this value; display layers
+   * round to 0.1pp or 1pp as appropriate.
+   */
+  edge_pp: number;
   pnl: number;               // computed P&L for this signal ($ per $1 face value)
   capital: number;           // $ capital deployed per $1 face value: kp/100 for YES edges, (100-kp)/100 for NO edges
   edge_bucket: string;        // absolute-edge bucket label e.g. "0-5%", "5-10%", ..., "90%+"
@@ -24,6 +33,19 @@ export interface ScoredSignal {
   close_time: string;
 }
+/**
+ * Per-leg scorecard: realized P&L on the resolved leg, mark-to-market on the
+ * unresolved leg. Computed on the leg's subset of signals.
+ */
+export interface LegMetrics {
+  edge_signals: number;
+  edge_hit_rate: number;
+  hit_rate_ci: [number, number];
+  flat_bet_pnl: number;
+  flat_bet_roi: number;
+  total_capital: number;
+}
 export interface BacktestResult {
   verdict: { summary: string; significant: boolean; profitable: boolean };
   days: number;
@@ -41,5 +63,74 @@ export interface BacktestResult {
   flat_bet_roi: number;       // capital-weighted: sum(pnl) / sum(capital) across edge signals
   total_capital: number;      // sum of capital across edge signals (ROI denominator)
   signals: ScoredSignal[];
+  /**
+   * Count of candidate signals dropped because the Octagon snapshot had no
+   * per-contract volume (older snapshots predate the per-contract field).
+   * We deliberately do NOT fall back to Kalshi lifetime volume — that
+   * would be a look-ahead bias (lifetime includes post-entry trading).
+   * Surfaced so users can see the coverage cost of the strict gate.
+   */
+  signals_dropped_no_volume: number;
+  /**
+   * Provenance for the universe — printed in the scorecard header so users
+   * (and downstream JSON consumers) can see whether the backtest ran over
+   * the systematic Octagon-API universe or the legacy local-DB universe.
+   */
+  universe_source: 'api' | 'local';
+  universe_size: number;
+  universe_description: string;
+  /**
+   * Fee model applied to the P&L. 'none' means the reported P&L is gross
+   * (no fees, no spreads). 'taker' charges the Kalshi taker fee per entry.
+   * 'maker' assumes free entry. Default 'none' so existing output is
+   * unchanged — opt in with --fees taker.
+   */
+  fee_model: 'none' | 'taker' | 'maker';
+  /** P&L net of fees when fee_model != 'none', else equal to flat_bet_pnl. */
+  flat_bet_pnl_net: number;
+  flat_bet_roi_net: number;
+  /**
+   * Sub-scorecards computed on the resolved and unresolved legs separately.
+   * Resolved settles at 0/100 — realized outcomes. Unresolved is marked to
+   * an arbitrary "now" price and may reverse before settlement. Blending
+   * them in the top-level fields can hide cases where the paper P&L
+   * inflates a weak realized result.
+   *
+   * The blended top-level fields (`edge_hit_rate`, `flat_bet_roi`, etc.)
+   * are kept for backward compatibility with existing consumers.
+   */
+  resolved_metrics: LegMetrics;
+  unresolved_metrics: LegMetrics;
+  /**
+   * Zero-skill baseline ROIs on the same post-filter universe. Always-NO is
+   * the relevant null because Kalshi's universe is structurally NO-heavy:
+   * multi-outcome events have one YES and many NOs. A model that consistently
+   * beats always-NO has selection skill; one that doesn't is mostly
+   * harvesting the favorite-longshot tilt.
+   */
+  baselines: {
+    always_no_roi: number;
+    always_no_hit_rate: number;
+    always_yes_roi: number;
+    always_yes_hit_rate: number;
+    /**
+     * Model NO-bet ROI minus always-NO ROI, computed in entry-price bands
+     * (5-20, 20-40, 40-60, 60-80, 80-95) and capital-weighted across bands.
+     * This is the honest "within-band skill" delta: it controls for both
+     * the structural NO tilt AND the entry-price mix.
+     */
+    within_band_skill_pp: number;
+    /**
+     * Per-band breakdown so users can see where the skill (if any) comes from.
+     */
+    within_band_breakdown: Array<{
+      band: string;            // e.g. "20-40¢"
+      model_no_roi: number;    // model NO-bet ROI in this band
+      always_no_roi: number;   // always-NO ROI in this band
+      skill_delta_pp: number;  // (model - baseline) × 100, percentage points
+      n_model: number;         // count of model NO bets in this band
+      n_universe: number;      // count of all-NO universe contracts in this band
+    }>;
+  };
   subscription_notice?: string;
 }

package/src/commands/analyze.ts CHANGED Viewed

@@ -35,12 +35,25 @@ export interface AnalyzeData {
    * cache time but didn't get a newer underlying report from Octagon.
    */
   staleUpstream: boolean;
-  modelProb: number;
-  marketProb: number;
-  edge: number;
-  edgePp: string;
-  confidence: string;
-  mispricingSignal: string;
+  /**
+   * Octagon's model probability for this market. null when hasModel is
+   * false — we deliberately do NOT emit the 0.5 placeholder fallback to
+   * JSON consumers. Always check hasModel before reading this field.
+   */
+  modelProb: number | null;
+  /**
+   * Last traded market probability. null when hasMarketPrice is false.
+   * Always check hasMarketPrice before reading.
+   */
+  marketProb: number | null;
+  /** modelProb − marketProb. null when either input is unavailable. */
+  edge: number | null;
+  /** Pretty-printed edge ("+14pp"). null when edge is null. */
+  edgePp: string | null;
+  /** "very_high" | "high" | "moderate" | "low" — null when edge is null. */
+  confidence: string | null;
+  /** "underpriced" | "overpriced" | "fair_value" — null when edge is null. */
+  mispricingSignal: string | null;
   signal: string;
   drivers: PriceDriver[];
   catalysts: Catalyst[];
@@ -102,12 +115,43 @@ function getVolume(m: KalshiMarket): number {
   return m.volume || 0;
 }
+/**
+ * Normalize user input into a canonical Kalshi ticker.
+ *
+ * Accepts any of:
+ *   - Bare ticker, any case: `kxmeasles-26`, `KXMEASLES-26`, `KxMeAsLeS-26`
+ *   - Kalshi URL: `https://kalshi.com/markets/kxmeasles/measles-cases/kxmeasles-26`
+ *   - URL without protocol: `kalshi.com/markets/kxmeasles-26`
+ *   - URL with query / fragment: `…/kxmeasles-26?ref=foo#yes`
+ *
+ * Strategy: detect URL-shaped input, extract the last non-empty path segment
+ * (which by Kalshi convention is the ticker), then uppercase. Bare tickers
+ * are simply uppercased. Kalshi's path is case-sensitive — without this
+ * `/markets/kxmeasles-26` 404s even though the ticker exists.
+ */
+export function normalizeKalshiInput(input: string): string {
+  const trimmed = input.trim();
+  const looksLikeUrl =
+    /^https?:\/\//i.test(trimmed) || /^(www\.)?kalshi\.com\//i.test(trimmed);
+  if (looksLikeUrl) {
+    const noProto = trimmed
+      .replace(/^https?:\/\/[^/]+/i, '')
+      .replace(/^(www\.)?kalshi\.com/i, '');
+    const path = noProto.replace(/[?#].*$/, '').replace(/\/+$/, '');
+    const segments = path.split('/').filter(Boolean);
+    const last = segments[segments.length - 1] ?? '';
+    if (last) return last.toUpperCase();
+  }
+  return trimmed.toUpperCase();
+}
 /**
  * Resolve a user-provided ticker to a market ticker.
- * Accepts: market ticker, event ticker, or series ticker.
+ * Accepts: market ticker, event ticker, series ticker, or Kalshi URL.
  * Returns the resolved KalshiMarket (picking the most active open market for events/series).
  */
-export async function resolveMarket(input: string): Promise<KalshiMarket> {
+export async function resolveMarket(rawInput: string): Promise<KalshiMarket> {
+  const input = normalizeKalshiInput(rawInput);
   // 1. Try as a market ticker first
   try {
     const res = await callKalshiApi('GET', `/markets/${input}`);
@@ -165,7 +209,7 @@ export async function resolveMarket(input: string): Promise<KalshiMarket> {
     if (!(err instanceof KalshiApiError && err.statusCode === 404)) throw err;
   }
-  throw new Error(`Could not find a market for "${input}". Try a full market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), or series ticker (e.g. KXBTC).`);
+  throw new Error(`Could not find a market for "${rawInput}" (normalized to "${input}"). Try a market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), series ticker (e.g. KXBTC), or a Kalshi URL like https://kalshi.com/markets/<series>/<slug>/<event>.`);
 }
 export async function handleAnalyze(
@@ -228,6 +272,16 @@ export async function handleAnalyze(
   const latestDbReport = getLatestReport(db, resolvedTicker);
   const reportAge = latestDbReport ? formatAge(latestDbReport.fetched_at) : null;
+  // Decide trading-side gating BEFORE running edge / Kelly / signal math.
+  // hasModel uses report.modelProb directly (snapshot.modelProb is just
+  // propagated unchanged from computeEdge — verified in edge-computer.ts:38).
+  // canComputeEdge is the contract: any trading decision (signal, Kelly,
+  // mispricing) must check it first. Otherwise we'd build a "BUY YES @ $X"
+  // recommendation from a 0.5 placeholder modelProb on uncovered events.
+  const hasModel = !report.cacheMiss && Number.isFinite(report.modelProb)
+    && !(report.modelProb === 0.5 && report.drivers.length === 0 && report.catalysts.length === 0);
+  const canComputeEdge = hasModel && hasMarketPrice;
   const snapshot = edgeComputer.computeEdge(resolvedTicker, report, marketProb);
   // Persist edge
@@ -264,7 +318,9 @@ export async function handleAnalyze(
     liquidityAdjusted: false,
   };
   let kelly: KellyResult;
-  if (!hasMarketPrice) {
+  if (!canComputeEdge) {
+    // Either no model coverage or no last_price → any sizing computed from
+    // a placeholder modelProb / marketProb would be meaningless.
     kelly = emptyKelly;
   } else {
     try {
@@ -316,9 +372,16 @@ export async function handleAnalyze(
   const entryPrice = (snapshot.edge > 0 ? yesAsk : noAsk);
   let signal: string;
-  if (!hasMarketPrice) {
-    // No tradeable price — no actionable signal. Render explicitly.
-    signal = 'no signal (market has no last traded price)';
+  if (!canComputeEdge) {
+    // Any actionable signal needs both a real model probability and a real
+    // last_price. Spell out which one is missing so the user / bot knows
+    // why we're not making a recommendation.
+    const reason = !hasModel && !hasMarketPrice
+      ? 'no Octagon model coverage and no last traded price'
+      : !hasModel
+        ? 'no Octagon model coverage for this market'
+        : 'market has no last traded price';
+    signal = `no signal (${reason})`;
   } else if (existingPosition) {
     const holdDir = existingPosition.direction.toUpperCase();
     const edgeReversed =
@@ -358,28 +421,62 @@ export async function handleAnalyze(
   //                 This is the "Refreshed" date — what bumps when --refresh runs.
   //   modelRunAt  = Octagon's analysis_last_updated (when their model last
   //                 scored this event). Independent of our cache.
-  const refreshedAt = latestDbReport
-    ? new Date(latestDbReport.fetched_at * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
+  //
+  // Load timestamps from a single coherent source — the row identified by
+  // report.reportId is the exact row used for THIS analysis. The previous
+  // implementation mixed fields from market-keyed and event-keyed rows
+  // (different captured runs), so refreshedAt and modelRunAt could refer
+  // to different snapshots.
+  //
+  // If the primary row doesn't carry analysis_last_updated (fetchReport
+  // path doesn't expose it), fall back to the latest event-keyed prefetch
+  // row for that field only — never for fetched_at.
+  const primaryRow = report.reportId
+    ? db.query(
+        `SELECT fetched_at, analysis_last_updated FROM octagon_reports WHERE report_id = $rid`,
+      ).get({ $rid: report.reportId }) as
+        | { fetched_at: number; analysis_last_updated: string | null }
+        | undefined
+    : undefined;
+  let fetchedAtEpoch = primaryRow?.fetched_at ?? null;
+  let analysisLastUpdated = primaryRow?.analysis_last_updated ?? null;
+  if ((!fetchedAtEpoch || !analysisLastUpdated) && eventTicker && eventTicker !== resolvedTicker) {
+    const eventRow = db.query(
+      `SELECT fetched_at, analysis_last_updated FROM octagon_reports
+       WHERE event_ticker = $et AND variant_used = 'events-api'
+       ORDER BY fetched_at DESC LIMIT 1`,
+    ).get({ $et: eventTicker }) as { fetched_at: number; analysis_last_updated: string | null } | undefined;
+    if (eventRow) {
+      fetchedAtEpoch = fetchedAtEpoch ?? eventRow.fetched_at;
+      analysisLastUpdated = analysisLastUpdated ?? eventRow.analysis_last_updated;
+    }
+  }
+  const refreshedAt = fetchedAtEpoch
+    ? new Date(fetchedAtEpoch * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
     : null;
-  const modelRunAt = latestDbReport?.analysis_last_updated
-    ? latestDbReport.analysis_last_updated.replace('T', ' ').slice(0, 16) + ' UTC'
+  const modelRunAt = analysisLastUpdated
+    ? analysisLastUpdated.replace('T', ' ').slice(0, 16) + ' UTC'
     : null;
-  // hasModel = Octagon returned a real probability for this market. A
-  // cache-miss report keeps modelProb at the 0.5 placeholder; we must NOT
-  // render that as if it were a real prediction.
-  const hasModel = !report.cacheMiss && Number.isFinite(snapshot.modelProb)
-    && !(snapshot.modelProb === 0.5 && report.drivers.length === 0 && report.catalysts.length === 0);
+  // hasModel + canComputeEdge were computed earlier (above Kelly/signal),
+  // so trading-side math never reads a placeholder edge. See top of
+  // handleAnalyze for the contract.
   // staleUpstream = user asked for --refresh but Octagon's upstream model run
   // timestamp didn't move. Cache fetch time bumped, but the underlying report
-  // body is the same one Octagon previously generated. The user wanted fresh
-  // analysis; they got an unchanged stale one.
+  // body is the same one Octagon previously generated. Compare against the
+  // same coherent source we used for modelRunAt above — otherwise we could
+  // false-positive on staleness when the two lookups disagreed.
   const staleUpstream = refresh
     && preRefreshAnalysis != null
-    && latestDbReport?.analysis_last_updated != null
-    && preRefreshAnalysis === latestDbReport.analysis_last_updated;
+    && analysisLastUpdated != null
+    && preRefreshAnalysis === analysisLastUpdated;
+  // Null out trading-side fields when the underlying inputs are unavailable.
+  // JSON consumers previously saw modelProb: 0.5 / marketProb: 0.5 / edge: 0
+  // on degraded paths and treated them as real predictions. The hasModel and
+  // hasMarketPrice flags are the source of truth — fields here mirror them.
+  // (canComputeEdge was already evaluated at the top of the function.)
   return {
     ticker: resolvedTicker,
     eventTicker,
@@ -390,12 +487,12 @@ export async function handleAnalyze(
     staleUpstream,
     hasModel,
     hasMarketPrice,
-    modelProb: snapshot.modelProb,
-    marketProb,
-    edge: snapshot.edge,
-    edgePp,
-    confidence: snapshot.confidence,
-    mispricingSignal,
+    modelProb: hasModel ? snapshot.modelProb : null,
+    marketProb: hasMarketPrice ? marketProb : null,
+    edge: canComputeEdge ? snapshot.edge : null,
+    edgePp: canComputeEdge ? edgePp : null,
+    confidence: canComputeEdge ? snapshot.confidence : null,
+    mispricingSignal: canComputeEdge ? mispricingSignal : null,
     signal,
     drivers: snapshot.drivers,
     catalysts: snapshot.catalysts,
@@ -437,17 +534,17 @@ export function formatAnalyzeHuman(data: AnalyzeData): string {
   //   hasMarketPrice=false → Kalshi market has no last_price → Market Prob shows "--"
   // Edge needs both. Either being false means edge/confidence/mispricing
   // render "--" — we never show a number derived from a placeholder.
-  const modelStr = data.hasModel
+  const modelStr = data.hasModel && data.modelProb != null
     ? `${(data.modelProb * 100).toFixed(1)}%`
     : `--   (no Octagon model coverage for this market)`;
-  const marketStr = data.hasMarketPrice
+  const marketStr = data.hasMarketPrice && data.marketProb != null
     ? `${(data.marketProb * 100).toFixed(1)}%`
     : `--   (no last traded price — market hasn't traded yet)`;
-  const canComputeEdge = data.hasModel && data.hasMarketPrice;
+  const canComputeEdge = data.hasModel && data.hasMarketPrice && data.edge != null;
   lines.push(`  Model Prob:  ${modelStr}`);
   lines.push(`  Market Prob: ${marketStr}`);
   if (canComputeEdge) {
-    lines.push(`  Edge:        ${data.edgePp} (${(data.edge * 100).toFixed(1)}%)`);
+    lines.push(`  Edge:        ${data.edgePp} (${(data.edge! * 100).toFixed(1)}%)`);
     lines.push(`  Confidence:  ${data.confidence}`);
     lines.push(`  Mispricing:  ${data.mispricingSignal}`);
   } else {
@@ -624,8 +721,12 @@ export async function promptAnalyzeActions(data: AnalyzeData): Promise<void> {
           // Close position: sell what we hold
           const sellSide = data.existingPosition.direction;
           const sellSize = data.existingPosition.size;
+          // marketProb is guaranteed when isSell is reachable (we got a SELL
+          // recommendation, which requires a price), but type system can't
+          // see that — fall back to 50 if data was tampered with.
+          const mp = data.marketProb ?? 0.5;
           const closePrice = data.closePriceCents ?? Math.round(
-            (sellSide === 'yes' ? data.marketProb : 1 - data.marketProb) * 100
+            (sellSide === 'yes' ? mp : 1 - mp) * 100
           );
           console.log(`  Signal: SELL ${sellSize} ${sellSide.toUpperCase()} @ ${closePrice}¢ (close position)`);
@@ -701,7 +802,7 @@ export async function promptAnalyzeActions(data: AnalyzeData): Promise<void> {
           break;
         }
-        const side = data.edge > 0 ? 'yes' : 'no';
+        const side = (data.edge ?? 0) > 0 ? 'yes' : 'no';
         const price = data.kelly.entryPriceCents;
         console.log(`  Signal: BUY ${data.kelly.contracts} ${side.toUpperCase()} @ ${price}¢`);
         const confirm = await ask('  Execute? [y/n] ');

package/src/commands/backtest.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import type { ParsedArgs } from './parse-args.js';
 import type { CLIResponse } from './json.js';
 import { wrapSuccess } from './json.js';
 import { getDb } from '../db/index.js';
-import { discoverSettledMarkets, discoverOpenMarkets, parallelMap } from '../backtest/discovery.js';
+import { discoverSettledMarkets, discoverOpenMarkets, parallelMap, resolveUniverse, fetchEventPayloads } from '../backtest/discovery.js';
 import { fetchAndCacheHistory, selectSnapshotByDate, SubscriptionRequiredError, type OutcomeProbability } from '../backtest/fetcher.js';
 import { computeMetrics } from '../backtest/metrics.js';
 import type { BacktestResult, ScoredSignal } from '../backtest/types.js';
@@ -37,21 +37,24 @@ function edgeBucketLabel(edgePp: number): string {
 }
 /**
- * Return the tradeable volume for a contract.
- * Prefers per-contract volume fields from the Octagon snapshot (as the
- * Supabase methodology does); falls back to Kalshi lifetime volume for
- * older cached snapshots that pre-date the API's per-contract volume.
+ * Return the tradeable volume for a contract, measured AT SNAPSHOT TIME.
+ *
+ * Returns null when the Octagon snapshot has no per-contract volume — older
+ * snapshots predate the per-contract field. We deliberately do NOT fall back
+ * to Kalshi LIFETIME volume here: lifetime volume includes trading that
+ * happened *after* the entry date, so a contract with zero liquidity at
+ * entry that later became active would silently pass the tradeability gate
+ * retroactively (look-ahead bias on the tradeable filter).
+ *
+ * Callers should skip the signal when this returns null and count it as
+ * "dropped via no per-contract volume" so the coverage cost is visible.
  */
-function contractVolume(
-  perContract: OutcomeProbability | null,
-  fallbackLifetimeVolume: number,
-): number {
-  if (perContract) {
-    const v = typeof perContract.volume === 'number' ? perContract.volume : null;
-    const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
-    if (v !== null || v24 !== null) return Math.max(v ?? 0, v24 ?? 0);
-  }
-  return fallbackLifetimeVolume;
+function contractVolume(perContract: OutcomeProbability | null): number | null {
+  if (!perContract) return null;
+  const v = typeof perContract.volume === 'number' ? perContract.volume : null;
+  const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
+  if (v === null && v24 === null) return null;
+  return Math.max(v ?? 0, v24 ?? 0);
 }
 export { formatBacktestHuman };
@@ -74,11 +77,26 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
   const signals: ScoredSignal[] = [];
   let subscriptionNotice: string | undefined;
+  // Counter for signals dropped because the Octagon snapshot had no
+  // per-contract volume. Surfaced in the result so users can see how much
+  // coverage the strict (no lifetime-volume look-ahead) gate cost them.
+  let signalsDroppedNoVolume = 0;
+  // ─── UNIVERSE RESOLUTION (Phase 4, Issue 7) ────────────────────────────
+  // Resolve once and share the Kalshi event-payload map between both legs
+  // so we fetch each event payload only once instead of twice. The
+  // payloads map is built lazily — we only fetch when at least one leg
+  // will use it.
+  const universeSource = args.backtestUniverse ?? 'api';
+  const universe = await resolveUniverse(db, { source: universeSource, category: args.category });
+  // Fetch all event payloads once and share between legs. parallelMap
+  // caps concurrency so this doesn't hammer Kalshi.
+  const payloads = await fetchEventPayloads(universe.events);
   // ─── RESOLVED: settled markets with historical Octagon snapshots ────────
   if (!args.unresolved) {
     try {
-      const settled = await discoverSettledMarkets(db, { category: args.category });
+      const settled = await discoverSettledMarkets(db, { universe, payloads, category: args.category });
       if (settled.length > 0) {
         // Group by event_ticker to batch history fetches
@@ -115,12 +133,17 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
             const marketThen = perMarket.market_probability;
             if (!Number.isFinite(modelProb) || !Number.isFinite(marketThen)) continue;
             const marketNow = m.result === 'yes' ? 100 : 0;
-            const edgePp = Math.round((modelProb - marketThen) * 10) / 10;
+            // Unrounded edge — filtering happens downstream against the
+            // raw value. Display layer rounds for presentation. Rounding
+            // here makes the minEdge filter asymmetric (0.449 rounds to 0.4
+            // and is excluded; 0.451 rounds to 0.5 and is included).
+            const edgePp = modelProb - marketThen;
-            // Tradeable filter — per-contract volume from the Octagon snapshot
-            // (matches Supabase methodology); falls back to Kalshi lifetime
-            // volume for pre-API-change cached snapshots.
-            const vol = contractVolume(perMarket, m.volume);
+            // Tradeable filter — per-contract volume from the Octagon
+            // snapshot only (no Kalshi lifetime-volume fallback, which would
+            // be a look-ahead since lifetime includes post-entry trading).
+            const vol = contractVolume(perMarket);
+            if (vol === null) { signalsDroppedNoVolume++; continue; }
             if (vol < minVolume) continue;
             // Price is marketThen (the price you'd transact at for a resolved bet).
             if (marketThen < minPrice || marketThen > maxPrice) continue;
@@ -137,8 +160,13 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
               pnl = (marketThen - marketNow) / 100;
               capital = (100 - marketThen) / 100;
             } else {
-              // Zero edge: capital still reflects the tradeable side implied by sign
-              // (use YES side so divide-by-zero checks don't fire on 0-edge signals).
+              // Zero edge: model and market agree exactly. Such signals are
+              // excluded from edge metrics (metrics.ts filters edge_pp != 0
+              // && |edge_pp| >= minEdgePp) but kept in `signals` so the CSV
+              // export retains a complete picture of what was scored. We
+              // assign YES-side capital so divide-by-zero checks don't fire
+              // — the capital field is consulted only when computing ROI on
+              // the edge subset, where these rows aren't present.
               capital = marketThen / 100;
             }
             if (capital <= 0) continue;
@@ -175,7 +203,7 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
   // ─── UNRESOLVED: open markets with current Kalshi prices ───────────────
   if (!args.resolved) {
     try {
-      const openMarkets = await discoverOpenMarkets(db, { category: args.category });
+      const openMarkets = await discoverOpenMarkets(db, { universe, payloads, category: args.category });
       // Group by event_ticker to batch history fetches (same as resolved path).
       const openByEvent = new Map<string, typeof openMarkets>();
@@ -207,13 +235,21 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
           const confidenceScore = snap.confidence_score ?? 0;
           const marketNow = m.market_prob * 100; // current Kalshi price (0-100)
-          const edgePp = Math.round((modelProb - marketThen) * 10) / 10;
+          // Unrounded edge — see resolved-leg comment above.
+          const edgePp = modelProb - marketThen;
-          // Tradeable filter — per-contract volume from the Octagon snapshot.
-          const vol = contractVolume(perMarket, m.volume);
+          // Tradeable filter — per-contract volume from the Octagon snapshot
+          // only (no Kalshi lifetime-volume fallback, see contractVolume).
+          const vol = contractVolume(perMarket);
+          if (vol === null) { signalsDroppedNoVolume++; continue; }
           if (vol < minVolume) continue;
-          // Price is marketNow (the current transactable price for an open position).
-          if (marketNow < minPrice || marketNow > maxPrice) continue;
+          // Filter on the ENTRY price (marketThen), not the current mark
+          // (marketNow). Filtering on marketNow conditions the sample on
+          // the outcome: positions that collapsed below minPrice or ran
+          // above maxPrice get silently dropped *after* we observe the
+          // move. That truncates both tails of the P&L distribution and
+          // is a look-ahead bias. Matches the resolved leg above.
+          if (marketThen < minPrice || marketThen > maxPrice) continue;
           // M2M P&L and capital per $1 face value.
           let pnl = 0;
@@ -262,9 +298,34 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
   // ─── COMPUTE METRICS ───────────────────────────────────────────────────
   const metrics = computeMetrics(signals, minEdgePp);
+  // Fee model — defaults to 'none' so existing output is unchanged. With
+  // --fees taker we apply Kalshi's taker formula: 0.07 × p × (1−p) per
+  // entry, where p is the entry probability for the side we took. Maker
+  // execution assumes zero entry fee.
+  // We compute on the EDGE signals only (same population as flat_bet_pnl).
+  const feeModel = args.backtestFees ?? 'none';
+  let feeDrag = 0;
+  if (feeModel === 'taker') {
+    for (const s of signals) {
+      if (s.edge_pp === 0 || Math.abs(s.edge_pp) < minEdgePp) continue;
+      // Entry probability on the side we took (YES on positive edge, NO on negative).
+      const p = (s.edge_pp > 0 ? s.market_then : (100 - s.market_then)) / 100;
+      feeDrag += 0.07 * p * (1 - p);
+    }
+  }
+  const flatBetPnlNet = metrics.flat_bet_pnl - feeDrag;
+  const flatBetRoiNet = metrics.total_capital > 0 ? flatBetPnlNet / metrics.total_capital : 0;
   const result: BacktestResult = {
     ...metrics,
     days,
+    signals_dropped_no_volume: signalsDroppedNoVolume,
+    universe_source: universe.source,
+    universe_size: universe.events.length,
+    universe_description: universe.description,
+    fee_model: feeModel,
+    flat_bet_pnl_net: flatBetPnlNet,
+    flat_bet_roi_net: flatBetRoiNet,
     subscription_notice: subscriptionNotice,
   };

package/src/commands/help.ts CHANGED Viewed

@@ -120,6 +120,10 @@ ${p}backtest --category crypto            Filter by category
 ${p}backtest --min-edge 10                Stricter edge threshold in pp (default 0.5pp)
 ${p}backtest --min-volume 10              Per-contract volume gate (default 1)
 ${p}backtest --min-price 5 --max-price 95 Tradeable price band 0-100 (defaults: 5 / 95)
+${p}backtest --universe api              Systematic Octagon-API universe (default; reproducible across machines)
+${p}backtest --universe local            Legacy local octagon_reports universe (offline, NON-SYSTEMATIC)
+${p}backtest --fees taker                Apply Kalshi taker fee (0.07·p·(1−p) per entry); default 'none' = gross
+${p}backtest --fees maker                Maker execution (free entry)
 ${p}backtest --export results.csv         Per-market detail CSV
 ${p}backtest --json                       Machine-readable output
@@ -127,8 +131,9 @@ Looks back N days, compares what the model said then to where the market is now.
 Resolved markets: scored against Kalshi settlement (0 or 100).
 Unresolved markets: mark-to-market vs current Kalshi trading price.
 Per-contract entry: mp/kp come from the per-contract outcome_probabilities on the
-Octagon snapshot (no event-level fallback). Volume gate uses per-contract volume
-from the snapshot when available, else current Kalshi lifetime volume.
+Octagon snapshot (no event-level fallback). Volume gate requires per-contract
+volume from the snapshot; signals without it are dropped (the legacy fallback
+to Kalshi lifetime volume was a look-ahead and has been removed).
 ROI is capital-weighted: sum(pnl) / sum(capital) across edge signals, where capital
 is kp/100 for YES edges and (100-kp)/100 for NO edges (matches Supabase methodology).`,
@@ -467,7 +472,8 @@ System:
 Flags: --json, --refresh, --performance, --dry-run, --verbose
 Backtest flags: --days, --max-age, --resolved, --unresolved, --category, --min-edge,
-                --min-volume, --min-price, --max-price, --export
+                --min-volume, --min-price, --max-price, --export,
+                --universe api|local (default api), --fees none|taker|maker (default none)
 Run "kalshi help <command>" for detailed usage.`;
   }

package/src/commands/index.ts CHANGED Viewed

@@ -157,6 +157,14 @@ export async function handleSlashCommand(input: string): Promise<CommandResult |
         else if (a === '--min-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.minPrice = v; }
         else if (a === '--max-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.maxPrice = v; }
         else if (a === '--export') { const v = args[++i]; if (v) btArgs.exportPath = v; }
+        else if (a === '--universe') { const v = args[++i]; if (v === 'api' || v === 'local') btArgs.backtestUniverse = v; }
+        else if (a === '--fees') { const v = args[++i]; if (v === 'none' || v === 'taker' || v === 'maker') btArgs.backtestFees = v; }
+      }
+      // Mirror parse-args' mutual-exclusion check — the slash parser above
+      // accepts both flags independently, which would put btArgs in a
+      // conflicting state before handleBacktest could see it.
+      if (btArgs.resolved && btArgs.unresolved) {
+        return { output: 'Error: --resolved and --unresolved cannot be used together.' };
       }
       const mode = btArgs.resolved ? 'resolved markets' : btArgs.unresolved ? 'open markets' : 'resolved + open markets';
       const daysLabel = btArgs.days ?? 15;