npm - kalshi-trading-bot-cli - Versions diffs - 2.1.7 → 2.1.8 - Mend

kalshi-trading-bot-cli 2.1.7 → 2.1.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +1 -1
package/src/backtest/discovery.ts +110 -8
package/src/backtest/metrics.ts +235 -8
package/src/backtest/renderer.ts +91 -3
package/src/backtest/types.ts +92 -1
package/src/commands/analyze.ts +73 -11
package/src/commands/backtest.ts +90 -29
package/src/commands/help.ts +9 -3
package/src/commands/index.ts +8 -0
package/src/commands/parse-args.ts +23 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kalshi-trading-bot-cli",
-  "version": "2.1.7",
+  "version": "2.1.8",
   "description": "Kalshi Trading Bot CLI - AI-powered prediction market terminal.",
   "license": "MIT",
   "author": "Octagon AI, Inc.",

package/src/backtest/discovery.ts CHANGED Viewed

@@ -1,9 +1,92 @@
 import type { Database } from 'bun:sqlite';
 import { callKalshiApi } from '../tools/kalshi/api.js';
 import type { KalshiMarket } from '../tools/kalshi/types.js';
+import { fetchAllOctagonEvents } from '../scan/octagon-events-api.js';
 const CONCURRENCY = 10;
+/** Where the backtest universe is sourced from. */
+export type UniverseSource = 'api' | 'local';
+export interface UniverseEntry {
+  event_ticker: string;
+  category: string | null;
+}
+export interface Universe {
+  events: UniverseEntry[];
+  source: UniverseSource;
+  description: string;
+}
+/**
+ * Resolve the backtest universe (the set of events scored).
+ *
+ * `api` (default): paginate Octagon's covered-events list — systematic,
+ * reproducible across machines, doesn't depend on whatever this install
+ * happened to analyze in the past. Uses fetchAllOctagonEvents directly;
+ * we deliberately do NOT pass hasHistory=true because a prior audit showed
+ * that flag silently dropped 373 of 662 events. The pipeline self-filters
+ * downstream: events with no usable snapshot return null from
+ * selectSnapshotByDate and are skipped cheaply.
+ *
+ * `local`: legacy behavior — pull from the local `octagon_reports` log.
+ * Reflects past usage of this machine, not a defined universe. Useful for
+ * offline runs and for comparing against historical backtests.
+ *
+ * KNOWN LIMITATION: the API returns *today's* covered universe, not the
+ * universe as of the entry date. Events dropped from coverage mid-window
+ * vanish from the backtest — survivorship at the universe level. A true
+ * point-in-time universe requires `events?as_of=<date>` upstream.
+ */
+export async function resolveUniverse(
+  db: Database,
+  opts?: { source?: UniverseSource; category?: string },
+): Promise<Universe> {
+  const source = opts?.source ?? 'api';
+  if (source === 'api') {
+    const all = await fetchAllOctagonEvents();
+    let events: UniverseEntry[] = all.map((e) => ({
+      event_ticker: e.event_ticker,
+      category: e.series_category ?? null,
+    }));
+    if (opts?.category) {
+      const needle = opts.category.toLowerCase();
+      events = events.filter((e) => e.category?.toLowerCase().includes(needle));
+    }
+    return {
+      events,
+      source,
+      description: `${events.length} events from Octagon API (systematic universe)`,
+    };
+  }
+  // Legacy local path
+  const { query, params } = buildEventQuery('', opts?.category);
+  const rows = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
+  return {
+    events: rows.map((r) => ({ event_ticker: r.event_ticker, category: r.category })),
+    source,
+    description: `${rows.length} events from local octagon_reports (NON-SYSTEMATIC — reflects past usage of this machine)`,
+  };
+}
+/**
+ * Fetch the Kalshi event payload for each event in the universe, once.
+ * Returns a map keyed by event_ticker. Both discoverSettledMarkets and
+ * discoverOpenMarkets can read from this single map instead of each
+ * re-fetching every event payload — halves the Kalshi call count.
+ */
+export async function fetchEventPayloads(
+  universe: UniverseEntry[],
+): Promise<Map<string, KalshiMarket[]>> {
+  const out = new Map<string, KalshiMarket[]>();
+  await parallelMap(universe, async (entry) => {
+    const markets = await fetchEventMarkets(entry.event_ticker);
+    out.set(entry.event_ticker, markets);
+  }, CONCURRENCY);
+  return out;
+}
 export interface SettledMarket {
   ticker: string;
   event_ticker: string;
@@ -103,13 +186,23 @@ export async function parallelMap<T, R>(
  */
 export async function discoverSettledMarkets(
   db: Database,
-  opts?: { category?: string },
+  opts?: {
+    category?: string;
+    /** Pre-resolved universe + payloads (Phase 4 path). When omitted, falls back to the legacy local SQL path. */
+    universe?: Universe;
+    payloads?: Map<string, KalshiMarket[]>;
+  },
 ): Promise<SettledMarket[]> {
-  const { query, params } = buildEventQuery('', opts?.category);
-  const events = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
+  let events: Array<{ event_ticker: string; category: string | null }>;
+  if (opts?.universe) {
+    events = opts.universe.events;
+  } else {
+    const { query, params } = buildEventQuery('', opts?.category);
+    events = db.query(query).all(params) as Array<{ event_ticker: string; category: string | null }>;
+  }
   const batchResults = await parallelMap(events, async ({ event_ticker, category: cat }) => {
-    const markets = await fetchEventMarkets(event_ticker);
+    const markets = opts?.payloads?.get(event_ticker) ?? await fetchEventMarkets(event_ticker);
     const settled: SettledMarket[] = [];
     for (const m of markets) {
@@ -137,13 +230,22 @@ export async function discoverSettledMarkets(
  */
 export async function discoverOpenMarkets(
   db: Database,
-  opts?: { category?: string },
+  opts?: {
+    category?: string;
+    universe?: Universe;
+    payloads?: Map<string, KalshiMarket[]>;
+  },
 ): Promise<OpenMarket[]> {
-  const { query: q2, params: p2 } = buildEventQuery('', opts?.category);
-  const events2 = db.query(q2).all(p2) as Array<{ event_ticker: string; category: string | null }>;
+  let events2: Array<{ event_ticker: string; category: string | null }>;
+  if (opts?.universe) {
+    events2 = opts.universe.events;
+  } else {
+    const { query: q2, params: p2 } = buildEventQuery('', opts?.category);
+    events2 = db.query(q2).all(p2) as Array<{ event_ticker: string; category: string | null }>;
+  }
   const batchResults = await parallelMap(events2, async ({ event_ticker, category: cat }) => {
-    const markets = await fetchEventMarkets(event_ticker);
+    const markets = opts?.payloads?.get(event_ticker) ?? await fetchEventMarkets(event_ticker);
     const open: OpenMarket[] = [];
     for (const m of markets) {

package/src/backtest/metrics.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { ScoredSignal, BacktestResult } from './types.js';
+import type { ScoredSignal, BacktestResult, LegMetrics } from './types.js';
 /**
  * Skill score: how much better Octagon is vs the market as a forecaster.
@@ -44,6 +44,68 @@ export function bootstrapCI(
   return [stats[lo], stats[hi]];
 }
+/**
+ * Cluster bootstrap — resamples GROUPS with replacement, not individual rows.
+ *
+ * Use when the unit of risk is the group, not the row. In our case Kalshi
+ * events are multi-outcome: a single "Will the Fed cut N times?" event has
+ * a ladder of NO contracts that all settle together. If the model bets NO
+ * on five rungs and the Fed cuts once, all five settle NO simultaneously
+ * — that's *one* underlying outcome contributing five rows.
+ *
+ * Row-level bootstrap (`bootstrapCI` above) treats those rows as independent
+ * → CI width shrinks with √N where N is the row count. Real effective N is
+ * closer to the event count, so the honest interval is roughly 2× wider.
+ *
+ * This function takes `groups` (each group = a contiguous block of indices
+ * referring to per-row data carried in closures by `statFn`), draws
+ * `groups.length` groups with replacement per iteration, concatenates the
+ * indices, and applies `statFn` to the pooled sample.
+ */
+export function clusterBootstrapCI(
+  groups: number[][],
+  statFn: (sampleIndices: number[]) => number,
+  iterations = 10_000,
+  alpha = 0.05,
+): [number, number] {
+  if (groups.length === 0) return [0, 0];
+  if (!Number.isFinite(iterations) || !Number.isInteger(iterations) || iterations <= 0) {
+    throw new Error(`clusterBootstrapCI: iterations must be a finite integer > 0, got ${iterations}`);
+  }
+  if (!Number.isFinite(alpha) || alpha <= 0 || alpha >= 1) {
+    throw new Error(`clusterBootstrapCI: alpha must be a finite number in (0, 1), got ${alpha}`);
+  }
+  const stats: number[] = [];
+  for (let i = 0; i < iterations; i++) {
+    const pooled: number[] = [];
+    for (let j = 0; j < groups.length; j++) {
+      const g = groups[Math.floor(Math.random() * groups.length)];
+      pooled.push(...g);
+    }
+    if (pooled.length === 0) { stats.push(0); continue; }
+    stats.push(statFn(pooled));
+  }
+  stats.sort((a, b) => a - b);
+  const lo = Math.min(Math.max(0, Math.floor((alpha / 2) * stats.length)), stats.length - 1);
+  const hi = Math.min(Math.max(0, Math.floor((1 - alpha / 2) * stats.length)), stats.length - 1);
+  return [stats[lo], stats[hi]];
+}
+/**
+ * Build event-clustered groups: indices for each signal grouped by event_ticker.
+ * Used to feed clusterBootstrapCI when the underlying signals are correlated
+ * within an event (multi-outcome ladders, mutually-exclusive option sets).
+ */
+function groupIndicesByEvent<T extends { event_ticker: string }>(items: T[]): number[][] {
+  const byEvent = new Map<string, number[]>();
+  items.forEach((item, idx) => {
+    const arr = byEvent.get(item.event_ticker) ?? [];
+    arr.push(idx);
+    byEvent.set(item.event_ticker, arr);
+  });
+  return [...byEvent.values()];
+}
 /**
  * Compute Brier score: ((forecast/100) - (outcome/100))²
  * Both forecast and outcome are on 0-100 scale.
@@ -52,10 +114,162 @@ function brier(forecast: number, outcome: number): number {
   return ((forecast / 100) - (outcome / 100)) ** 2;
 }
+/** Entry-price bands used by the within-band skill calculation. */
+const PRICE_BANDS: Array<{ label: string; lo: number; hi: number }> = [
+  { label: '5-20¢',   lo: 5,  hi: 20 },
+  { label: '20-40¢',  lo: 20, hi: 40 },
+  { label: '40-60¢',  lo: 40, hi: 60 },
+  { label: '60-80¢',  lo: 60, hi: 80 },
+  { label: '80-95¢',  lo: 80, hi: 95 },
+];
+/**
+ * Compute zero-skill baselines on the same post-filter universe as the model.
+ *
+ * Why: Kalshi events are multi-outcome — most resolved contracts settle NO
+ * because each event has one YES outcome and many NOs. A model that
+ * consistently picks NO will hit ~75% by structure alone. The "always-NO"
+ * baseline strips that structural tilt out so we can see whether the model
+ * has any selection skill beyond the universe's bias.
+ *
+ * Within-band skill: model NO-bet ROI minus always-NO ROI, computed inside
+ * entry-price buckets and capital-weighted across buckets. This also
+ * controls for the entry-price mix — a model that only bets cheap
+ * longshots will look great vs. an always-NO baseline run over the full
+ * universe, but mediocre once we compare within the same price band.
+ */
+function computeBaselines(signals: ScoredSignal[]): BacktestResult['baselines'] {
+  // Universe-wide always-NO / always-YES on the same post-filter rows.
+  const noPnl = (s: ScoredSignal): { pnl: number; capital: number; hit: boolean } => {
+    const capital = (100 - s.market_then) / 100;
+    const settlement = 100 - s.market_now;
+    const pnl = (settlement - (100 - s.market_then)) / 100;
+    return { pnl, capital, hit: s.market_now < s.market_then };
+  };
+  const yesPnl = (s: ScoredSignal): { pnl: number; capital: number; hit: boolean } => {
+    const capital = s.market_then / 100;
+    const pnl = (s.market_now - s.market_then) / 100;
+    return { pnl, capital, hit: s.market_now > s.market_then };
+  };
+  let noP = 0, noC = 0, noHits = 0;
+  let yesP = 0, yesC = 0, yesHits = 0;
+  for (const s of signals) {
+    const n = noPnl(s); noP += n.pnl; noC += n.capital; if (n.hit) noHits++;
+    const y = yesPnl(s); yesP += y.pnl; yesC += y.capital; if (y.hit) yesHits++;
+  }
+  const alwaysNoRoi = noC > 0 ? noP / noC : 0;
+  const alwaysYesRoi = yesC > 0 ? yesP / yesC : 0;
+  const alwaysNoHitRate = signals.length > 0 ? noHits / signals.length : 0;
+  const alwaysYesHitRate = signals.length > 0 ? yesHits / signals.length : 0;
+  // Within-band: bucket by entry price, compute model-NO-bet ROI minus
+  // always-NO ROI per band, then capital-weight the deltas across bands.
+  // The model's NO bets are the meaningful comparable population (the
+  // structural NO tilt is the dominant source of "skill" in the universe).
+  const breakdown: BacktestResult['baselines']['within_band_breakdown'] = [];
+  let weightedDeltaNumer = 0;
+  let weightedDeltaDenom = 0;
+  for (const band of PRICE_BANDS) {
+    const inBand = signals.filter((s) => s.market_then >= band.lo && s.market_then < band.hi);
+    if (inBand.length === 0) continue;
+    let bandModelPnl = 0, bandModelCap = 0, bandModelN = 0;
+    let bandNoPnl = 0, bandNoCap = 0;
+    for (const s of inBand) {
+      const n = noPnl(s); bandNoPnl += n.pnl; bandNoCap += n.capital;
+      // Model "bet" here = signal where model picked NO (edge_pp < 0) and
+      // capital is the NO-side capital we already stored in s.capital.
+      if (s.edge_pp < 0) {
+        bandModelPnl += s.pnl;
+        bandModelCap += s.capital;
+        bandModelN++;
+      }
+    }
+    const modelRoi = bandModelCap > 0 ? bandModelPnl / bandModelCap : 0;
+    const baselineRoi = bandNoCap > 0 ? bandNoPnl / bandNoCap : 0;
+    const deltaPp = (modelRoi - baselineRoi) * 100;
+    breakdown.push({
+      band: band.label,
+      model_no_roi: modelRoi,
+      always_no_roi: baselineRoi,
+      skill_delta_pp: deltaPp,
+      n_model: bandModelN,
+      n_universe: inBand.length,
+    });
+    weightedDeltaNumer += deltaPp * bandModelCap;
+    weightedDeltaDenom += bandModelCap;
+  }
+  const withinBandSkillPp = weightedDeltaDenom > 0 ? weightedDeltaNumer / weightedDeltaDenom : 0;
+  return {
+    always_no_roi: alwaysNoRoi,
+    always_no_hit_rate: alwaysNoHitRate,
+    always_yes_roi: alwaysYesRoi,
+    always_yes_hit_rate: alwaysYesHitRate,
+    within_band_skill_pp: withinBandSkillPp,
+    within_band_breakdown: breakdown,
+  };
+}
+/**
+ * Compute the scorecard for one leg (resolved-only or unresolved-only).
+ * Same hit-rate and capital-weighted ROI definitions as the blended
+ * computation, just scoped to the subset.
+ */
+function computeLegMetrics(signals: ScoredSignal[], minEdgePp: number): LegMetrics {
+  const edgeSignals = signals.filter((s) => s.edge_pp !== 0 && Math.abs(s.edge_pp) >= minEdgePp);
+  const edgeCount = edgeSignals.length;
+  const hits = edgeSignals.filter((s) =>
+    s.edge_pp > 0 ? s.market_now > s.market_then : s.market_now < s.market_then,
+  );
+  const hitRate = edgeCount > 0 ? hits.length / edgeCount : 0;
+  const hitRateData = edgeSignals.map((s) =>
+    s.edge_pp > 0 ? (s.market_now > s.market_then ? 1 : 0) : (s.market_now < s.market_then ? 1 : 0),
+  );
+  const legEventGroups = groupIndicesByEvent(edgeSignals);
+  const hitRateCI: [number, number] = edgeCount > 0
+    ? clusterBootstrapCI(legEventGroups, (sample) => {
+        let sum = 0;
+        for (const idx of sample) sum += hitRateData[idx];
+        return sample.length > 0 ? sum / sample.length : 0;
+      })
+    : [0, 0];
+  const pnl = edgeSignals.reduce((sum, s) => sum + s.pnl, 0);
+  const totalCapital = edgeSignals.reduce((sum, s) => sum + s.capital, 0);
+  const roi = totalCapital > 0 ? pnl / totalCapital : 0;
+  return {
+    edge_signals: edgeCount,
+    edge_hit_rate: hitRate,
+    hit_rate_ci: hitRateCI,
+    flat_bet_pnl: pnl,
+    flat_bet_roi: roi,
+    total_capital: totalCapital,
+  };
+}
+const EMPTY_LEG: LegMetrics = {
+  edge_signals: 0,
+  edge_hit_rate: 0,
+  hit_rate_ci: [0, 0],
+  flat_bet_pnl: 0,
+  flat_bet_roi: 0,
+  total_capital: 0,
+};
+const EMPTY_BASELINES: BacktestResult['baselines'] = {
+  always_no_roi: 0,
+  always_no_hit_rate: 0,
+  always_yes_roi: 0,
+  always_yes_hit_rate: 0,
+  within_band_skill_pp: 0,
+  within_band_breakdown: [],
+};
 /**
  * Compute all backtest metrics from a unified list of scored signals.
  */
-export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<BacktestResult, 'subscription_notice'> {
+export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<BacktestResult, 'subscription_notice' | 'signals_dropped_no_volume' | 'universe_source' | 'universe_size' | 'universe_description' | 'fee_model' | 'flat_bet_pnl_net' | 'flat_bet_roi_net'> {
   const n = signals.length;
   if (n === 0) {
     return {
@@ -75,6 +289,9 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
       flat_bet_roi: 0,
       total_capital: 0,
       signals: [],
+      baselines: EMPTY_BASELINES,
+      resolved_metrics: EMPTY_LEG,
+      unresolved_metrics: EMPTY_LEG,
     };
   }
@@ -84,10 +301,14 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
   const brierOctagon = brierOctagonScores.reduce((a, b) => a + b, 0) / n;
   const brierMarket = brierMarketScores.reduce((a, b) => a + b, 0) / n;
-  // Skill score with bootstrap CI — resample both
+  // Skill score with EVENT-CLUSTERED bootstrap CI. Why clustered: multi-
+  // outcome events (Fed-cut ladders, election option sets, price strikes)
+  // settle as a block — N contracts from one event aren't N independent
+  // observations. Row-level bootstrap shrinks the CI with sqrt(N rows)
+  // when the right denominator is sqrt(N events).
   const skillScore = computeSkillScore(brierOctagon, brierMarket);
-  const indices = signals.map((_, i) => i);
-  const skillCI = bootstrapCI(indices, (sample) => {
+  const eventGroups = groupIndicesByEvent(signals);
+  const skillCI = clusterBootstrapCI(eventGroups, (sample) => {
     let sumOctagon = 0;
     let sumMarket = 0;
     for (const idx of sample) {
@@ -112,13 +333,16 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
   });
   const hitRate = edgeCount > 0 ? hits.length / edgeCount : 0;
-  // Bootstrap hit rate CI
+  // Event-clustered hit rate CI on the EDGE signals only.
   const hitRateData = edgeSignals.map(s => {
     if (s.edge_pp > 0) return s.market_now > s.market_then ? 1 : 0;
     return s.market_now < s.market_then ? 1 : 0;
   });
-  const hitRateCI = bootstrapCI(hitRateData, (sample) => {
-    return sample.reduce((a, b) => a + b, 0) / sample.length;
+  const edgeEventGroups = groupIndicesByEvent(edgeSignals);
+  const hitRateCI = clusterBootstrapCI(edgeEventGroups, (sample) => {
+    let sum = 0;
+    for (const idx of sample) sum += hitRateData[idx];
+    return sample.length > 0 ? sum / sample.length : 0;
   });
   // P&L and capital-weighted ROI (matches Supabase methodology):
@@ -161,5 +385,8 @@ export function computeMetrics(signals: ScoredSignal[], minEdgePp = 0.5): Omit<B
     flat_bet_roi: roi,
     total_capital: totalCapital,
     signals,
+    baselines: computeBaselines(signals),
+    resolved_metrics: computeLegMetrics(signals.filter((s) => s.resolved), minEdgePp),
+    unresolved_metrics: computeLegMetrics(signals.filter((s) => !s.resolved), minEdgePp),
   };
 }

package/src/backtest/renderer.ts CHANGED Viewed

@@ -5,6 +5,16 @@ export interface FormatOpts {
   minEdge?: number;          // 0-1 scale, default 0.005 (0.5pp)
 }
+/** Format a 0-1 ROI as a signed percentage string. */
+function fmtRoi(roi: number): string {
+  return `${roi >= 0 ? '+' : ''}${(roi * 100).toFixed(1)}%`;
+}
+/** Format a percentage-point delta with sign. */
+function fmtPp(pp: number): string {
+  return `${pp >= 0 ? '+' : ''}${pp.toFixed(1)}pp`;
+}
 /**
  * Format complete backtest result for terminal display.
  */
@@ -17,6 +27,20 @@ export function formatBacktestHuman(result: BacktestResult, opts?: FormatOpts):
   const lines: string[] = [];
   lines.push(`Octagon Backtest — ${result.days}-day lookback (${fromStr} – ${toStr})`);
+  lines.push(`Universe: ${result.universe_description}`);
+  let feeHeader: string;
+  switch (result.fee_model) {
+    case 'none':
+      feeHeader = 'none — output is GROSS (pre-fee)';
+      break;
+    case 'taker':
+      feeHeader = 'taker (entries charged Kalshi taker fee = 0.07·p·(1−p))';
+      break;
+    case 'maker':
+      feeHeader = 'maker (free-entry execution assumption — net P&L equals gross)';
+      break;
+  }
+  lines.push(`Fee model: ${feeHeader}`);
   lines.push('══════════════════════════════════════════════════════════');
   lines.push('');
@@ -47,9 +71,73 @@ export function formatBacktestHuman(result: BacktestResult, opts?: FormatOpts):
   // lines.push('');
   lines.push(`  Edge signals      ${result.edge_signals}   (min edge: ${minEdgePp}pp)`);
   if (result.edge_signals > 0) {
-    lines.push(`  Hit rate          ${(result.edge_hit_rate * 100).toFixed(1)}%  [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%]`);
-    lines.push(`  Flat-bet P&L      ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (ROI: ${result.flat_bet_roi >= 0 ? '+' : ''}${(result.flat_bet_roi * 100).toFixed(1)}%)`);
-    lines.push(`  Capital deployed  $${result.total_capital.toFixed(2)}   (capital-weighted ROI)`);
+    // Resolved settles at 0/100 — realized. Unresolved is marked to the
+    // current Kalshi price — paper P&L that can reverse. Splitting them
+    // makes it visible when one leg is carrying a weak other.
+    const r = result.resolved_metrics;
+    const u = result.unresolved_metrics;
+    if (r.edge_signals > 0) {
+      lines.push('');
+      lines.push('  RESOLVED (realized P&L)');
+      lines.push(`    Hit rate        ${(r.edge_hit_rate * 100).toFixed(1)}%  [95% CI: ${(r.hit_rate_ci[0] * 100).toFixed(1)}% to ${(r.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered]   n=${r.edge_signals}`);
+      lines.push(`    Flat-bet P&L    ${fmtRoi(r.flat_bet_roi)} ROI  (${r.flat_bet_pnl >= 0 ? '+' : ''}$${r.flat_bet_pnl.toFixed(2)} on $${r.total_capital.toFixed(2)} capital)`);
+    }
+    if (u.edge_signals > 0) {
+      lines.push('');
+      lines.push('  UNRESOLVED (mark-to-market — paper P&L)');
+      lines.push(`    Directional drift ${(u.edge_hit_rate * 100).toFixed(1)}%  [95% CI: ${(u.hit_rate_ci[0] * 100).toFixed(1)}% to ${(u.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered]   n=${u.edge_signals}`);
+      lines.push(`    M2M P&L         ${fmtRoi(u.flat_bet_roi)} ROI  (${u.flat_bet_pnl >= 0 ? '+' : ''}$${u.flat_bet_pnl.toFixed(2)} on $${u.total_capital.toFixed(2)} capital)`);
+    }
+    if (r.edge_signals > 0 && u.edge_signals > 0) {
+      lines.push('');
+      lines.push('  COMBINED (both legs blended — interpret with care)');
+      lines.push(`    Hit rate        ${(result.edge_hit_rate * 100).toFixed(1)}%  [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%, event-clustered]`);
+      lines.push(`    Flat-bet P&L    ${fmtRoi(result.flat_bet_roi)} ROI  (${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} on $${result.total_capital.toFixed(2)} capital)`);
+    }
+    // Fee drag — show only when --fees is on so existing output is unchanged.
+    if (result.fee_model !== 'none' && result.flat_bet_pnl !== result.flat_bet_pnl_net) {
+      const feeDrag = result.flat_bet_pnl - result.flat_bet_pnl_net;
+      lines.push('');
+      lines.push(`  Fees applied (${result.fee_model})`);
+      lines.push(`    Gross P&L       ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (${fmtRoi(result.flat_bet_roi)} ROI)`);
+      lines.push(`    Fee drag        -$${feeDrag.toFixed(2)}`);
+      lines.push(`    Net P&L         ${result.flat_bet_pnl_net >= 0 ? '+' : ''}$${result.flat_bet_pnl_net.toFixed(2)} (${fmtRoi(result.flat_bet_roi_net)} ROI)`);
+    } else if (r.edge_signals === 0 && u.edge_signals === 0) {
+      // No edge signals on either leg — fall back to the old single-line view.
+      lines.push(`  Hit rate          ${(result.edge_hit_rate * 100).toFixed(1)}%  [95% CI: ${(result.hit_rate_ci[0] * 100).toFixed(1)}% to ${(result.hit_rate_ci[1] * 100).toFixed(1)}%]`);
+      lines.push(`  Flat-bet P&L      ${result.flat_bet_pnl >= 0 ? '+' : ''}$${result.flat_bet_pnl.toFixed(2)} (ROI: ${fmtRoi(result.flat_bet_roi)})`);
+    }
+  }
+  // ─── Zero-skill baselines ─────────────────────────────────────────────
+  // The headline ROI / hit rate can look strong purely from the universe's
+  // structural NO tilt (multi-outcome events resolve mostly NO). These two
+  // baselines run the same post-filter universe under zero-skill strategies
+  // so the user can see whether the model adds anything.
+  const b = result.baselines;
+  if (result.signals.length > 0) {
+    lines.push('');
+    lines.push('  Zero-skill baselines (same universe, no model):');
+    lines.push(`    Always-NO ROI     ${fmtRoi(b.always_no_roi)}   hit rate ${(b.always_no_hit_rate * 100).toFixed(1)}%`);
+    lines.push(`    Always-YES ROI    ${fmtRoi(b.always_yes_roi)}   hit rate ${(b.always_yes_hit_rate * 100).toFixed(1)}%`);
+    lines.push(`    Within-band skill ${fmtPp(b.within_band_skill_pp)}   (model NO-ROI minus always-NO ROI, capital-weighted across entry-price bands)`);
+    // Per-band breakdown when at least one band has model bets
+    if (b.within_band_breakdown.some((r) => r.n_model > 0)) {
+      lines.push('');
+      lines.push('    Per-band skill breakdown:');
+      lines.push(`      ${'Band'.padEnd(8)}  ${'Model NO ROI'.padStart(13)}  ${'Always-NO ROI'.padStart(14)}  ${'Delta'.padStart(9)}  ${'n_model'.padStart(7)}  ${'n_total'.padStart(7)}`);
+      for (const row of b.within_band_breakdown) {
+        if (row.n_universe === 0) continue;
+        const delta = `${row.skill_delta_pp >= 0 ? '+' : ''}${row.skill_delta_pp.toFixed(1)}pp`;
+        lines.push(`      ${row.band.padEnd(8)}  ${fmtRoi(row.model_no_roi).padStart(13)}  ${fmtRoi(row.always_no_roi).padStart(14)}  ${delta.padStart(9)}  ${String(row.n_model).padStart(7)}  ${String(row.n_universe).padStart(7)}`);
+      }
+    }
+  }
+  // Coverage cost of the strict (no lifetime-volume look-ahead) volume gate.
+  if (result.signals_dropped_no_volume > 0) {
+    lines.push('');
+    lines.push(`  Signals dropped: ${result.signals_dropped_no_volume} (no per-contract volume in Octagon snapshot; lifetime-volume fallback removed to avoid look-ahead bias)`);
   }
   // Resolved detail table

package/src/backtest/types.ts CHANGED Viewed

@@ -5,6 +5,10 @@ export interface BacktestOpts {
   category?: string;
   minEdge: number;            // fractional (0-1 scale), converted to pp by caller (e.g., 0.005 → 0.5pp)
   exportPath?: string;
+  /** Where the universe is sourced from. Default 'api'. */
+  universe?: 'api' | 'local';
+  /** Fee model for net P&L. Default 'none' — output is gross. */
+  fees?: 'none' | 'taker' | 'maker';
 }
 /** A single scored market signal — unified type for both resolved and unresolved. */
@@ -16,7 +20,12 @@ export interface ScoredSignal {
   market_then: number;        // 0-100 (Kalshi trading price N days ago, from Octagon snapshot)
   market_now: number;         // 0-100 (settlement for resolved, current price for unresolved)
   resolved: boolean;
-  edge_pp: number;            // model_prob - market_then
+  /**
+   * Raw, unrounded edge in percentage points: model_prob − market_then.
+   * Filtering on |edge| should always use this value; display layers
+   * round to 0.1pp or 1pp as appropriate.
+   */
+  edge_pp: number;
   pnl: number;               // computed P&L for this signal ($ per $1 face value)
   capital: number;           // $ capital deployed per $1 face value: kp/100 for YES edges, (100-kp)/100 for NO edges
   edge_bucket: string;        // absolute-edge bucket label e.g. "0-5%", "5-10%", ..., "90%+"
@@ -24,6 +33,19 @@ export interface ScoredSignal {
   close_time: string;
 }
+/**
+ * Per-leg scorecard: realized P&L on the resolved leg, mark-to-market on the
+ * unresolved leg. Computed on the leg's subset of signals.
+ */
+export interface LegMetrics {
+  edge_signals: number;
+  edge_hit_rate: number;
+  hit_rate_ci: [number, number];
+  flat_bet_pnl: number;
+  flat_bet_roi: number;
+  total_capital: number;
+}
 export interface BacktestResult {
   verdict: { summary: string; significant: boolean; profitable: boolean };
   days: number;
@@ -41,5 +63,74 @@ export interface BacktestResult {
   flat_bet_roi: number;       // capital-weighted: sum(pnl) / sum(capital) across edge signals
   total_capital: number;      // sum of capital across edge signals (ROI denominator)
   signals: ScoredSignal[];
+  /**
+   * Count of candidate signals dropped because the Octagon snapshot had no
+   * per-contract volume (older snapshots predate the per-contract field).
+   * We deliberately do NOT fall back to Kalshi lifetime volume — that
+   * would be a look-ahead bias (lifetime includes post-entry trading).
+   * Surfaced so users can see the coverage cost of the strict gate.
+   */
+  signals_dropped_no_volume: number;
+  /**
+   * Provenance for the universe — printed in the scorecard header so users
+   * (and downstream JSON consumers) can see whether the backtest ran over
+   * the systematic Octagon-API universe or the legacy local-DB universe.
+   */
+  universe_source: 'api' | 'local';
+  universe_size: number;
+  universe_description: string;
+  /**
+   * Fee model applied to the P&L. 'none' means the reported P&L is gross
+   * (no fees, no spreads). 'taker' charges the Kalshi taker fee per entry.
+   * 'maker' assumes free entry. Default 'none' so existing output is
+   * unchanged — opt in with --fees taker.
+   */
+  fee_model: 'none' | 'taker' | 'maker';
+  /** P&L net of fees when fee_model != 'none', else equal to flat_bet_pnl. */
+  flat_bet_pnl_net: number;
+  flat_bet_roi_net: number;
+  /**
+   * Sub-scorecards computed on the resolved and unresolved legs separately.
+   * Resolved settles at 0/100 — realized outcomes. Unresolved is marked to
+   * an arbitrary "now" price and may reverse before settlement. Blending
+   * them in the top-level fields can hide cases where the paper P&L
+   * inflates a weak realized result.
+   *
+   * The blended top-level fields (`edge_hit_rate`, `flat_bet_roi`, etc.)
+   * are kept for backward compatibility with existing consumers.
+   */
+  resolved_metrics: LegMetrics;
+  unresolved_metrics: LegMetrics;
+  /**
+   * Zero-skill baseline ROIs on the same post-filter universe. Always-NO is
+   * the relevant null because Kalshi's universe is structurally NO-heavy:
+   * multi-outcome events have one YES and many NOs. A model that consistently
+   * beats always-NO has selection skill; one that doesn't is mostly
+   * harvesting the favorite-longshot tilt.
+   */
+  baselines: {
+    always_no_roi: number;
+    always_no_hit_rate: number;
+    always_yes_roi: number;
+    always_yes_hit_rate: number;
+    /**
+     * Model NO-bet ROI minus always-NO ROI, computed in entry-price bands
+     * (5-20, 20-40, 40-60, 60-80, 80-95) and capital-weighted across bands.
+     * This is the honest "within-band skill" delta: it controls for both
+     * the structural NO tilt AND the entry-price mix.
+     */
+    within_band_skill_pp: number;
+    /**
+     * Per-band breakdown so users can see where the skill (if any) comes from.
+     */
+    within_band_breakdown: Array<{
+      band: string;            // e.g. "20-40¢"
+      model_no_roi: number;    // model NO-bet ROI in this band
+      always_no_roi: number;   // always-NO ROI in this band
+      skill_delta_pp: number;  // (model - baseline) × 100, percentage points
+      n_model: number;         // count of model NO bets in this band
+      n_universe: number;      // count of all-NO universe contracts in this band
+    }>;
+  };
   subscription_notice?: string;
 }

package/src/commands/analyze.ts CHANGED Viewed

@@ -115,12 +115,43 @@ function getVolume(m: KalshiMarket): number {
   return m.volume || 0;
 }
+/**
+ * Normalize user input into a canonical Kalshi ticker.
+ *
+ * Accepts any of:
+ *   - Bare ticker, any case: `kxmeasles-26`, `KXMEASLES-26`, `KxMeAsLeS-26`
+ *   - Kalshi URL: `https://kalshi.com/markets/kxmeasles/measles-cases/kxmeasles-26`
+ *   - URL without protocol: `kalshi.com/markets/kxmeasles-26`
+ *   - URL with query / fragment: `…/kxmeasles-26?ref=foo#yes`
+ *
+ * Strategy: detect URL-shaped input, extract the last non-empty path segment
+ * (which by Kalshi convention is the ticker), then uppercase. Bare tickers
+ * are simply uppercased. Kalshi's path is case-sensitive — without this
+ * `/markets/kxmeasles-26` 404s even though the ticker exists.
+ *
+ * Edge cases, as implemented:
+ *   - Surrounding whitespace is trimmed before anything else.
+ *   - Any `http://` / `https://` input is treated as URL-shaped, not only
+ *     kalshi.com hosts; the host is discarded and only the path is inspected.
+ *   - A URL with no path segment (e.g. `https://kalshi.com/`) yields no
+ *     ticker candidate, so the whole trimmed input is uppercased and returned.
+ *   - No validation is done here; the result is only a ticker candidate.
+ *
+ * @param input Raw user text: a ticker or a Kalshi URL.
+ * @returns The uppercased ticker candidate.
+ */
+export function normalizeKalshiInput(input: string): string {
+  const trimmed = input.trim();
+  const looksLikeUrl =
+    /^https?:\/\//i.test(trimmed) || /^(www\.)?kalshi\.com\//i.test(trimmed);
+  if (looksLikeUrl) {
+    const noProto = trimmed
+      .replace(/^https?:\/\/[^/]+/i, '') // drop scheme + host (everything before the first path slash)
+      .replace(/^(www\.)?kalshi\.com/i, ''); // protocol-less form: drop the bare host
+    const path = noProto.replace(/[?#].*$/, '').replace(/\/+$/, ''); // strip query/fragment, then trailing slashes
+    const segments = path.split('/').filter(Boolean); // filter(Boolean) discards empty segments
+    const last = segments[segments.length - 1] ?? '';
+    if (last) return last.toUpperCase();
+  }
+  return trimmed.toUpperCase(); // bare ticker, or URL-shaped input with no usable path segment
+}
 /**
  * Resolve a user-provided ticker to a market ticker.
- * Accepts: market ticker, event ticker, or series ticker.
+ * Accepts: market ticker, event ticker, series ticker, or Kalshi URL.
  * Returns the resolved KalshiMarket (picking the most active open market for events/series).
  */
-export async function resolveMarket(input: string): Promise<KalshiMarket> {
+export async function resolveMarket(rawInput: string): Promise<KalshiMarket> {
+  const input = normalizeKalshiInput(rawInput);
   // 1. Try as a market ticker first
   try {
     const res = await callKalshiApi('GET', `/markets/${input}`);
@@ -178,7 +209,7 @@ export async function resolveMarket(input: string): Promise<KalshiMarket> {
     if (!(err instanceof KalshiApiError && err.statusCode === 404)) throw err;
   }
-  throw new Error(`Could not find a market for "${input}". Try a full market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), or series ticker (e.g. KXBTC).`);
+  throw new Error(`Could not find a market for "${rawInput}" (normalized to "${input}"). Try a market ticker (e.g. KXBTC-26MAR14-T50049), event ticker (e.g. KXBTC-26MAR14), series ticker (e.g. KXBTC), or a Kalshi URL like https://kalshi.com/markets/<series>/<slug>/<event>.`);
 }
 export async function handleAnalyze(
@@ -390,11 +421,41 @@ export async function handleAnalyze(
   //                 This is the "Refreshed" date — what bumps when --refresh runs.
   //   modelRunAt  = Octagon's analysis_last_updated (when their model last
   //                 scored this event). Independent of our cache.
-  const refreshedAt = latestDbReport
-    ? new Date(latestDbReport.fetched_at * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
+  //
+  // Load timestamps from a single coherent source — the row identified by
+  // report.reportId is the exact row used for THIS analysis. The previous
+  // implementation mixed fields from market-keyed and event-keyed rows
+  // (different captured runs), so refreshedAt and modelRunAt could refer
+  // to different snapshots.
+  //
+  // If the primary row lacks analysis_last_updated (fetchReport path doesn't
+  // expose it), fall back to the latest event-keyed prefetch row for it.
+  // fetched_at is borrowed from that row only when the primary lookup produced none.
+  const primaryRow = report.reportId
+    ? db.query(
+        `SELECT fetched_at, analysis_last_updated FROM octagon_reports WHERE report_id = $rid`,
+      ).get({ $rid: report.reportId }) as
+        | { fetched_at: number; analysis_last_updated: string | null }
+        | undefined
+    : undefined;
+  let fetchedAtEpoch = primaryRow?.fetched_at ?? null;
+  let analysisLastUpdated = primaryRow?.analysis_last_updated ?? null;
+  if ((!fetchedAtEpoch || !analysisLastUpdated) && eventTicker && eventTicker !== resolvedTicker) {
+    const eventRow = db.query(
+      `SELECT fetched_at, analysis_last_updated FROM octagon_reports
+       WHERE event_ticker = $et AND variant_used = 'events-api'
+       ORDER BY fetched_at DESC LIMIT 1`,
+    ).get({ $et: eventTicker }) as { fetched_at: number; analysis_last_updated: string | null } | undefined;
+    if (eventRow) {
+      fetchedAtEpoch = fetchedAtEpoch ?? eventRow.fetched_at;
+      analysisLastUpdated = analysisLastUpdated ?? eventRow.analysis_last_updated;
+    }
+  }
+  const refreshedAt = fetchedAtEpoch
+    ? new Date(fetchedAtEpoch * 1000).toISOString().replace('T', ' ').slice(0, 16) + ' UTC'
     : null;
-  const modelRunAt = latestDbReport?.analysis_last_updated
-    ? latestDbReport.analysis_last_updated.replace('T', ' ').slice(0, 16) + ' UTC'
+  const modelRunAt = analysisLastUpdated
+    ? analysisLastUpdated.replace('T', ' ').slice(0, 16) + ' UTC'
     : null;
   // hasModel + canComputeEdge were computed earlier (above Kelly/signal),
@@ -403,12 +464,13 @@ export async function handleAnalyze(
   // staleUpstream = user asked for --refresh but Octagon's upstream model run
   // timestamp didn't move. Cache fetch time bumped, but the underlying report
-  // body is the same one Octagon previously generated. The user wanted fresh
-  // analysis; they got an unchanged stale one.
+  // body is the same one Octagon previously generated. Compare against the
+  // same coherent source we used for modelRunAt above — otherwise we could
+  // false-positive on staleness when the two lookups disagreed.
   const staleUpstream = refresh
     && preRefreshAnalysis != null
-    && latestDbReport?.analysis_last_updated != null
-    && preRefreshAnalysis === latestDbReport.analysis_last_updated;
+    && analysisLastUpdated != null
+    && preRefreshAnalysis === analysisLastUpdated;
   // Null out trading-side fields when the underlying inputs are unavailable.
   // JSON consumers previously saw modelProb: 0.5 / marketProb: 0.5 / edge: 0

package/src/commands/backtest.ts CHANGED Viewed

@@ -2,7 +2,7 @@ import type { ParsedArgs } from './parse-args.js';
 import type { CLIResponse } from './json.js';
 import { wrapSuccess } from './json.js';
 import { getDb } from '../db/index.js';
-import { discoverSettledMarkets, discoverOpenMarkets, parallelMap } from '../backtest/discovery.js';
+import { discoverSettledMarkets, discoverOpenMarkets, parallelMap, resolveUniverse, fetchEventPayloads } from '../backtest/discovery.js';
 import { fetchAndCacheHistory, selectSnapshotByDate, SubscriptionRequiredError, type OutcomeProbability } from '../backtest/fetcher.js';
 import { computeMetrics } from '../backtest/metrics.js';
 import type { BacktestResult, ScoredSignal } from '../backtest/types.js';
@@ -37,21 +37,24 @@ function edgeBucketLabel(edgePp: number): string {
 }
 /**
- * Return the tradeable volume for a contract.
- * Prefers per-contract volume fields from the Octagon snapshot (as the
- * Supabase methodology does); falls back to Kalshi lifetime volume for
- * older cached snapshots that pre-date the API's per-contract volume.
+ * Return the tradeable volume for a contract, measured AT SNAPSHOT TIME.
+ *
+ * Returns null when the Octagon snapshot has no per-contract volume — older
+ * snapshots predate the per-contract field. We deliberately do NOT fall back
+ * to Kalshi LIFETIME volume here: lifetime volume includes trading that
+ * happened *after* the entry date, so a contract with zero liquidity at
+ * entry that later became active would silently pass the tradeability gate
+ * retroactively (look-ahead bias on the tradeable filter).
+ *
+ * Callers should skip the signal when this returns null and count it as
+ * "dropped via no per-contract volume" so the coverage cost is visible.
  */
-function contractVolume(
-  perContract: OutcomeProbability | null,
-  fallbackLifetimeVolume: number,
-): number {
-  if (perContract) {
-    const v = typeof perContract.volume === 'number' ? perContract.volume : null;
-    const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
-    if (v !== null || v24 !== null) return Math.max(v ?? 0, v24 ?? 0);
-  }
-  return fallbackLifetimeVolume;
+function contractVolume(perContract: OutcomeProbability | null): number | null {
+  if (!perContract) return null; // no per-contract entry supplied at all
+  const v = typeof perContract.volume === 'number' ? perContract.volume : null;
+  const v24 = typeof perContract.volume_24h === 'number' ? perContract.volume_24h : null;
+  if (v === null && v24 === null) return null; // neither field is numeric: volume is unknown, not zero
+  return Math.max(v ?? 0, v24 ?? 0); // larger of the two; a missing field counts as 0
 }
 export { formatBacktestHuman };
@@ -74,11 +77,26 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
   const signals: ScoredSignal[] = [];
   let subscriptionNotice: string | undefined;
+  // Counter for signals dropped because the Octagon snapshot had no
+  // per-contract volume. Surfaced in the result so users can see how much
+  // coverage the strict (no lifetime-volume look-ahead) gate cost them.
+  let signalsDroppedNoVolume = 0;
+  // ─── UNIVERSE RESOLUTION (Phase 4, Issue 7) ────────────────────────────
+  // Resolve once and share the Kalshi event-payload map between both legs
+  // so we fetch each event payload only once instead of twice. The
+  // payloads map is built eagerly up front — --resolved and --unresolved
+  // are mutually exclusive, so at least one leg always consumes it.
+  const universeSource = args.backtestUniverse ?? 'api';
+  const universe = await resolveUniverse(db, { source: universeSource, category: args.category });
+  // Fetch all event payloads once and share between legs. parallelMap
+  // caps concurrency so this doesn't hammer Kalshi.
+  const payloads = await fetchEventPayloads(universe.events);
   // ─── RESOLVED: settled markets with historical Octagon snapshots ────────
   if (!args.unresolved) {
     try {
-      const settled = await discoverSettledMarkets(db, { category: args.category });
+      const settled = await discoverSettledMarkets(db, { universe, payloads, category: args.category });
       if (settled.length > 0) {
         // Group by event_ticker to batch history fetches
@@ -115,12 +133,17 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
             const marketThen = perMarket.market_probability;
             if (!Number.isFinite(modelProb) || !Number.isFinite(marketThen)) continue;
             const marketNow = m.result === 'yes' ? 100 : 0;
-            const edgePp = Math.round((modelProb - marketThen) * 10) / 10;
+            // Unrounded edge — filtering happens downstream against the
+            // raw value. Display layer rounds for presentation. Rounding
+            // here makes the minEdge filter asymmetric (0.449 rounds to 0.4
+            // and is excluded; 0.451 rounds to 0.5 and is included).
+            const edgePp = modelProb - marketThen;
-            // Tradeable filter — per-contract volume from the Octagon snapshot
-            // (matches Supabase methodology); falls back to Kalshi lifetime
-            // volume for pre-API-change cached snapshots.
-            const vol = contractVolume(perMarket, m.volume);
+            // Tradeable filter — per-contract volume from the Octagon
+            // snapshot only (no Kalshi lifetime-volume fallback, which would
+            // be a look-ahead since lifetime includes post-entry trading).
+            const vol = contractVolume(perMarket);
+            if (vol === null) { signalsDroppedNoVolume++; continue; }
             if (vol < minVolume) continue;
             // Price is marketThen (the price you'd transact at for a resolved bet).
             if (marketThen < minPrice || marketThen > maxPrice) continue;
@@ -137,8 +160,13 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
               pnl = (marketThen - marketNow) / 100;
               capital = (100 - marketThen) / 100;
             } else {
-              // Zero edge: capital still reflects the tradeable side implied by sign
-              // (use YES side so divide-by-zero checks don't fire on 0-edge signals).
+              // Zero edge: model and market agree exactly. Such signals are
+              // excluded from edge metrics (metrics.ts filters edge_pp != 0
+              // && |edge_pp| >= minEdgePp) but kept in `signals` so the CSV
+              // export retains a complete picture of what was scored. We
+              // assign YES-side capital so divide-by-zero checks don't fire
+              // — the capital field is consulted only when computing ROI on
+              // the edge subset, where these rows aren't present.
               capital = marketThen / 100;
             }
             if (capital <= 0) continue;
@@ -175,7 +203,7 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
   // ─── UNRESOLVED: open markets with current Kalshi prices ───────────────
   if (!args.resolved) {
     try {
-      const openMarkets = await discoverOpenMarkets(db, { category: args.category });
+      const openMarkets = await discoverOpenMarkets(db, { universe, payloads, category: args.category });
       // Group by event_ticker to batch history fetches (same as resolved path).
       const openByEvent = new Map<string, typeof openMarkets>();
@@ -207,13 +235,21 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
           const confidenceScore = snap.confidence_score ?? 0;
           const marketNow = m.market_prob * 100; // current Kalshi price (0-100)
-          const edgePp = Math.round((modelProb - marketThen) * 10) / 10;
+          // Unrounded edge — see resolved-leg comment above.
+          const edgePp = modelProb - marketThen;
-          // Tradeable filter — per-contract volume from the Octagon snapshot.
-          const vol = contractVolume(perMarket, m.volume);
+          // Tradeable filter — per-contract volume from the Octagon snapshot
+          // only (no Kalshi lifetime-volume fallback, see contractVolume).
+          const vol = contractVolume(perMarket);
+          if (vol === null) { signalsDroppedNoVolume++; continue; }
           if (vol < minVolume) continue;
-          // Price is marketNow (the current transactable price for an open position).
-          if (marketNow < minPrice || marketNow > maxPrice) continue;
+          // Filter on the ENTRY price (marketThen), not the current mark
+          // (marketNow). Filtering on marketNow conditions the sample on
+          // the outcome: positions that collapsed below minPrice or ran
+          // above maxPrice get silently dropped *after* we observe the
+          // move. That truncates both tails of the P&L distribution and
+          // is a look-ahead bias. Matches the resolved leg above.
+          if (marketThen < minPrice || marketThen > maxPrice) continue;
           // M2M P&L and capital per $1 face value.
           let pnl = 0;
@@ -262,9 +298,34 @@ export async function handleBacktest(args: ParsedArgs): Promise<CLIResponse<Back
   // ─── COMPUTE METRICS ───────────────────────────────────────────────────
   const metrics = computeMetrics(signals, minEdgePp);
+  // Fee model — defaults to 'none' so existing output is unchanged. With
+  // --fees taker we apply Kalshi's taker formula: 0.07 × p × (1−p) per
+  // entry, where p is the entry probability for the side we took. Maker
+  // execution assumes zero entry fee.
+  // We compute on the EDGE signals only (same population as flat_bet_pnl).
+  const feeModel = args.backtestFees ?? 'none';
+  let feeDrag = 0;
+  if (feeModel === 'taker') {
+    for (const s of signals) {
+      if (s.edge_pp === 0 || Math.abs(s.edge_pp) < minEdgePp) continue;
+      // Entry probability on the side we took (YES on positive edge, NO on negative).
+      const p = (s.edge_pp > 0 ? s.market_then : (100 - s.market_then)) / 100;
+      feeDrag += 0.07 * p * (1 - p);
+    }
+  }
+  const flatBetPnlNet = metrics.flat_bet_pnl - feeDrag;
+  const flatBetRoiNet = metrics.total_capital > 0 ? flatBetPnlNet / metrics.total_capital : 0;
   const result: BacktestResult = {
     ...metrics,
     days,
+    signals_dropped_no_volume: signalsDroppedNoVolume,
+    universe_source: universe.source,
+    universe_size: universe.events.length,
+    universe_description: universe.description,
+    fee_model: feeModel,
+    flat_bet_pnl_net: flatBetPnlNet,
+    flat_bet_roi_net: flatBetRoiNet,
     subscription_notice: subscriptionNotice,
   };

package/src/commands/help.ts CHANGED Viewed

@@ -120,6 +120,10 @@ ${p}backtest --category crypto            Filter by category
 ${p}backtest --min-edge 10                Stricter edge threshold in pp (default 0.5pp)
 ${p}backtest --min-volume 10              Per-contract volume gate (default 1)
 ${p}backtest --min-price 5 --max-price 95 Tradeable price band 0-100 (defaults: 5 / 95)
+${p}backtest --universe api              Systematic Octagon-API universe (default; reproducible across machines)
+${p}backtest --universe local            Legacy local octagon_reports universe (offline, NON-SYSTEMATIC)
+${p}backtest --fees taker                Apply Kalshi taker fee (0.07·p·(1−p) per entry); default 'none' = gross
+${p}backtest --fees maker                Maker execution (free entry)
 ${p}backtest --export results.csv         Per-market detail CSV
 ${p}backtest --json                       Machine-readable output
@@ -127,8 +131,9 @@ Looks back N days, compares what the model said then to where the market is now.
 Resolved markets: scored against Kalshi settlement (0 or 100).
 Unresolved markets: mark-to-market vs current Kalshi trading price.
 Per-contract entry: mp/kp come from the per-contract outcome_probabilities on the
-Octagon snapshot (no event-level fallback). Volume gate uses per-contract volume
-from the snapshot when available, else current Kalshi lifetime volume.
+Octagon snapshot (no event-level fallback). Volume gate requires per-contract
+volume from the snapshot; signals without it are dropped (the legacy fallback
+to Kalshi lifetime volume was a look-ahead and has been removed).
 ROI is capital-weighted: sum(pnl) / sum(capital) across edge signals, where capital
 is kp/100 for YES edges and (100-kp)/100 for NO edges (matches Supabase methodology).`,
@@ -467,7 +472,8 @@ System:
 Flags: --json, --refresh, --performance, --dry-run, --verbose
 Backtest flags: --days, --max-age, --resolved, --unresolved, --category, --min-edge,
-                --min-volume, --min-price, --max-price, --export
+                --min-volume, --min-price, --max-price, --export,
+                --universe api|local (default api), --fees none|taker|maker (default none)
 Run "kalshi help <command>" for detailed usage.`;
   }

package/src/commands/index.ts CHANGED Viewed

@@ -157,6 +157,14 @@ export async function handleSlashCommand(input: string): Promise<CommandResult |
         else if (a === '--min-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.minPrice = v; }
         else if (a === '--max-price') { const v = Number(args[++i]); if (Number.isFinite(v) && v >= 0 && v <= 100) btArgs.maxPrice = v; }
         else if (a === '--export') { const v = args[++i]; if (v) btArgs.exportPath = v; }
+        else if (a === '--universe') { const v = args[++i]; if (v === 'api' || v === 'local') btArgs.backtestUniverse = v; }
+        else if (a === '--fees') { const v = args[++i]; if (v === 'none' || v === 'taker' || v === 'maker') btArgs.backtestFees = v; }
+      }
+      // Mirror parse-args' mutual-exclusion check — the slash parser above
+      // accepts both flags independently, which would put btArgs in a
+      // conflicting state before handleBacktest could see it.
+      if (btArgs.resolved && btArgs.unresolved) {
+        return { output: 'Error: --resolved and --unresolved cannot be used together.' };
       }
       const mode = btArgs.resolved ? 'resolved markets' : btArgs.unresolved ? 'open markets' : 'resolved + open markets';
       const daysLabel = btArgs.days ?? 15;

package/src/commands/parse-args.ts CHANGED Viewed

@@ -40,6 +40,10 @@ export interface ParsedArgs {
   category?: string;
   limit?: number;
   exportPath?: string;
+  /** Backtest universe source — 'api' (default) or 'local'. */
+  backtestUniverse?: 'api' | 'local';
+  /** Backtest fee model — 'none' (default), 'taker', or 'maker'. */
+  backtestFees?: 'none' | 'taker' | 'maker';
   minVolume?: number;
   minPrice?: number;
   maxPrice?: number;
@@ -98,6 +102,8 @@ export function parseArgs(argv: string[] = process.argv.slice(2)): ParsedArgs {
   let category: string | undefined;
   let limit: number | undefined;
   let exportPath: string | undefined;
+  let backtestUniverse: 'api' | 'local' | undefined;
+  let backtestFees: 'none' | 'taker' | 'maker' | undefined;
   let maxAge: number | undefined;
   let minVolume: number | undefined;
   let minPrice: number | undefined;
@@ -237,6 +243,22 @@ export function parseArgs(argv: string[] = process.argv.slice(2)): ParsedArgs {
     } else if (arg === '--export') {
       const val = argv[++i];
       if (val != null) { exportPath = val; } else { parseErrors.push('--export requires a value'); }
+    } else if (arg === '--universe') {
+      if (i + 1 >= argv.length) {
+        parseErrors.push('--universe requires a value (expected "api" or "local")');
+      } else {
+        const val = argv[++i];
+        if (val === 'api' || val === 'local') { backtestUniverse = val; }
+        else { parseErrors.push(`Invalid --universe value: "${val}" (expected "api" or "local")`); }
+      }
+    } else if (arg === '--fees') {
+      if (i + 1 >= argv.length) {
+        parseErrors.push('--fees requires a value (expected "none", "taker", or "maker")');
+      } else {
+        const val = argv[++i];
+        if (val === 'none' || val === 'taker' || val === 'maker') { backtestFees = val; }
+        else { parseErrors.push(`Invalid --fees value: "${val}" (expected "none", "taker", or "maker")`); }
+      }
     } else if (arg === '--max-age') {
       const raw = argv[++i];
       if (raw != null) {
@@ -431,7 +453,7 @@ export function parseArgs(argv: string[] = process.argv.slice(2)): ParsedArgs {
   return {
     subcommand, positionalArgs, json, theme, ticker, interval, since, minConfidence, minEdge, side,
     live, refresh, report, dryRun, verbose, performance, resolved, unresolved, days, maxAge, category,
-    limit, exportPath, minVolume, minPrice, maxPrice,
+    limit, exportPath, backtestUniverse, backtestFees, minVolume, minPrice, maxPrice,
     topK, behavioral, ranked, labelContains, closeBefore, windowDays, correlationInterval, timeframe,
     weights, bankroll, kellyMultiplier, n, maxPerCluster, maxCorrelation, minReturn, seriesTicker,
     sortBy, probabilities, tickers, query, showCluster, aggregateBy, activeOnly,