npm - costhawk - Versions diffs - 1.5.12 → 1.5.13 - Mend

costhawk 1.5.12 → 1.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/build-info.d.ts +1 -1
package/dist/build-info.js +1 -1
package/dist/cursor-parser.d.ts +128 -20
package/dist/cursor-parser.d.ts.map +1 -1
package/dist/cursor-parser.js +576 -43
package/dist/cursor-parser.js.map +1 -1
package/dist/index.js +66 -15
package/dist/index.js.map +1 -1
package/package.json +1 -1

package/dist/cursor-parser.js CHANGED

@@ -1,8 +1,8 @@
 /**
- * Cursor Local SQLite Parser (PR1 — dry-run only)
+ * Cursor Local SQLite Parser
  *
  * Parses Cursor IDE chat history from the local SQLite database to extract
- * token usage data. Read-only. Does not push to any backend.
+ * token usage and timestamps. Read-only. Does not push to any backend.
  *
  * Storage:
  *   macOS:   ~/Library/Application Support/Cursor/User/globalStorage/state.vscdb
@@ -18,21 +18,93 @@
  * on bubble rows. Model name at $.modelInfo.modelName. Server-side dedup id
  * at $.serverBubbleId.
  *
- * NOTE (PR1 scope): Cursor message timestamps are not yet verified across
- * versions, so this dry-run parser does NOT return startTime/endTime/dailyUsage.
- * PR2 will add timestamp support after verification on real Cursor data.
+ * Timestamps (verified in Task #30 against a real state.vscdb):
+ *   - $.createdAt on bubbles is an ISO 8601 string (~56% coverage, all-or-
+ *     nothing per composer — likely added in a newer Cursor version).
+ *   - $.createdAt on composerData rows is a Unix milliseconds number (100%
+ *     coverage). Same field name, different type — parser handles both.
+ *   - $.lastUpdatedAt on composerData rows is Unix ms (~13% coverage).
+ *   - $.timingInfo.client* on bubbles is performance.now()-style relative
+ *     (seconds since Cursor process start), NOT absolute — never use it as
+ *     a wall-clock timestamp.
  *
- * Workspace metadata fields (workspaceHash/workspaceName) are also unverified
- * and return null until PR2 confirms the stable field source.
+ * Fallback ladder for per-session timestamps: prefer min/max of bubble
+ * createdAt when present, otherwise use composerData.createdAt with optional
+ * composerData.lastUpdatedAt as end time. A session with neither source is
+ * still emitted, with null start/end times and a source/quality of "none".
+ * The `timestampSource` / `timestampQuality` fields surface whether the
+ * values are precise, approximate, or absent.
+ *
+ * Workspace metadata fields (workspaceHash/workspaceName) remain unverified
+ * and return null. composerData.name is a candidate for workspaceName but
+ * has not been confirmed yet.
  */
 import { execFileSync } from "child_process";
-import { existsSync } from "fs";
+import { existsSync, statSync } from "fs";
 import { homedir, platform } from "os";
 import { join } from "path";
 // Defaults — overridable via env vars
 const DEFAULT_SQLITE3_PATH = "/usr/bin/sqlite3";
 const SQLITE_TIMEOUT_MS = 10_000;
 const SQLITE_MAX_BUFFER_BYTES = 32 * 1024 * 1024;
+// Plausibility window for parsed Unix-ms timestamps: 2020-01-01 (inclusive)
+// to 2100-01-01 (exclusive), both UTC. Anything outside it is treated as
+// "not plausibly a Cursor message timestamp", which filters out
+// `performance.now`-style relative readings (small fractional numbers) as
+// well as negative or otherwise malformed values.
+const MIN_UNIX_MS = Date.UTC(2020, 0, 1);
+const MAX_UNIX_MS = Date.UTC(2100, 0, 1);
+// Self-test tolerance: a bubble may be persisted a few minutes before its
+// composerData row (clock skew, write-order races). Differences of up to
+// five minutes do not raise a warning.
+const INVARIANT_SKEW_TOLERANCE_MS = 5 * 60_000;
+/**
+ * Convert an ISO 8601 timestamp string to Unix milliseconds.
+ *
+ * Yields null when the input is not a string, cannot be parsed by
+ * Date.parse, or falls outside [MIN_UNIX_MS, MAX_UNIX_MS). A null result
+ * means "no usable timestamp here"; callers move on to the next source.
+ */
+function parseIsoToMs(value) {
+    if (typeof value === "string") {
+        const parsedMs = Date.parse(value);
+        // Date.parse reports failure as NaN, which the finiteness check rejects.
+        const inRange = parsedMs >= MIN_UNIX_MS && parsedMs < MAX_UNIX_MS;
+        if (Number.isFinite(parsedMs) && inRange) {
+            return parsedMs;
+        }
+    }
+    return null;
+}
+/**
+ * Validate a value that is supposed to already be a Unix-ms number.
+ *
+ * Non-numbers, non-finite numbers, and anything outside
+ * [MIN_UNIX_MS, MAX_UNIX_MS) yield null. The lower bound is what rejects
+ * `timingInfo.clientStartTime`-style readings (`performance.now()` values
+ * relative to process start), which sit far below MIN_UNIX_MS. Accepted
+ * values are floored to a whole millisecond.
+ */
+function parseUnixMsLoose(value) {
+    const isUsableNumber = typeof value === "number" && Number.isFinite(value);
+    if (!isUsableNumber) {
+        return null;
+    }
+    const withinRange = value >= MIN_UNIX_MS && value < MAX_UNIX_MS;
+    return withinRange ? Math.floor(value) : null;
+}
+/**
+ * Parse a `createdAt`-style field that may arrive in either shape: a
+ * Unix-ms number (composerData rows) or an ISO 8601 string (bubble rows).
+ * The numeric form is tried first. The result does not record which shape
+ * matched. Returns Unix ms, or null when neither parser accepts the value.
+ */
+function parseTimestampField(value) {
+    const fromNumber = parseUnixMsLoose(value);
+    if (fromNumber !== null) {
+        return fromNumber;
+    }
+    return parseIsoToMs(value);
+}
+/** Format a Unix-ms value as an ISO 8601 UTC string via Date#toISOString. */
+function msToIso(ms) {
+    const date = new Date(ms);
+    return date.toISOString();
+}
+/** Return the UTC calendar-date part of `ms`: the text before the "T" in its ISO form. */
+function msToUtcDateKey(ms) {
+    const [dateKey] = new Date(ms).toISOString().split("T");
+    return dateKey;
+}
+/** Build a fresh token-usage record with every counter set to zero. */
+function createEmptyTokenUsage() {
+    // A new object per call: callers accumulate into the returned record.
+    const zeroedUsage = {
+        inputTokens: 0,
+        outputTokens: 0,
+        cacheCreationTokens: 0,
+        cacheReadTokens: 0,
+    };
+    return zeroedUsage;
+}
 /**
  * Get the default Cursor SQLite path for the current platform, honoring
  * the COSTHAWK_CURSOR_DB_PATH environment override.
@@ -162,15 +234,20 @@ function hasTokenUsage(bubble) {
     return bubble.inputTokens > 0 || bubble.outputTokens > 0;
 }
 const BUBBLE_KEY_REGEX = /^bubbleId:([^:]+):(.+)$/;
+// Matches `composerData:<composerId>` keys; capture group 1 is the composer id.
+const COMPOSER_KEY_REGEX = /^composerData:(.+)$/;
 /**
  * Parse a single bubbleId row into structured BubbleData.
  *
  * Returns null if the row key is malformed, the value is not parseable JSON,
- * or the row contains neither a non-empty model name nor any positive token
- * counts. Cursor can store model metadata and token usage on different rows
- * (model name typically lives on user-prompt bubbles, token counts live on
- * assistant-response bubbles), so the parser must accept either signal in
- * isolation and let the per-composer aggregation merge them.
+ * or the row contains neither a non-empty model name, positive token counts,
+ * nor a parseable timestamp. Cursor can store model metadata, token usage,
+ * and timestamps on different rows, so the parser accepts any usable signal
+ * in isolation and lets the per-composer aggregation merge them.
+ *
+ * Timestamp handling: `createdAt` is accepted as either an ISO 8601 string
+ * (standard shape on bubble rows) or a Unix-ms number (defensive fallback).
+ * Rows with only a timestamp and no tokens/model still return BubbleData
+ * so the timestamp contributes to per-composer start/end resolution.
  */
 function parseBubble(row) {
     const match = BUBBLE_KEY_REGEX.exec(row.key);
@@ -205,10 +282,11 @@ function parseBubble(row) {
             modelName = mi.modelName;
         }
     }
-    // Skip rows with no usable signal at all — neither model metadata nor
-    // positive token counts. These are typically system messages, empty
-    // bubbles, or tool-call bookkeeping rows.
-    if (!modelName && inputTokens === 0 && outputTokens === 0) {
+    const createdAtMs = parseTimestampField(obj.createdAt);
+    // Skip rows with no usable signal at all — neither model metadata,
+    // positive token counts, nor a parseable timestamp. These are typically
+    // system messages, empty bubbles, or tool-call bookkeeping rows.
+    if (!modelName && inputTokens === 0 && outputTokens === 0 && createdAtMs === null) {
         return null;
     }
     let serverBubbleId;
@@ -223,15 +301,126 @@ function parseBubble(row) {
         modelName,
         inputTokens,
         outputTokens,
+        createdAtMs,
     };
 }
 /**
- * Parse Cursor usage from local SQLite. Read-only dry run — does NOT push
- * anything to the costcanary backend.
+ * Parse a composerData row into ComposerMetadata.
  *
- * Returns aggregated session data per composer with per-session token totals
- * and message counts. Throws CursorParserError on unrecoverable failures
- * (missing DB, missing sqlite3 binary, malformed SQLite output).
+ * Returns null for malformed keys, unparseable JSON, or rows with no
+ * usable timestamp fields. composerData.createdAt in real Cursor data
+ * is a Unix-ms number, but the parser accepts either shape defensively.
+ *
+ * If lastUpdatedAt is earlier than createdAt (clock skew, data corruption),
+ * lastUpdatedAt is dropped rather than trusted, so downstream aggregation
+ * never produces endTime < startTime.
+ */
+function parseComposerData(row) {
+    // Only `composerData:<id>` keys qualify; group 1 carries the composer id.
+    const keyMatch = COMPOSER_KEY_REGEX.exec(row.key);
+    if (keyMatch === null) {
+        return null;
+    }
+    const composerId = keyMatch[1];
+    let parsed;
+    try {
+        parsed = JSON.parse(row.value);
+    }
+    catch {
+        // Unparseable JSON: the row carries no usable metadata.
+        return null;
+    }
+    if (parsed === null || typeof parsed !== "object") {
+        return null;
+    }
+    const createdAtMs = parseTimestampField(parsed.createdAt);
+    let lastUpdatedAtMs = parseTimestampField(parsed.lastUpdatedAt);
+    // An update time that precedes the creation time breaks the ordering
+    // invariant, so it is discarded instead of being passed downstream
+    // where it could yield endTime < startTime.
+    if (createdAtMs !== null && lastUpdatedAtMs !== null && lastUpdatedAtMs < createdAtMs) {
+        lastUpdatedAtMs = null;
+    }
+    // Rows without any usable timestamp contribute nothing.
+    if (createdAtMs === null && lastUpdatedAtMs === null) {
+        return null;
+    }
+    return { composerId, createdAtMs, lastUpdatedAtMs };
+}
+/**
+ * Resolve a session's start/end times and their provenance from bubble and
+ * composer timestamps. Sources are tried in this order:
+ *
+ *   1. Bubble timestamps: start = min and end = max of the bubble
+ *      createdAt values. Source = "bubble", quality = "precise". When
+ *      composerData has a lastUpdatedAt later than the max bubble
+ *      timestamp, that value becomes the end time instead and the result
+ *      is reported as source = "mixed", quality = "approximate".
+ *   2. composerData.createdAt: used as the start, with lastUpdatedAt as
+ *      the end when it is present and not earlier than createdAt, else
+ *      createdAt again. Source = "composer", quality = "approximate".
+ *   3. Neither: startTime = endTime = null, source = quality = "none".
+ *      Callers should still emit the session — the tokens are real even
+ *      if the timing isn't.
+ *
+ * @param bubbleCreatedAtsMs Unix-ms createdAt values of the composer's bubbles.
+ * @param composerMeta Parsed composerData metadata; may be undefined.
+ * @returns `{ startTime, endTime, source, quality }`; times are ISO strings or null.
+ */
+function resolveSessionTimestamps(bubbleCreatedAtsMs, composerMeta) {
+    const composerStartMs = composerMeta?.createdAtMs ?? null;
+    const composerEndMs = composerMeta?.lastUpdatedAtMs ?? null;
+    if (bubbleCreatedAtsMs.length > 0) {
+        const firstMs = bubbleCreatedAtsMs[0];
+        const earliestMs = bubbleCreatedAtsMs.reduce((lo, ms) => (ms < lo ? ms : lo), firstMs);
+        const latestMs = bubbleCreatedAtsMs.reduce((hi, ms) => (ms > hi ? ms : hi), firstMs);
+        // The composer row can be persisted when the session is closed, so a
+        // later lastUpdatedAt extends the end time past the last bubble.
+        if (composerEndMs !== null && composerEndMs > latestMs) {
+            return {
+                startTime: msToIso(earliestMs),
+                endTime: msToIso(composerEndMs),
+                source: "mixed",
+                quality: "approximate",
+            };
+        }
+        return {
+            startTime: msToIso(earliestMs),
+            endTime: msToIso(latestMs),
+            source: "bubble",
+            quality: "precise",
+        };
+    }
+    if (composerStartMs === null) {
+        return { startTime: null, endTime: null, source: "none", quality: "none" };
+    }
+    // Never report an end earlier than the start.
+    const usableEndMs = composerEndMs !== null && composerEndMs >= composerStartMs
+        ? composerEndMs
+        : composerStartMs;
+    return {
+        startTime: msToIso(composerStartMs),
+        endTime: msToIso(usableEndMs),
+        source: "composer",
+        quality: "approximate",
+    };
+}
+/**
+ * Parse Cursor usage from local SQLite. Read-only — does NOT push anything
+ * to the costcanary backend.
+ *
+ * Returns aggregated session data per composer with per-session token totals,
+ * message counts, start/end timestamps, and daily usage buckets. Throws
+ * CursorParserError on unrecoverable failures (missing DB, missing sqlite3
+ * binary, malformed SQLite output).
  *
  * Dedup strategy: per composer, keep one entry per (serverBubbleId ?? bubbleId).
  * On collision, keep the candidate with the larger token total.
@@ -240,32 +429,40 @@ function parseBubble(row) {
  * the returned `model` field is "mixed". If no model info is present on any
  * bubble, the field is "unknown".
  *
- * Sort order: total tokens descending. NOT chronological — message timestamps
- * are not yet verified for Cursor.
+ * Sort order: total tokens descending.
  */
-export function parseCursorUsageDryRun() {
+export function parseCursorUsage() {
     const dbPath = getCursorDbPath();
     // Throws CursorParserError on missing DB / missing sqlite3 / query failure
-    const rows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'");
-    // Cursor splits model metadata and token usage across different bubble
-    // rows: model names typically live on user-prompt bubbles (type 1) with
-    // zero token counts, and token counts live on assistant-response bubbles
-    // (type 2) with no model info. We collect them separately and merge per
-    // composer.
+    const bubbleRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'");
+    const composerRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'");
+    // Cursor splits model metadata, token usage, and timestamps across
+    // different bubble rows. We collect them into separate per-composer
+    // structures so each signal is captured even when rows carry only one
+    // of them.
     //
     // - tokenBubblesByComposer: per-composer dedup map for bubbles that carry
     //   positive token counts. Dedup key is (serverBubbleId ?? bubbleId).
     //   Collision rule: keep the candidate with the larger token total.
     // - modelsByComposer: per-composer set of all distinct non-empty model
-    //   names found on ANY bubble row in the composer. No dedup needed since
-    //   models are categorical, not additive.
-    //
-    // Model harvesting is intentionally not gated on type or on token presence
-    // — if Cursor ever stores model names on assistant rows in a future
-    // schema, this code already supports it.
+    //   names found on ANY bubble row in the composer.
+    // - bubbleCreatedAtsByComposer: per-composer list of parsed bubble
+    //   createdAtMs values. Not deduped — we only need min/max, and duplicate
+    //   values are harmless for those aggregations.
+    // - composerMetaById: composerData row metadata, used as the fallback
+    //   source for per-session timestamps.
     const tokenBubblesByComposer = new Map();
     const modelsByComposer = new Map();
-    for (const row of rows) {
+    const bubbleCreatedAtsByComposer = new Map();
+    const composerMetaById = new Map();
+    for (const row of composerRows) {
+        const meta = parseComposerData(row);
+        if (!meta) {
+            continue;
+        }
+        composerMetaById.set(meta.composerId, meta);
+    }
+    for (const row of bubbleRows) {
         const bubble = parseBubble(row);
         if (!bubble) {
             continue;
@@ -278,6 +475,14 @@ export function parseCursorUsageDryRun() {
             }
             composerModels.add(bubble.modelName);
         }
+        if (bubble.createdAtMs !== null) {
+            let composerCreatedAts = bubbleCreatedAtsByComposer.get(bubble.composerId);
+            if (!composerCreatedAts) {
+                composerCreatedAts = [];
+                bubbleCreatedAtsByComposer.set(bubble.composerId, composerCreatedAts);
+            }
+            composerCreatedAts.push(bubble.createdAtMs);
+        }
         if (!hasTokenUsage(bubble)) {
             continue;
         }
@@ -298,17 +503,45 @@ export function parseCursorUsageDryRun() {
         }
         composerMap.set(dedupKey, bubble);
     }
-    // Aggregate per composer into the dry-run output shape.
+    // Aggregate per composer into the parser output shape.
     const sessions = [];
     for (const [composerId, composerMap] of tokenBubblesByComposer) {
         let inputTokens = 0;
         let outputTokens = 0;
         let messageCount = 0;
         const modelsSeen = modelsByComposer.get(composerId) ?? new Set();
+        const composerMeta = composerMetaById.get(composerId);
+        const bubbleCreatedAts = bubbleCreatedAtsByComposer.get(composerId) ?? [];
+        // Daily bucketing: for each token-bearing bubble, prefer its own
+        // createdAt; otherwise fall back to the composer's createdAt so the
+        // session still contributes to some day rather than silently
+        // dropping tokens from the daily view. We track whether any bucket
+        // used the composer fallback so the session-level dailyUsageSource
+        // reflects approximate day attribution.
+        const dailyUsage = {};
+        let anyBubbleFellBackToComposer = false;
         for (const bubble of composerMap.values()) {
             inputTokens += bubble.inputTokens;
             outputTokens += bubble.outputTokens;
             messageCount += 1;
+            let bucketMs = null;
+            if (bubble.createdAtMs !== null) {
+                bucketMs = bubble.createdAtMs;
+            }
+            else if (composerMeta?.createdAtMs != null) {
+                bucketMs = composerMeta.createdAtMs;
+                anyBubbleFellBackToComposer = true;
+            }
+            if (bucketMs !== null) {
+                const dateKey = msToUtcDateKey(bucketMs);
+                let bucket = dailyUsage[dateKey];
+                if (!bucket) {
+                    bucket = createEmptyTokenUsage();
+                    dailyUsage[dateKey] = bucket;
+                }
+                bucket.inputTokens += bubble.inputTokens;
+                bucket.outputTokens += bubble.outputTokens;
+            }
         }
         if (messageCount === 0) {
             continue;
@@ -323,22 +556,47 @@ export function parseCursorUsageDryRun() {
         else {
             model = "mixed";
         }
+        const timing = resolveSessionTimestamps(bubbleCreatedAts, composerMeta);
+        // dailyUsageSource classification:
+        //   "bubble"   — every bucket came from a bubble-level createdAt (precise)
+        //   "composer" — at least one bucket fell back to composer.createdAt,
+        //                so the whole per-day view is approximate. Any fallback
+        //                downgrades the entire session so downstream renderers
+        //                don't imply message-level precision we can't back up.
+        //   "none"     — no bucket had any timestamp source; dailyUsage is empty.
+        let dailyUsageSource;
+        if (Object.keys(dailyUsage).length === 0) {
+            dailyUsageSource = "none";
+        }
+        else if (anyBubbleFellBackToComposer) {
+            dailyUsageSource = "composer";
+        }
+        else {
+            dailyUsageSource = "bubble";
+        }
         sessions.push({
             sessionId: composerId,
-            workspaceHash: null, // Unverified in PR1 — set in PR2
-            workspaceName: null, // Unverified in PR1 — set in PR2
+            workspaceHash: null,
+            workspaceName: null,
             model,
             tokens: {
                 inputTokens,
                 outputTokens,
-                cacheCreationTokens: 0, // Cursor does not have prompt cache tokens
+                cacheCreationTokens: 0,
                 cacheReadTokens: 0,
             },
             messageCount,
             filePath: dbPath,
+            startTime: timing.startTime,
+            endTime: timing.endTime,
+            timestampSource: timing.source,
+            timestampQuality: timing.quality,
+            dailyUsage,
+            dailyUsageSource,
         });
     }
-    // Sort by total tokens descending. NOT recency — no verified timestamps.
+    // Sort by total tokens descending. Downstream surfaces can re-sort by
+    // startTime if chronological order matters.
     sessions.sort((a, b) => {
         const aTotal = a.tokens.inputTokens + a.tokens.outputTokens;
         const bTotal = b.tokens.inputTokens + b.tokens.outputTokens;
@@ -349,6 +607,281 @@ export function parseCursorUsageDryRun() {
         filePath: dbPath,
     };
 }
+/**
+ * Backward-compat alias. PR1 consumers called this function name; keep it
+ * working for one release after the rename.
+ *
+ * @deprecated Use `parseCursorUsage`; this name is bound to the same function.
+ */
+export const parseCursorUsageDryRun = parseCursorUsage;
+/**
+ * Shorten a UUID-like identifier to its first 8 characters for display in
+ * transparency output (a real UUID becomes e.g. "399974f0"). That is enough
+ * for a person to tell keys apart at a glance, but too little to act as a
+ * stable correlation handle should the output leak. Identifiers of 8
+ * characters or fewer are returned unchanged.
+ */
+function truncateId(id) {
+    if (id.length > 8) {
+        return id.slice(0, 8);
+    }
+    return id;
+}
+/**
+ * Return transparency metadata about the Cursor SQLite database: file
+ * size, table list, key-prefix histogram, and a small sample (at most 5
+ * each) of bubble and composer keys with their identifiers truncated.
+ * Powers the `what_we_read` MCP mode so users can see exactly what data
+ * CostHawk is reading.
+ *
+ * Sample keys are always redacted. Keys that match the expected shape have
+ * each captured id truncated; keys that do not match keep their prefix and
+ * have every later colon-separated segment truncated, so a malformed key
+ * can never expose a full identifier.
+ *
+ * @returns `{ filePath, dbFileSize, tables, keyPrefixes, sampleBubbleKeys,
+ *   sampleComposerKeys }`. `dbFileSize` is 0 when the file cannot be stat'ed.
+ * @throws CursorParserError on missing DB, missing sqlite3, or query failure.
+ */
+export function getCursorMeta() {
+    const dbPath = getCursorDbPath();
+    if (!existsSync(dbPath)) {
+        // Thrown as a plain `{ code, message }` object. The shape is kept
+        // as-is because callers presumably recognise it through
+        // isCursorParserError — confirm before changing it to an Error.
+        const error = {
+            code: "CURSOR_DB_NOT_FOUND",
+            message: `Cursor SQLite database not found at ${dbPath}. Make sure Cursor is installed and you have used it at least once. Set COSTHAWK_CURSOR_DB_PATH to override.`,
+        };
+        throw error;
+    }
+    // File size is best-effort: a stat failure is reported as 0 rather than
+    // failing the whole transparency report.
+    let dbFileSize = 0;
+    try {
+        dbFileSize = statSync(dbPath).size;
+    }
+    catch {
+        dbFileSize = 0;
+    }
+    const tableRows = runCursorQuery("SELECT name AS key, 'table' AS value FROM sqlite_master WHERE type='table' ORDER BY name");
+    const tables = tableRows.map((row) => row.key);
+    // Histogram of key prefixes in cursorDiskKV. The CASE expression mirrors
+    // the manual probe from Task #30 — substring up to the first colon, or
+    // the whole key if there is no colon. ORDER BY count(*) (not the TEXT
+    // cast of the count) so the ordering is numeric — otherwise "9" sorts
+    // above "184" lexicographically.
+    const prefixRows = runCursorQuery("SELECT CASE WHEN instr(key,':')>0 THEN substr(key,1,instr(key,':')-1) ELSE key END AS key, CAST(count(*) AS TEXT) AS value FROM cursorDiskKV GROUP BY 1 ORDER BY count(*) DESC");
+    const keyPrefixes = {};
+    for (const row of prefixRows) {
+        const count = Number.parseInt(row.value, 10);
+        // Rows whose count does not parse to a number are left out.
+        if (Number.isFinite(count)) {
+            keyPrefixes[row.key] = count;
+        }
+    }
+    // Fallback redaction for sample keys that do not match the expected key
+    // regex: keep the prefix before the first colon, truncate the rest.
+    const redactUnmatchedKey = (key) => key
+        .split(":")
+        .map((segment, index) => (index === 0 ? segment : truncateId(segment)))
+        .join(":");
+    const bubbleSampleRows = runCursorQuery("SELECT key, '' AS value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%' LIMIT 5");
+    const composerSampleRows = runCursorQuery("SELECT key, '' AS value FROM cursorDiskKV WHERE key LIKE 'composerData:%' LIMIT 5");
+    const sampleBubbleKeys = bubbleSampleRows.map((row) => {
+        const match = BUBBLE_KEY_REGEX.exec(row.key);
+        if (!match)
+            return redactUnmatchedKey(row.key);
+        const [, composerId, bubbleId] = match;
+        return `bubbleId:${truncateId(composerId)}:${truncateId(bubbleId)}`;
+    });
+    const sampleComposerKeys = composerSampleRows.map((row) => {
+        const match = COMPOSER_KEY_REGEX.exec(row.key);
+        if (!match)
+            return redactUnmatchedKey(row.key);
+        const [, composerId] = match;
+        return `composerData:${truncateId(composerId)}`;
+    });
+    return {
+        filePath: dbPath,
+        dbFileSize,
+        tables,
+        keyPrefixes,
+        sampleBubbleKeys,
+        sampleComposerKeys,
+    };
+}
+/**
+ * Run a full parser health check against the live DB. Reports coverage
+ * numbers, validates invariants, and classifies the result as PASS,
+ * DEGRADED, or FAIL.
+ *
+ * - FAIL: the DB file is missing, a query failed, or parseCursorUsage()
+ *   threw. Details are in `errors`. overallStatus starts as "FAIL" and is
+ *   only upgraded once every check has run. The MCP tool surfaces FAIL as
+ *   isError:true.
+ * - DEGRADED: everything ran, but at least one invariant check added a
+ *   warning (sessions without a timestamp source, startTime > endTime, or
+ *   bubble/composer createdAt skew beyond the tolerance).
+ * - PASS: everything ran with no errors and no warnings.
+ *
+ * Errors from the queries and from the parser are caught and reported in
+ * the returned payload rather than rethrown, so callers can present the
+ * full structured result to users.
+ * NOTE(review): getCursorDbPath() and getSqlite3Path() are called outside
+ * any try block; "never throws" holds only if they cannot throw — confirm.
+ *
+ * @returns The structured self-test result: paths, query status, counts,
+ *   timestamp coverage, invariant checks, warnings, errors, overallStatus.
+ */
+export function runCursorSelfTest() {
+    const dbPath = getCursorDbPath();
+    const sqlite3Path = getSqlite3Path();
+    const errors = [];
+    const warnings = [];
+    const invariantChecks = [];
+    // Extract a message from a caught value. Parser errors can be plain
+    // `{ code, message }` objects rather than Error instances, so both
+    // shapes are handled. Returns null when no message is available.
+    const messageFromCaught = (err) => {
+        if (err instanceof Error) {
+            return err.message;
+        }
+        if (typeof err === "object" && err !== null && "message" in err) {
+            return String(err.message);
+        }
+        return null;
+    };
+    const result = {
+        filePath: dbPath,
+        dbExists: existsSync(dbPath),
+        sqlite3Path,
+        canQuery: false,
+        tokenBubbleCount: 0,
+        composerCount: 0,
+        sessionsWithTokens: 0,
+        timestampCoverage: {
+            bubblesWithCreatedAt: 0,
+            totalBubbles: 0,
+            composersWithCreatedAt: 0,
+            totalComposers: 0,
+        },
+        invariantChecks,
+        warnings,
+        errors,
+        overallStatus: "FAIL",
+    };
+    if (!result.dbExists) {
+        errors.push(`Cursor SQLite database not found at ${dbPath}. Set COSTHAWK_CURSOR_DB_PATH to override.`);
+        return result;
+    }
+    let bubbleRows;
+    let composerRows;
+    try {
+        bubbleRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'");
+        composerRows = runCursorQuery("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'");
+        result.canQuery = true;
+    }
+    catch (err) {
+        const code = isCursorParserError(err) ? err.code : "UNKNOWN";
+        const message = messageFromCaught(err) ?? "Unknown error";
+        errors.push(`[${code}] ${message}`);
+        return result;
+    }
+    result.timestampCoverage.totalBubbles = bubbleRows.length;
+    result.timestampCoverage.totalComposers = composerRows.length;
+    // Count bubbles with a usable createdAt timestamp (string or number).
+    // This mirrors parseBubble's `createdAtMs` logic so the reported
+    // coverage matches what the parser will actually use.
+    for (const row of bubbleRows) {
+        let obj;
+        try {
+            obj = JSON.parse(row.value);
+        }
+        catch {
+            continue;
+        }
+        if (typeof obj !== "object" || obj === null)
+            continue;
+        if (parseTimestampField(obj.createdAt) !== null) {
+            result.timestampCoverage.bubblesWithCreatedAt += 1;
+        }
+    }
+    // Parse each composerData row once. The map feeds both the coverage
+    // count here and the skew check (invariant 4) further down.
+    const composerMetaById = new Map();
+    for (const row of composerRows) {
+        const meta = parseComposerData(row);
+        if (!meta)
+            continue;
+        composerMetaById.set(meta.composerId, meta);
+        if (meta.createdAtMs !== null) {
+            result.timestampCoverage.composersWithCreatedAt += 1;
+        }
+    }
+    // Invariant 1: the parser runs without throwing.
+    let parserResult = null;
+    try {
+        parserResult = parseCursorUsage();
+        invariantChecks.push({ name: "parser_runs", passed: true });
+    }
+    catch (err) {
+        // String(err) alone would render a plain-object parser error as
+        // "[object Object]"; prefer its `message` when one is present.
+        const message = messageFromCaught(err) ?? String(err);
+        invariantChecks.push({
+            name: "parser_runs",
+            passed: false,
+            details: message,
+        });
+        errors.push(`Parser threw: ${message}`);
+        return result;
+    }
+    result.sessionsWithTokens = parserResult.sessions.length;
+    result.tokenBubbleCount = parserResult.sessions.reduce((acc, s) => acc + s.messageCount, 0);
+    result.composerCount = composerRows.length;
+    // Invariant 2: no session is left with timestampSource "none".
+    const sessionsWithoutTiming = parserResult.sessions.filter((s) => s.timestampSource === "none");
+    if (sessionsWithoutTiming.length > 0) {
+        invariantChecks.push({
+            name: "all_sessions_have_timestamp_source",
+            passed: false,
+            details: `${sessionsWithoutTiming.length} sessions have timestampSource="none"`,
+        });
+        warnings.push(`${sessionsWithoutTiming.length}/${parserResult.sessions.length} sessions have no parseable timestamp source. They will appear with null startTime/endTime in usage output.`);
+    }
+    else {
+        invariantChecks.push({
+            name: "all_sessions_have_timestamp_source",
+            passed: true,
+        });
+    }
+    // Invariant 3: for every session that resolved start AND end, start <= end.
+    const badOrdering = parserResult.sessions.filter((s) => s.startTime !== null &&
+        s.endTime !== null &&
+        Date.parse(s.startTime) > Date.parse(s.endTime));
+    if (badOrdering.length > 0) {
+        invariantChecks.push({
+            name: "start_time_le_end_time",
+            passed: false,
+            details: `${badOrdering.length} sessions violate start <= end`,
+        });
+        warnings.push(`${badOrdering.length} sessions have startTime > endTime after resolution. This is a parser bug — please report.`);
+    }
+    else {
+        invariantChecks.push({
+            name: "start_time_le_end_time",
+            passed: true,
+        });
+    }
+    // Invariant 4: for composers where both bubble and composer timestamps
+    // exist, min(bubble.createdAt) should not precede composer createdAt by
+    // more than the tolerance. Violations suggest schema drift or corrupt
+    // timing data.
+    //
+    // The per-composer minimum is re-derived from bubbleRows directly rather
+    // than taken from the parser result, so the check stays independent of
+    // any changes to the main aggregation logic.
+    const minBubbleCreatedAtByComposer = new Map();
+    for (const row of bubbleRows) {
+        const bubble = parseBubble(row);
+        if (!bubble || bubble.createdAtMs === null)
+            continue;
+        const prior = minBubbleCreatedAtByComposer.get(bubble.composerId);
+        if (prior === undefined || bubble.createdAtMs < prior) {
+            minBubbleCreatedAtByComposer.set(bubble.composerId, bubble.createdAtMs);
+        }
+    }
+    let skewWarnings = 0;
+    for (const [composerId, minBubbleMs] of minBubbleCreatedAtByComposer) {
+        const meta = composerMetaById.get(composerId);
+        if (!meta || meta.createdAtMs === null)
+            continue;
+        // Positive skew means the earliest bubble predates the composer row.
+        const skew = meta.createdAtMs - minBubbleMs;
+        if (skew > INVARIANT_SKEW_TOLERANCE_MS) {
+            skewWarnings += 1;
+        }
+    }
+    if (skewWarnings > 0) {
+        invariantChecks.push({
+            name: "bubble_composer_createdat_skew",
+            passed: false,
+            details: `${skewWarnings} composers where min(bubble.createdAt) is more than ${INVARIANT_SKEW_TOLERANCE_MS / 1000}s before composerData.createdAt`,
+        });
+        warnings.push(`${skewWarnings} composers show unexpected clock skew between bubble and composer timestamps. Values are still usable but may indicate schema drift.`);
+    }
+    else {
+        invariantChecks.push({
+            name: "bubble_composer_createdat_skew",
+            passed: true,
+        });
+    }
+    if (errors.length > 0) {
+        result.overallStatus = "FAIL";
+    }
+    else if (warnings.length > 0) {
+        result.overallStatus = "DEGRADED";
+    }
+    else {
+        result.overallStatus = "PASS";
+    }
+    return result;
+}
 // Re-export the type guard so the MCP tool registration in index.ts can
 // distinguish CursorParserError from generic Error in its catch block.
 export { isCursorParserError };