npm - context-mode - Versions diffs - 1.0.165 → 1.0.167 - Mend

context-mode 1.0.165 → 1.0.167

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.codex-plugin/plugin.json +1 -1
package/.openclaw-plugin/openclaw.plugin.json +1 -1
package/.openclaw-plugin/package.json +1 -1
package/README.md +6 -4
package/build/adapters/codex/usage.d.ts +107 -0
package/build/adapters/codex/usage.js +227 -0
package/build/adapters/gemini-cli/hooks.d.ts +7 -1
package/build/adapters/gemini-cli/hooks.js +9 -1
package/build/adapters/gemini-cli/index.js +11 -0
package/build/adapters/kimi/paths.d.ts +20 -0
package/build/adapters/kimi/paths.js +41 -1
package/build/adapters/kimi/usage.d.ts +82 -0
package/build/adapters/kimi/usage.js +217 -0
package/build/adapters/omp/plugin.d.ts +6 -0
package/build/adapters/omp/plugin.js +87 -2
package/build/adapters/omp/usage.d.ts +49 -0
package/build/adapters/omp/usage.js +110 -0
package/build/adapters/openclaw/plugin.d.ts +10 -0
package/build/adapters/openclaw/plugin.js +57 -0
package/build/adapters/openclaw/usage.d.ts +34 -0
package/build/adapters/openclaw/usage.js +52 -0
package/build/adapters/opencode/plugin.d.ts +17 -0
package/build/adapters/opencode/plugin.js +40 -1
package/build/adapters/pi/extension.js +61 -10
package/build/adapters/pi/mcp-bridge.d.ts +78 -1
package/build/adapters/pi/mcp-bridge.js +105 -17
package/build/adapters/qwen-code/index.js +23 -1
package/build/adapters/qwen-code/usage.d.ts +90 -0
package/build/adapters/qwen-code/usage.js +222 -0
package/build/lifecycle.d.ts +10 -0
package/build/lifecycle.js +16 -1
package/build/session/db.d.ts +11 -0
package/build/session/db.js +33 -0
package/build/session/extract.d.ts +208 -0
package/build/session/extract.js +670 -43
package/build/session/model-prices.json +429 -0
package/build/session/pricing.d.ts +64 -0
package/build/session/pricing.js +151 -0
package/cli.bundle.mjs +84 -84
package/configs/antigravity-cli/plugin.json +1 -1
package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
package/configs/gemini-cli/settings.json +11 -0
package/hooks/codex/stop.mjs +91 -4
package/hooks/gemini-cli/aftermodel.mjs +70 -0
package/hooks/kimi/stop.mjs +74 -3
package/hooks/qwen-code/platform.mjs +1 -0
package/hooks/qwen-code/stop.mjs +168 -0
package/hooks/session-db.bundle.mjs +7 -7
package/hooks/session-extract.bundle.mjs +3 -2
package/hooks/session-loaders.mjs +9 -1
package/hooks/stop.mjs +35 -2
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/server.bundle.mjs +107 -107

package/build/adapters/qwen-code/usage.d.ts ADDED Viewed

@@ -0,0 +1,90 @@
+/**
+ * adapters/qwen-code/usage — per-turn token capture from the session JSONL.
+ *
+ * Qwen Code is a Gemini-CLI fork and normalizes EVERY backend (Gemini-native,
+ * OpenAI-compat/DashScope, Anthropic) to the same canonical token shape:
+ * `GenerateContentResponseUsageMetadata` { promptTokenCount, candidatesTokenCount,
+ * cachedContentTokenCount, thoughtsTokenCount, totalTokenCount }
+ * (matrix §1: turn.ts:96,417 + converter.ts:1145-1148). That metadata is
+ * persisted, per API call, into the session record file as a `ChatRecord`
+ * carrying `.usageMetadata` + `.model`
+ * (refs: packages/core/src/services/chatRecordingService.ts:259,261,919 file at
+ * ~/.qwen/tmp/<project_id>/chats/<sessionId>.jsonl — :451 location comment,
+ * :600,628-629 path build).
+ *
+ * CRITICAL (matrix §4): qwen-code's hook payloads carry tool I/O ONLY — token
+ * usage is unreachable through the hook stream (grep of hookEventHandler.ts /
+ * hookSystem.ts / toolHookTriggers.ts for token|usageMetadata|usage → zero
+ * matches). The ONLY live capture path is a tail of the session JSONL. This
+ * module is therefore the JSONL-tail counterpart to claude-code's
+ * `extractTranscriptUsageSince` (src/session/extract.ts) — same cursor-gated,
+ * char-algorithmic, NO-regex parse, same `buildAgentUsageEvent` emission path.
+ *
+ * Per matrix §3 each ChatRecord.usageMetadata is INCREMENTAL per API call
+ * (cumulative session totals are derived downstream via += in
+ * uiTelemetry.ts:237-241), so summing the NEW records since the cursor yields
+ * the exact billed delta with no double-count.
+ *
+ * No native USD — cost_usd is derived from the pricing catalog inside
+ * buildAgentUsageEvent (native_cost_usd omitted). Pure, null-safe, NO regex.
+ */
+import { type AgentUsageCounts, type SessionEvent } from "../../session/extract.js";
+/**
+ * Parse ONE qwen `ChatRecord` into the `buildAgentUsageEvent` input shape, or
+ * null when the record carries no usage / sums to zero.
+ *
+ * Mapping → builder shape (AgentUsageCounts):
+ *   promptTokenCount        → input_tokens
+ *   candidatesTokenCount    → output_tokens
+ *   thoughtsTokenCount      → ADDED into output_tokens (Gemini-lineage bills
+ *                             reasoning/thoughts as output — same fold as
+ *                             parseGeminiUsage in src/session/extract.ts)
+ *   cachedContentTokenCount → cache_read_tokens (when present)
+ *   model_id                → ChatRecord.model
+ *
+ * No native cost — native_cost_usd is returned as null (catalog-derived).
+ * NO regex.
+ *
+ * @param record - One parsed JSONL value; anything that is not a plain object
+ *   carrying an object-valued `usageMetadata` yields null.
+ * @returns The normalized counts, or null when input, output and cached
+ *   counts are all zero.
+ */
+export declare function parseQwenUsage(record: unknown): AgentUsageCounts | null;
+/**
+ * Cursor-aware tail of the qwen session JSONL. Emits one priced `agent_usage`
+ * event PER distinct model across the records NEW since `cursor`, so re-reading
+ * the (append-only, ever-growing) JSONL each Stop never double-counts.
+ *
+ *   - cursor null/empty            → process ALL records.
+ *   - cursor found                 → process records STRICTLY AFTER it.
+ *   - cursor set but NOT found     → compaction/rotation dropped it: bounded
+ *     fallback processes ONLY THE LAST record (never re-emit full history).
+ *
+ * `cursor` returns the id of the LAST id-bearing record seen (whether or not it
+ * carried usage), so the next call resumes exactly past it. When no record
+ * carries an id, the input cursor is returned unchanged.
+ *
+ * One linear walk, JSON.parse per line, NO regex — mirrors
+ * extractTranscriptUsageSince's structure exactly.
+ *
+ * @param jsonlText - Full text of the session JSONL; a non-string or empty
+ *   value yields no events and returns the (normalized) input cursor.
+ * @param cursor - Record id returned by the previous call; null or "" means
+ *   "no cursor".
+ * @returns `events` to forward and the `cursor` to persist for the next call.
+ */
+export declare function extractQwenUsageSince(jsonlText: string, cursor: string | null): {
+    events: SessionEvent[];
+    cursor: string | null;
+};
+/**
+ * Hash a project root into qwen-code's `<project_id>` directory segment.
+ *
+ * EXACT port of qwen's `getProjectHash`
+ * (refs/platforms/qwen-code/packages/core/src/utils/paths.ts:262 —
+ * `crypto.createHash('sha256').update(normalizedPath).digest('hex')`). On
+ * Windows qwen lowercases the path first (case-insensitive FS); we mirror that
+ * so a hook running on win32 resolves the same tmp dir qwen itself wrote.
+ * Pure, deterministic, NO regex.
+ *
+ * @param projectRoot - Project root path, hashed as given (lowercased on
+ *   win32 only).
+ * @returns The hex-encoded SHA-256 digest of that path.
+ */
+export declare function qwenProjectHash(projectRoot: string): string;
+/**
+ * Build the canonical session JSONL path qwen-code writes its ChatRecords to:
+ *   <qwenHome>/tmp/<sha256(projectRoot)>/chats/<sessionId>.jsonl
+ * (refs chatRecordingService.ts:451 location + storage.ts:316-320
+ * getProjectTempDir → getGlobalTempDir(<qwenHome>/tmp) + getProjectHash).
+ *
+ * `qwenHome` is normally `<homedir>/.qwen`. Pure path join — does NOT touch the
+ * FS, so it is fully unit-testable; existence probing + the glob fallback live
+ * in the Stop hook (which cannot import this TS at runtime). NO regex.
+ *
+ * @param qwenHome - qwen's home directory (normally `<homedir>/.qwen`).
+ * @param projectRoot - Project root path; hashed via `qwenProjectHash`.
+ * @param sessionId - Session id, used verbatim as the file's base name.
+ * @returns The joined path; the file is not checked for existence.
+ */
+export declare function qwenChatJsonlPath(qwenHome: string, projectRoot: string, sessionId: string): string;

package/build/adapters/qwen-code/usage.js ADDED Viewed

@@ -0,0 +1,222 @@
+/**
+ * adapters/qwen-code/usage — per-turn token capture from the session JSONL.
+ *
+ * Qwen Code is a Gemini-CLI fork and normalizes EVERY backend (Gemini-native,
+ * OpenAI-compat/DashScope, Anthropic) to the same canonical token shape:
+ * `GenerateContentResponseUsageMetadata` { promptTokenCount, candidatesTokenCount,
+ * cachedContentTokenCount, thoughtsTokenCount, totalTokenCount }
+ * (matrix §1: turn.ts:96,417 + converter.ts:1145-1148). That metadata is
+ * persisted, per API call, into the session record file as a `ChatRecord`
+ * carrying `.usageMetadata` + `.model`
+ * (refs: packages/core/src/services/chatRecordingService.ts:259,261,919 file at
+ * ~/.qwen/tmp/<project_id>/chats/<sessionId>.jsonl — :451 location comment,
+ * :600,628-629 path build).
+ *
+ * CRITICAL (matrix §4): qwen-code's hook payloads carry tool I/O ONLY — token
+ * usage is unreachable through the hook stream (grep of hookEventHandler.ts /
+ * hookSystem.ts / toolHookTriggers.ts for token|usageMetadata|usage → zero
+ * matches). The ONLY live capture path is a tail of the session JSONL. This
+ * module is therefore the JSONL-tail counterpart to claude-code's
+ * `extractTranscriptUsageSince` (src/session/extract.ts) — same cursor-gated,
+ * char-algorithmic, NO-regex parse, same `buildAgentUsageEvent` emission path.
+ *
+ * Per matrix §3 each ChatRecord.usageMetadata is INCREMENTAL per API call
+ * (cumulative session totals are derived downstream via += in
+ * uiTelemetry.ts:237-241), so summing the NEW records since the cursor yields
+ * the exact billed delta with no double-count.
+ *
+ * No native USD — cost_usd is derived from the pricing catalog inside
+ * buildAgentUsageEvent (native_cost_usd omitted). Pure, null-safe, NO regex.
+ */
+import { createHash } from "node:crypto";
+import { join } from "node:path";
+import { platform } from "node:os";
+import { buildAgentUsageEvent } from "../../session/extract.js";
+/**
+ * Floor-and-clamp a token field to a non-negative integer (mirrors omp/usage).
+ *
+ * @param v - Raw field value read from a parsed record; may be of any type.
+ * @returns 0 for non-numbers, NaN, ±Infinity and anything that floors to a
+ *   value <= 0; otherwise `Math.floor(v)`.
+ */
+function tokenNum(v) {
+    if (typeof v !== "number" || !Number.isFinite(v))
+        return 0; // non-numeric, NaN and ±Infinity all count as "no tokens"
+    const n = Math.floor(v);
+    return n > 0 ? n : 0; // negatives and values below 1 clamp to 0
+}
+/**
+ * Parse ONE qwen `ChatRecord` into the `buildAgentUsageEvent` input shape, or
+ * null when the record carries no usage / sums to zero.
+ *
+ * Mapping → builder shape (AgentUsageCounts):
+ *   promptTokenCount        → input_tokens
+ *   candidatesTokenCount    → output_tokens
+ *   thoughtsTokenCount      → ADDED into output_tokens (Gemini-lineage bills
+ *                             reasoning/thoughts as output — same fold as
+ *                             parseGeminiUsage in src/session/extract.ts)
+ *   cachedContentTokenCount → cache_read_tokens (when present)
+ *   model_id                → ChatRecord.model
+ *
+ * No native cost — native_cost_usd is returned as null (catalog-derived).
+ * NO regex. `totalTokenCount` is never read.
+ *
+ * @param record - One parsed JSONL value. Null, primitives and arrays yield
+ *   null, as does a record whose `usageMetadata` is missing or not an object.
+ * @returns The normalized counts, or null when input, output and cached
+ *   counts are all zero. `model_id` is "" when `record.model` is not a string.
+ *
+ * NOTE(review): input_tokens and cache_read_tokens are taken from
+ * promptTokenCount and cachedContentTokenCount independently, with no
+ * subtraction. If promptTokenCount already includes the cached tokens
+ * upstream, those tokens land in both fields — confirm against how
+ * buildAgentUsageEvent prices the two columns.
+ */
+export function parseQwenUsage(record) {
+    if (!record || typeof record !== "object" || Array.isArray(record))
+        return null;
+    const rec = record;
+    const um = rec.usageMetadata;
+    if (!um || typeof um !== "object")
+        return null;
+    const usage = um;
+    const input = tokenNum(usage.promptTokenCount);
+    const candidates = tokenNum(usage.candidatesTokenCount);
+    const thoughts = tokenNum(usage.thoughtsTokenCount);
+    const cached = tokenNum(usage.cachedContentTokenCount);
+    // Gemini-lineage bills reasoning (thoughts) as output tokens — fold into output.
+    const output = candidates + thoughts;
+    // All token fields zero → not a billable record. buildAgentUsageEvent would
+    // also reject this, but short-circuit keeps the contract explicit.
+    if (input <= 0 && output <= 0 && cached <= 0)
+        return null;
+    const model_id = typeof rec.model === "string" ? rec.model : ""; // "" groups model-less records together downstream
+    return {
+        model_id,
+        input_tokens: input,
+        output_tokens: output,
+        cache_creation_tokens: 0, // qwen exposes no cache-creation field
+        cache_read_tokens: cached,
+        native_cost_usd: null, // catalog-derived (no native cost on qwen records)
+    };
+}
+/**
+ * Stable cursor identity for a ChatRecord: prefer `id`, fall back to `messageId`.
+ *
+ * @param rec - A parsed record object (already checked to be a non-array object
+ *   by the caller in extractQwenUsageSince).
+ * @returns The first of `rec.id` / `rec.messageId` that is a non-empty string,
+ *   or null when neither qualifies.
+ *
+ * NOTE(review): no other identity field (for example a `uuid`) is consulted.
+ * Verify the field names against the records qwen actually writes — a record
+ * with neither field can never serve as a cursor.
+ */
+function recordId(rec) {
+    if (typeof rec.id === "string" && rec.id.length > 0)
+        return rec.id;
+    if (typeof rec.messageId === "string" && rec.messageId.length > 0)
+        return rec.messageId;
+    return null;
+}
+/**
+ * Cursor-aware tail of the qwen session JSONL. Emits one priced `agent_usage`
+ * event PER distinct model across the records NEW since `cursor`, so re-reading
+ * the (append-only, ever-growing) JSONL each Stop never double-counts.
+ *
+ *   - cursor null/empty            → process ALL records.
+ *   - cursor found                 → process records STRICTLY AFTER it.
+ *   - cursor set but NOT found     → compaction/rotation dropped it: bounded
+ *     fallback processes ONLY THE LAST record (never re-emit full history).
+ *
+ * `cursor` returns the id of the LAST id-bearing record seen (whether or not it
+ * carried usage), so the next call resumes exactly past it. When no record
+ * carries an id, the input cursor is returned unchanged.
+ *
+ * One linear walk, JSON.parse per line, NO regex — mirrors
+ * extractTranscriptUsageSince's structure exactly.
+ *
+ * @param jsonlText - Full text of the session JSONL. A non-string or empty
+ *   value yields no events and returns the (normalized) input cursor.
+ * @param cursor - Record id returned by the previous call; null, "" and
+ *   non-strings are all treated as "no cursor".
+ * @returns `events` — at most one per distinct `model_id` in the selected
+ *   slice (falsy results from buildAgentUsageEvent are dropped) — and the
+ *   `cursor` to persist for the next call.
+ *
+ * Parsing details: lines are split on "\n" and trimmed, so a trailing "\r" is
+ * tolerated; blank lines, unparsable lines and JSON values that are not plain
+ * objects are skipped silently. The cursor lookup matches the FIRST row whose
+ * id equals the cursor.
+ *
+ * NOTE(review): when the cursor is null and no record carries an id, the
+ * returned cursor stays null, so every call sums the whole file again. Confirm
+ * that the records qwen writes always carry one of the fields recordId reads.
+ */
+export function extractQwenUsageSince(jsonlText, cursor) {
+    const inputCursor = typeof cursor === "string" && cursor.length > 0 ? cursor : null;
+    if (typeof jsonlText !== "string" || jsonlText.length === 0) {
+        return { events: [], cursor: inputCursor };
+    }
+    const rows = [];
+    let start = 0;
+    for (let i = 0; i <= jsonlText.length; i++) { // i === length flushes a final line with no trailing newline
+        if (i !== jsonlText.length && jsonlText.charCodeAt(i) !== 10 /* \n */)
+            continue;
+        const line = jsonlText.slice(start, i).trim();
+        start = i + 1;
+        if (line.length === 0)
+            continue;
+        let obj;
+        try {
+            const p = JSON.parse(line);
+            if (!p || typeof p !== "object" || Array.isArray(p))
+                continue;
+            obj = p;
+        }
+        catch {
+            continue; // a malformed line is skipped, not fatal
+        }
+        rows.push({ id: recordId(obj), counts: parseQwenUsage(obj) });
+    }
+    if (rows.length === 0)
+        return { events: [], cursor: inputCursor };
+    // Cursor always advances to the last id-bearing record's id (or stays as the
+    // input cursor when no record carries an id).
+    let lastId = inputCursor;
+    for (let i = rows.length - 1; i >= 0; i--) {
+        if (rows[i].id !== null) {
+            lastId = rows[i].id;
+            break;
+        }
+    }
+    // Select the slice to sum.
+    let slice;
+    if (inputCursor === null) {
+        slice = rows; // all records
+    }
+    else {
+        let foundAt = -1;
+        for (let i = 0; i < rows.length; i++) {
+            if (rows[i].id === inputCursor) {
+                foundAt = i;
+                break;
+            }
+        }
+        if (foundAt >= 0) {
+            slice = rows.slice(foundAt + 1); // strictly after the cursor
+        }
+        else {
+            // Compaction/rotation: cursor fell off the front. Bounded fallback — last
+            // record only. Never re-emit the whole history.
+            slice = rows.slice(rows.length - 1); // if that record has no usage, nothing is emitted
+        }
+    }
+    // Sum the selected records per model, then emit via the shared builder.
+    const sums = new Map();
+    for (const row of slice) {
+        const c = row.counts;
+        if (!c)
+            continue; // record without billable usage
+        const cur = sums.get(c.model_id) ?? { input: 0, output: 0, cacheCreate: 0, cacheRead: 0 };
+        cur.input += c.input_tokens;
+        cur.output += c.output_tokens;
+        cur.cacheCreate += c.cache_creation_tokens;
+        cur.cacheRead += c.cache_read_tokens;
+        sums.set(c.model_id, cur);
+    }
+    const events = [];
+    for (const [model_id, s] of sums) {
+        const ev = buildAgentUsageEvent({
+            model_id,
+            input_tokens: s.input,
+            output_tokens: s.output,
+            cache_creation_tokens: s.cacheCreate,
+            cache_read_tokens: s.cacheRead,
+        });
+        if (ev)
+            events.push(ev);
+    }
+    return { events, cursor: lastId };
+}
+/**
+ * Hash a project root into qwen-code's `<project_id>` directory segment.
+ *
+ * EXACT port of qwen's `getProjectHash`
+ * (refs/platforms/qwen-code/packages/core/src/utils/paths.ts:262 —
+ * `crypto.createHash('sha256').update(normalizedPath).digest('hex')`). On
+ * Windows qwen lowercases the path first (case-insensitive FS); we mirror that
+ * so a hook running on win32 resolves the same tmp dir qwen itself wrote.
+ * Pure, deterministic, NO regex.
+ *
+ * @param projectRoot - Project root path. Lowercasing on win32 is the only
+ *   transformation applied here; separators and trailing slashes are hashed
+ *   as given, so callers must pass the same spelling qwen used.
+ * @returns The hex-encoded SHA-256 digest of that path.
+ */
+export function qwenProjectHash(projectRoot) {
+    const normalized = platform() === "win32" ? projectRoot.toLowerCase() : projectRoot;
+    return createHash("sha256").update(normalized).digest("hex");
+}
+/**
+ * Build the canonical session JSONL path qwen-code writes its ChatRecords to:
+ *   <qwenHome>/tmp/<sha256(projectRoot)>/chats/<sessionId>.jsonl
+ * (refs chatRecordingService.ts:451 location + storage.ts:316-320
+ * getProjectTempDir → getGlobalTempDir(<qwenHome>/tmp) + getProjectHash).
+ *
+ * `qwenHome` is normally `<homedir>/.qwen`. Pure path join — does NOT touch the
+ * FS, so it is fully unit-testable; existence probing + the glob fallback live
+ * in the Stop hook (which cannot import this TS at runtime). NO regex.
+ *
+ * @param qwenHome - qwen's home directory (normally `<homedir>/.qwen`).
+ * @param projectRoot - Project root path; hashed via qwenProjectHash.
+ * @param sessionId - Session id, interpolated verbatim as the file's base name.
+ * @returns The joined path; the file is not checked for existence.
+ *
+ * NOTE(review): sessionId is not validated. path.join normalizes `..`
+ * segments, so an id containing separators could resolve outside `chats/` —
+ * callers should pass a trusted id.
+ */
+export function qwenChatJsonlPath(qwenHome, projectRoot, sessionId) {
+    return join(qwenHome, "tmp", qwenProjectHash(projectRoot), "chats", `${sessionId}.jsonl`);
+}

package/build/lifecycle.d.ts CHANGED Viewed

@@ -84,6 +84,16 @@ export declare function lifecycleGuardIntervalForEnv(env?: NodeJS.ProcessEnv): n
  * Exported for unit-testing.
  */
 export declare function bridgeChildIdleTimeoutMs(env?: NodeJS.ProcessEnv): number;
+/**
+ * #854 / #868: human-readable notice emitted when an idle bridge child is
+ * released. DX-tuned — human units (seconds, not raw ms), reassures that the
+ * helper reconnects automatically (it respawns on the next ctx_* call, #583),
+ * and drops the alarming "self-shutdown" jargon. Pure + exported so the wording
+ * is pinned by a test and stays grep-friendly via the #854 tag. Note: after the
+ * #868 fix this fires ONLY for sub-context / non-interactive children — the
+ * foreground interactive session's child runs with the reaper disabled.
+ *
+ * @param idleMs - Idle timeout in milliseconds; reported rounded to the
+ *   nearest whole second.
+ * @returns The single-line notice, without a trailing newline.
+ */
+export declare function idleReapMessage(idleMs: number): string;
 /**
  * #854: record MCP activity (inbound message or response). The server calls this
  * so the bridge-child idle reaper in {@link startLifecycleGuard} can distinguish

package/build/lifecycle.js CHANGED Viewed

@@ -123,6 +123,19 @@ export function bridgeChildIdleTimeoutMs(env = process.env) {
     }
     return 180_000;
 }
+/**
+ * #854 / #868: human-readable notice emitted when an idle bridge child is
+ * released. DX-tuned — human units (seconds, not raw ms), reassures that the
+ * helper reconnects automatically (it respawns on the next ctx_* call, #583),
+ * and drops the alarming "self-shutdown" jargon. Pure + exported so the wording
+ * is pinned by a test and stays grep-friendly via the #854 tag. Note: after the
+ * #868 fix this fires ONLY for sub-context / non-interactive children — the
+ * foreground interactive session's child runs with the reaper disabled.
+ *
+ * @param idleMs - Idle timeout in milliseconds. It is rounded to the nearest
+ *   whole second, so values under 500 print as "0s"; the value is not
+ *   validated, so a non-finite number prints as-is (e.g. "NaNs").
+ * @returns The single-line notice, without a trailing newline (the caller
+ *   appends it).
+ */
+export function idleReapMessage(idleMs) {
+    const seconds = Math.round(idleMs / 1000);
+    return `[context-mode] Released an idle MCP helper after ${seconds}s of inactivity to free memory; it reconnects automatically on next use. (#854)`;
+}
 // #854 idle-reaper state, module-level by design: an MCP server is exactly one
 // process (one StdioServerTransport + one lifecycle guard), so these are never
 // shared across concurrent servers in production. Multiple startLifecycleGuard()
@@ -245,7 +258,9 @@ export function startLifecycleGuard(opts) {
             // further messages (#643 unbounded calls) — the false-reap regression the
             // adversarial review flagged.
             if (_inFlight === 0 && Date.now() - _lastMcpActivity >= idleMs) {
-                process.stderr.write(`[context-mode] idle MCP bridge child self-shutdown after ${idleMs}ms with no activity (#854)\n`);
+                // Child's own stderr — the pi bridge forwards it to pi.logger, never the
+                // TUI terminal (#868). DX-tuned wording via idleReapMessage.
+                process.stderr.write(idleReapMessage(idleMs) + "\n");
                 shutdown();
             }
         }, Math.max(1000, Math.min(Math.floor(idleMs / 4), 30_000)));

package/build/session/db.d.ts CHANGED Viewed

@@ -380,6 +380,17 @@ export declare class SessionDB extends SQLiteBase {
      * Increment the compact_count for a session (tracks snapshot rebuilds).
      */
     incrementCompactCount(sessionId: string): void;
+    /**
+     * Read the per-session usage high-water cursor — the uuid of the last
+     * assistant turn already emitted by the Stop hook's main-turn capture.
+     * Returns null when unset (first Stop) or the session row is absent.
+     *
+     * @param sessionId - Key of the `session_meta` row to read.
+     * @returns The stored cursor, or null when there is none.
+     */
+    getUsageCursor(sessionId: string): string | null;
+    /**
+     * Advance the per-session usage high-water cursor to `uuid`. No-op when the
+     * session_meta row does not exist yet (callers ensureSession first).
+     *
+     * @param sessionId - Key of the `session_meta` row to update.
+     * @param uuid - New cursor value; stored as given, replacing any previous
+     *   value.
+     */
+    setUsageCursor(sessionId: string, uuid: string): void;
     /**
      * Upsert a resume snapshot for a session. Resets consumed flag on update.
      */

package/build/session/db.js CHANGED Viewed

@@ -481,6 +481,8 @@ const S = {
     getMaxFileEdits: "getMaxFileEdits",
     getLatestCommitMessage: "getLatestCommitMessage",
     incrementCompactCount: "incrementCompactCount",
+    getUsageCursor: "getUsageCursor",
+    setUsageCursor: "setUsageCursor",
     upsertResume: "upsertResume",
     getResume: "getResume",
     markResumeConsumed: "markResumeConsumed",
@@ -662,6 +664,19 @@ export class SessionDB extends SQLiteBase {
         catch {
             // best-effort migration only
         }
+        // Migration: per-session usage high-water cursor for the Stop hook's
+        // cursor-aware main-turn capture (extractTranscriptUsageSince). Stores the
+        // uuid of the last assistant turn already emitted so the next Stop forwards
+        // only NEW spend. Idempotent — guarded by a table_xinfo column check.
+        try {
+            const metaCols = this.db.pragma("table_xinfo(session_meta)");
+            if (!metaCols.some((c) => c.name === "usage_cursor")) {
+                this.db.exec("ALTER TABLE session_meta ADD COLUMN usage_cursor TEXT");
+            }
+        }
+        catch {
+            // best-effort migration only
+        }
     }
     prepareStatements() {
         this.stmts = new Map();
@@ -759,6 +774,8 @@ export class SessionDB extends SQLiteBase {
        ORDER BY id DESC
        LIMIT 1`);
         p(S.incrementCompactCount, `UPDATE session_meta SET compact_count = compact_count + 1 WHERE session_id = ?`);
+        p(S.getUsageCursor, `SELECT usage_cursor FROM session_meta WHERE session_id = ?`);
+        p(S.setUsageCursor, `UPDATE session_meta SET usage_cursor = ? WHERE session_id = ?`);
         // ── Resume ──
         p(S.upsertResume, `INSERT INTO session_resume (session_id, snapshot, event_count)
        VALUES (?, ?, ?)
@@ -1127,6 +1144,22 @@ export class SessionDB extends SQLiteBase {
     incrementCompactCount(sessionId) {
         this.stmt(S.incrementCompactCount).run(sessionId);
     }
+    /**
+     * Read the per-session usage high-water cursor — the uuid of the last
+     * assistant turn already emitted by the Stop hook's main-turn capture.
+     * Returns null when unset (first Stop) or the session row is absent.
+     *
+     * @param sessionId - Key of the `session_meta` row to read.
+     * @returns The stored `usage_cursor`, or null when the row is missing or
+     *   the column is NULL/undefined.
+     */
+    getUsageCursor(sessionId) {
+        const row = this.stmt(S.getUsageCursor).get(sessionId);
+        return row?.usage_cursor ?? null; // missing row and NULL column both map to null
+    }
+    /**
+     * Advance the per-session usage high-water cursor to `uuid`. No-op when the
+     * session_meta row does not exist yet (callers ensureSession first).
+     *
+     * @param sessionId - Key of the `session_meta` row to update.
+     * @param uuid - New cursor value; written as given, with no check that it
+     *   is newer than the stored one.
+     */
+    setUsageCursor(sessionId, uuid) {
+        this.stmt(S.setUsageCursor).run(uuid, sessionId); // bind order follows the UPDATE: value first, then key
+    }
     // ═══════════════════════════════════════════
     // Resume
     // ═══════════════════════════════════════════

package/build/session/extract.d.ts CHANGED Viewed

@@ -22,6 +22,19 @@ export interface SessionEvent {
      * `Fetched and indexed N sections (XKB)` preamble.
      */
     bytes_avoided?: number;
+    /**
+     * Optional structured cost/usage fields (Wave 2b). Emitted by
+     * extractAgentUsage alongside the colon-string `data` so the forward
+     * envelope can spread them to the platform as typed columns instead of an
+     * opaque blob. Present only when the source signal is present; cost_usd is
+     * omitted on a price miss or a zero-token turn.
+     */
+    model_id?: string;
+    input_tokens?: number;
+    output_tokens?: number;
+    cache_read_tokens?: number;
+    cache_creation_tokens?: number;
+    cost_usd?: number;
 }
 export interface ToolCall {
     toolName: string;
@@ -43,6 +56,201 @@ export interface HookInput {
         is_error?: boolean;
     };
 }
+/** Input shape `buildAgentUsageEvent` consumes — re-exported for parser typing. */
+export interface AgentUsageCounts {
+    model_id: string;
+    input_tokens: number;
+    output_tokens: number;
+    cache_creation_tokens: number;
+    cache_read_tokens: number;
+    native_cost_usd?: number | null; // provider-supplied USD; omit/null to price via the catalog
+}
+export { parseKimiUsage, extractKimiUsageSince } from "../adapters/kimi/usage.js";
+export { parseQwenUsage, extractQwenUsageSince } from "../adapters/qwen-code/usage.js";
+/**
+ * Pi (oh-my-pi) per-turn usage parser.
+ *
+ * Maps a Pi `turn_end` payload (`{ message: AssistantMessage }`) to the
+ * `buildAgentUsageEvent` input shape, or null when there is nothing to record.
+ *
+ * Field provenance (adapter-matrix/pi.md @320261f + cited refs):
+ *   - usage:        AssistantMessage.usage          (ai/src/types.ts:521 -> catalog/src/types.ts:100-145)
+ *   - model_id:     AssistantMessage.model          (ai/src/types.ts:510; kept "provider/model" — builder normalizes)
+ *   - input:        Usage.input                     -> input_tokens
+ *   - output:       Usage.output                    -> output_tokens
+ *   - cacheWrite:   Usage.cacheWrite                -> cache_creation_tokens
+ *   - cacheRead:    Usage.cacheRead                 -> cache_read_tokens
+ *   - native USD:   Usage.cost.total                -> native_cost_usd (HIGH confidence; no price-table needed)
+ *
+ * The event is per-turn incremental (per-response usage; anthropic.ts:1893-1901;
+ * "for the turn" catalog/types.ts:103), so each turn_end maps to exactly one
+ * agent_usage event with no cross-turn accumulation.
+ *
+ * Algorithmic and null-safe — NO regex. Accepts either the full TurnEndEvent
+ * (`{ message }`) or a bare AssistantMessage (`{ usage, model }`) so callers
+ * can pass `event` or `event.message` interchangeably. Returns null when the
+ * payload is not an assistant message, carries no usage object, or every token
+ * bucket is zero/absent (an all-zero turn emits no event — matches
+ * buildAgentUsageEvent's own zero->null contract).
+ */
+export declare function parsePiUsage(payload: unknown): AgentUsageCounts | null;
+/**
+ * openclaw `model.usage` diagnostic-event capture — parseOpenclawUsage.
+ *
+ * openclaw exposes a first-class `model.usage` diagnostic event
+ * (`DiagnosticUsageEvent`, refs/platforms/openclaw/src/infra/diagnostic-events.ts:18-47),
+ * emitted once per turn and consumed via `onDiagnosticEvent(listener)`
+ * (diagnostic-events.ts:1156) — the same bus the first-party diagnostics-otel /
+ * diagnostics-prometheus extensions read.
+ *
+ * Field mapping (openclaw → AgentUsageCounts):
+ *   evt.usage.input      → input_tokens
+ *   evt.usage.output     → output_tokens
+ *   evt.usage.cacheWrite → cache_creation_tokens   (cache-creation)
+ *   evt.usage.cacheRead  → cache_read_tokens       (cache-read)
+ *   evt.costUsd          → native_cost_usd  (pre-computed via estimateUsageCost,
+ *                                            agent-runner.ts:1995 — preferred over catalog)
+ *   evt.model            → model_id
+ *
+ * CRITICAL: read `evt.usage` (the PER-TURN TOTAL — "Last Turn Total"
+ * agent-runner.ts:943), NEVER `evt.lastCallUsage` (the last-model-call DELTA,
+ * diagnostic-events.ts:34-40). Summing both would double-count.
+ *
+ * Returns AgentUsageCounts (the buildAgentUsageEvent input shape) or null when
+ * the event is not a usage event / carries no usage / sums to zero. Pure,
+ * null-safe, algorithmic — NO regex.
+ */
+export declare function parseOpenclawUsage(payload: unknown): AgentUsageCounts | null;
+/**
+ * opencode per-turn usage parser.
+ *
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
+ * adapter-matrix/opencode.md. opencode tracks usage per *assistant message*; the
+ * usage-bearing payload reaches a plugin via the `message.updated` bus event,
+ * whose `event.properties.info` is the full Message. The assistant token shape
+ * (refs platforms/opencode .../session/message.ts) is:
+ *   info.tokens = { input, output, reasoning, cache: { read, write } }
+ *   info.cost   = USD cost for this message
+ *   info.modelID / info.providerID  (older refs may expose a single info.model)
+ *
+ * Field mapping (refs message.ts):
+ *   tokens.input        -> input_tokens
+ *   tokens.output       -> output_tokens
+ *   tokens.cache.read   -> cache_read_tokens
+ *   tokens.cache.write  -> cache_creation_tokens
+ *   modelID/providerID  -> model_id (`${providerID}/${modelID}` when both present)
+ *   cost                -> native_cost_usd
+ *
+ * LAST-STEP-SNAPSHOT CAVEAT (refs processor.ts:717-718): message-level
+ * `.tokens` is OVERWRITTEN every step-finish, so it holds the LAST step's usage
+ * — not the turn total. `.cost`, however, ACCUMULATES (`cost += usage.cost`) and
+ * is the correct cumulative turn cost. We therefore pass `info.cost` through as
+ * native_cost_usd so the billed $ is exact even though the token snapshot is
+ * imprecise; the token columns remain best-effort (last-step) telemetry. A true
+ * turn-total token sum would require summing per-step Step.Ended parts, which the
+ * `message.updated` payload does not carry — out of scope for this snapshot-based
+ * capture.
+ *
+ * Accepts any of: the bus event (`{ properties: { info } }`), the wrapped
+ * `{ event: { properties: { info } } }`, or the bare Message (`info`) so the
+ * caller can hand us whatever the SDK surfaces. NO regex — pure algorithmic,
+ * null-safe traversal. Returns null when the payload is not an assistant
+ * message, carries no tokens object, or every token bucket is zero/absent
+ * (mirrors buildAgentUsageEvent's zero->null contract).
+ */
+export declare function parseOpencodeUsage(payload: unknown): AgentUsageCounts | null;
+/**
+ * Build a structured `agent_usage` event from summed per-model token counts.
+ * Emits the colon-string `data` (human/debug + back-compat) AND the structured
+ * top-level fields the forward envelope spreads to the platform. cost_usd is a
+ * finite native_cost_usd if given, else the pricing catalog (omitted on a price
+ * miss). Returns null when every token bucket is zero/absent (no event emitted).
+ */
+export declare function buildAgentUsageEvent(counts: {
+    model_id: string;
+    input_tokens: number;
+    output_tokens: number;
+    cache_creation_tokens: number;
+    cache_read_tokens: number;
+    /**
+     * Provider-supplied USD cost for this turn. When a finite number, it is
+     * preferred over the catalog computation (openclaw / pi / omp / opencode
+     * ship a native cost — trust the source over our price table). Omit/null to
+     * derive cost_usd from the pricing catalog.
+     */
+    native_cost_usd?: number | null;
+}): SessionEvent | null;
+/**
+ * gemini-cli AfterModel usage capture — parse ONE AfterModel hook payload into
+ * a builder `agent_usage` event (or null). Pure, null-safe, struct-only — NO regex.
+ *
+ * Refs (docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md):
+ *   - AfterModel fires per model call inside the gemini-cli stream loop
+ *     (geminiChat.ts:1213); the hook input carries `llm_request` + `llm_response`
+ *     (hooks/types.ts:692-695).
+ *   - `llm_response.usageMetadata` exposes promptTokenCount / candidatesTokenCount
+ *     / totalTokenCount (hookTranslator.ts:60-64).
+ *   - model_id = `response.modelVersion || req.model` (loggingContentGenerator.ts:405,553).
+ *
+ * Mapping → builder shape:
+ *   promptTokenCount        → input_tokens
+ *   candidatesTokenCount    → output_tokens
+ *   thoughtsTokenCount      → ADDED into output_tokens (Gemini bills reasoning as output)
+ *   cachedContentTokenCount → cache_read_tokens (when present)
+ *   model_id                → response.modelVersion || llm_request.model
+ *
+ * CAVEAT — the DECOUPLED AfterModel payload (hookTranslator.ts:60-64) forwards
+ * only prompt/candidates/total and DROPS cachedContentTokenCount +
+ * thoughtsTokenCount. We map those two defensively WHEN PRESENT (richer payload
+ * variant / future fix / OTel-fed input) but never depend on them — the common
+ * case is input+output only. For full cached/thoughts fidelity the OTel
+ * `api_response` exporter or the chat-recording JSON is the source of record.
+ *
+ * MULTI-CALL TURNS — one user turn that triggers tool calls spans MULTIPLE
+ * model calls, and each AfterModel payload totals only its own call. This fn
+ * emits ONE priced event PER AfterModel call (each is one billed round-trip).
+ * Per-userPromptId summation into a single per-turn total is DEFERRED — emitting
+ * per-call never double-counts, since each call's usageMetadata is the
+ * authoritative total for that call.
+ */
+export declare function parseGeminiUsage(afterModelPayload: unknown): SessionEvent | null;
+/**
+ * claude-code MAIN-turn usage capture — the dominant-spend path the Task
+ * subagent capture (extractAgentUsage) misses. Parses the session transcript
+ * JSONL char-algorithmically (NO regex): each `type:"assistant"` line carries
+ * `message.usage` + `message.model`, and usage is a per-turn DELTA, so summing
+ * the assistant turns per model gives the exact billed total. `isSidechain:true`
+ * lines are Task-subagent sidechains written to a SEPARATE transcript (refs:
+ * sessionStorage.ts:1042) — excluding them keeps the main-turn sum from
+ * double-counting the separate Task-subagent capture. Emits one structured
+ * `agent_usage` event per distinct model.
+ */
+export declare function extractTranscriptUsage(transcript: string): SessionEvent[];
+/**
+ * Cursor-aware variant of extractTranscriptUsage for the Stop hook.
+ *
+ * The transcript grows every turn and the forward loop forwards ALL passed
+ * events unconditionally, so re-running extractTranscriptUsage on the whole
+ * transcript each Stop would double-count every prior turn. This walks only
+ * the turns NEW since the last Stop, keyed by a per-session high-water cursor
+ * (the `uuid` of the last assistant turn seen).
+ *
+ *   - sinceUuid null/empty  → process ALL non-sidechain assistant turns.
+ *   - sinceUuid found       → process only turns AFTER it (exclusive).
+ *   - sinceUuid set but NOT found (transcript compaction dropped it) → process
+ *     ONLY THE LAST non-sidechain assistant turn. Bounded by design: we never
+ *     re-emit the whole history when the cursor falls off the front.
+ *
+ * The returned `cursor` is the uuid of the LAST non-sidechain assistant turn
+ * in the transcript (whether or not it carried usage), so the next Stop resumes
+ * exactly past it. When the transcript has no such turn, the input cursor is
+ * returned unchanged. Same char-algorithmic JSONL parse (NO regex), same
+ * sidechain exclusion, same buildAgentUsageEvent emission path.
+ */
+export declare function extractTranscriptUsageSince(transcript: string, sinceUuid: string | null): {
+    events: SessionEvent[];
+    cursor: string | null;
+};
 /** Reset error-resolution state (for testing). */
 export declare function resetErrorResolutionState(): void;
 /** Reset iteration-loop state (for testing). */