npm - context-mode - Versions diffs - 1.0.166 → 1.0.168 - Mend

context-mode 1.0.166 → 1.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.codex-plugin/plugin.json +1 -1
package/.openclaw-plugin/openclaw.plugin.json +1 -1
package/.openclaw-plugin/package.json +1 -1
package/README.md +6 -4
package/build/adapters/codex/usage.d.ts +107 -0
package/build/adapters/codex/usage.js +227 -0
package/build/adapters/gemini-cli/hooks.d.ts +7 -1
package/build/adapters/gemini-cli/hooks.js +9 -1
package/build/adapters/gemini-cli/index.js +11 -0
package/build/adapters/kimi/paths.d.ts +20 -0
package/build/adapters/kimi/paths.js +41 -1
package/build/adapters/kimi/usage.d.ts +82 -0
package/build/adapters/kimi/usage.js +217 -0
package/build/adapters/omp/plugin.d.ts +6 -0
package/build/adapters/omp/plugin.js +87 -2
package/build/adapters/omp/usage.d.ts +49 -0
package/build/adapters/omp/usage.js +110 -0
package/build/adapters/openclaw/plugin.d.ts +10 -0
package/build/adapters/openclaw/plugin.js +57 -0
package/build/adapters/openclaw/usage.d.ts +34 -0
package/build/adapters/openclaw/usage.js +52 -0
package/build/adapters/opencode/plugin.d.ts +17 -0
package/build/adapters/opencode/plugin.js +40 -1
package/build/adapters/pi/extension.js +34 -1
package/build/adapters/qwen-code/index.js +23 -1
package/build/adapters/qwen-code/usage.d.ts +90 -0
package/build/adapters/qwen-code/usage.js +222 -0
package/build/session/analytics.js +30 -0
package/build/session/db.d.ts +11 -0
package/build/session/db.js +33 -0
package/build/session/extract.d.ts +224 -0
package/build/session/extract.js +705 -62
package/build/session/model-prices.json +429 -0
package/build/session/pricing.d.ts +64 -0
package/build/session/pricing.js +151 -0
package/cli.bundle.mjs +177 -170
package/configs/antigravity-cli/plugin.json +1 -1
package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
package/configs/gemini-cli/settings.json +11 -0
package/hooks/codex/stop.mjs +91 -4
package/hooks/gemini-cli/aftermodel.mjs +70 -0
package/hooks/kimi/stop.mjs +74 -3
package/hooks/qwen-code/platform.mjs +1 -0
package/hooks/qwen-code/stop.mjs +168 -0
package/hooks/session-db.bundle.mjs +7 -7
package/hooks/session-extract.bundle.mjs +3 -2
package/hooks/session-loaders.mjs +16 -1
package/hooks/stop.mjs +35 -2
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/server.bundle.mjs +108 -101

package/build/session/db.js CHANGED Viewed

@@ -481,6 +481,8 @@ const S = {
     getMaxFileEdits: "getMaxFileEdits",
     getLatestCommitMessage: "getLatestCommitMessage",
     incrementCompactCount: "incrementCompactCount",
+    getUsageCursor: "getUsageCursor",
+    setUsageCursor: "setUsageCursor",
     upsertResume: "upsertResume",
     getResume: "getResume",
     markResumeConsumed: "markResumeConsumed",
@@ -662,6 +664,19 @@ export class SessionDB extends SQLiteBase {
         catch {
             // best-effort migration only
         }
+        // Migration: per-session usage high-water cursor for the Stop hook's
+        // cursor-aware main-turn capture (extractTranscriptUsageSince). Stores the
+        // uuid of the last assistant turn already emitted so the next Stop forwards
+        // only NEW spend. Idempotent — guarded by a table_xinfo column check.
+        try {
+            const metaCols = this.db.pragma("table_xinfo(session_meta)");
+            if (!metaCols.some((c) => c.name === "usage_cursor")) {
+                this.db.exec("ALTER TABLE session_meta ADD COLUMN usage_cursor TEXT");
+            }
+        }
+        catch {
+            // best-effort migration only
+        }
     }
     prepareStatements() {
         this.stmts = new Map();
@@ -759,6 +774,8 @@ export class SessionDB extends SQLiteBase {
        ORDER BY id DESC
        LIMIT 1`);
         p(S.incrementCompactCount, `UPDATE session_meta SET compact_count = compact_count + 1 WHERE session_id = ?`);
+        p(S.getUsageCursor, `SELECT usage_cursor FROM session_meta WHERE session_id = ?`);
+        p(S.setUsageCursor, `UPDATE session_meta SET usage_cursor = ? WHERE session_id = ?`);
         // ── Resume ──
         p(S.upsertResume, `INSERT INTO session_resume (session_id, snapshot, event_count)
        VALUES (?, ?, ?)
@@ -1127,6 +1144,22 @@ export class SessionDB extends SQLiteBase {
     /**
      * Bump the `compact_count` column of the session's `session_meta` row by
      * one, using the prepared `incrementCompactCount` UPDATE statement.
      *
      * @param sessionId - Value bound to the statement's `session_id = ?` filter.
      */
     incrementCompactCount(sessionId) {
         this.stmt(S.incrementCompactCount).run(sessionId);
     }
+    /**
+     * Read the per-session usage high-water cursor — the uuid of the last
+     * assistant turn already emitted by the Stop hook's main-turn capture.
+     * Returns null when unset (first Stop) or the session row is absent.
+     *
+     * The optional chaining on `row` covers a lookup that yields no row, and
+     * `?? null` normalizes a missing/NULL `usage_cursor` to null, so callers
+     * only ever receive a cursor value or null.
+     *
+     * @param sessionId - Session whose `session_meta.usage_cursor` is read.
+     * @returns The stored cursor, or null when there is none.
+     */
+    getUsageCursor(sessionId) {
+        const row = this.stmt(S.getUsageCursor).get(sessionId);
+        return row?.usage_cursor ?? null;
+    }
+    /**
+     * Advance the per-session usage high-water cursor to `uuid`. No-op when the
+     * session_meta row does not exist yet (callers should call ensureSession
+     * first).
+     *
+     * @param sessionId - Session whose cursor is updated; bound to the
+     *   statement's `session_id = ?` filter.
+     * @param uuid - New cursor value written to `usage_cursor`. It is bound
+     *   first, matching the placeholder order of the prepared UPDATE.
+     */
+    setUsageCursor(sessionId, uuid) {
+        this.stmt(S.setUsageCursor).run(uuid, sessionId);
+    }
     // ═══════════════════════════════════════════
     // Resume
     // ═══════════════════════════════════════════

package/build/session/extract.d.ts CHANGED Viewed

@@ -22,6 +22,35 @@ export interface SessionEvent {
      * `Fetched and indexed N sections (XKB)` preamble.
      */
     bytes_avoided?: number;
+    /**
+     * Optional — bytes the model PAID to ACCESS kept-out content for this event:
+     * the tool_response byte length of a `ctx_search` / `ctx_fetch_and_index`
+     * call. This is the OTHER half of the with/without ratio (bytes_avoided is
+     * the kept-out half). Sandbox compute (ctx_execute/batch/file) is work-output
+     * and is excluded. Present only when the call is a retrieval call and its
+     * tool_response is non-empty.
+     */
+    bytes_retrieved?: number;
+    /**
+     * Optional structured cost/usage fields (Wave 2b). Emitted by
+     * extractAgentUsage alongside the colon-string `data` so the forward
+     * envelope can spread them to the platform as typed columns instead of an
+     * opaque blob. Present only when the source signal is present; cost_usd is
+     * omitted on a price miss or a zero-token turn.
+     */
+    model_id?: string;
+    input_tokens?: number;
+    output_tokens?: number;
+    cache_read_tokens?: number;
+    cache_creation_tokens?: number;
+    cost_usd?: number;
+    /**
+     * "task_cumulative" on agent_usage events whose tokens are a Task sub-agent's
+     * usage SUMMED across its whole run (not one turn). The platform buckets these
+     * as lifetime spend and never prices them per-turn — see
+     * docs/handoff/cumulative-cost-bug.md.
+     */
+    usage_scope?: string;
 }
 export interface ToolCall {
     toolName: string;
@@ -43,6 +72,201 @@ export interface HookInput {
         is_error?: boolean;
     };
 }
+/**
+ * Input shape `buildAgentUsageEvent` consumes — re-exported for parser typing.
+ *
+ * Fields:
+ *   - model_id:              identifier of the model the usage belongs to.
+ *   - input_tokens:          input token count.
+ *   - output_tokens:         output token count.
+ *   - cache_creation_tokens: cache-creation (cache write) token count.
+ *   - cache_read_tokens:     cache-read token count.
+ *   - native_cost_usd:       optional provider-supplied USD cost. Per the
+ *                            `buildAgentUsageEvent` parameter documentation, a
+ *                            finite value is preferred over the pricing-catalog
+ *                            computation; omit or pass null to derive the cost
+ *                            from the catalog.
+ */
+export interface AgentUsageCounts {
+    model_id: string;
+    input_tokens: number;
+    output_tokens: number;
+    cache_creation_tokens: number;
+    cache_read_tokens: number;
+    native_cost_usd?: number | null;
+}
+export { parseKimiUsage, extractKimiUsageSince } from "../adapters/kimi/usage.js";
+export { parseQwenUsage, extractQwenUsageSince } from "../adapters/qwen-code/usage.js";
+/**
+ * Pi (oh-my-pi) per-turn usage parser.
+ *
+ * Maps a Pi `turn_end` payload (`{ message: AssistantMessage }`) to the
+ * `buildAgentUsageEvent` input shape, or null when there is nothing to record.
+ *
+ * Field provenance (adapter-matrix/pi.md @320261f + cited refs):
+ *   - usage:        AssistantMessage.usage          (ai/src/types.ts:521 -> catalog/src/types.ts:100-145)
+ *   - model_id:     AssistantMessage.model          (ai/src/types.ts:510; kept "provider/model" — builder normalizes)
+ *   - input:        Usage.input                     -> input_tokens
+ *   - output:       Usage.output                    -> output_tokens
+ *   - cacheWrite:   Usage.cacheWrite                -> cache_creation_tokens
+ *   - cacheRead:    Usage.cacheRead                 -> cache_read_tokens
+ *   - native USD:   Usage.cost.total                -> native_cost_usd (HIGH confidence; no price-table needed)
+ *
+ * The event is per-turn incremental (per-response usage; anthropic.ts:1893-1901;
+ * "for the turn" catalog/types.ts:103), so each turn_end maps to exactly one
+ * agent_usage event with no cross-turn accumulation.
+ *
+ * Algorithmic + null-safe, NO regex. Accepts either the full TurnEndEvent
+ * (`{ message }`) or a bare AssistantMessage (`{ usage, model }`) so callers
+ * can pass `event` or `event.message` interchangeably. Returns null when the
+ * payload is not an assistant message, carries no usage object, or every token
+ * bucket is zero/absent (an all-zero turn emits no event — matches
+ * buildAgentUsageEvent's own zero->null contract).
+ *
+ * @param payload - A Pi `turn_end` event (`{ message }`) or a bare
+ *   AssistantMessage (`{ usage, model }`); typed `unknown`, so any value may
+ *   be passed.
+ * @returns The usage counts for the turn, or null when there is nothing to
+ *   record.
+ */
+export declare function parsePiUsage(payload: unknown): AgentUsageCounts | null;
+/**
+ * openclaw `model.usage` diagnostic-event capture — parseOpenclawUsage.
+ *
+ * openclaw exposes a first-class `model.usage` diagnostic event
+ * (`DiagnosticUsageEvent`, refs/platforms/openclaw/src/infra/diagnostic-events.ts:18-47),
+ * emitted once per turn and consumed via `onDiagnosticEvent(listener)`
+ * (diagnostic-events.ts:1156) — the same bus the first-party diagnostics-otel /
+ * diagnostics-prometheus extensions read.
+ *
+ * Field mapping (openclaw → AgentUsageCounts):
+ *   evt.usage.input     → input_tokens
+ *   evt.usage.output    → output_tokens
+ *   evt.usage.cacheWrite→ cache_creation_tokens   (cache-creation)
+ *   evt.usage.cacheRead → cache_read_tokens       (cache-read)
+ *   evt.costUsd         → native_cost_usd  (pre-computed via estimateUsageCost,
+ *                                           agent-runner.ts:1995 — preferred over catalog)
+ *   evt.model           → model_id
+ *
+ * CRITICAL: read `evt.usage` (the PER-TURN TOTAL — "Last Turn Total"
+ * agent-runner.ts:943), NEVER `evt.lastCallUsage` (the last-model-call DELTA,
+ * diagnostic-events.ts:34-40). Summing both would double-count.
+ *
+ * Returns AgentUsageCounts (the buildAgentUsageEvent input shape) or null when
+ * the event is not a usage event / carries no usage / sums to zero. Pure,
+ * null-safe, algorithmic — NO regex.
+ *
+ * @param payload - Candidate `model.usage` diagnostic event; typed `unknown`,
+ *   so any value may be passed.
+ * @returns The per-turn usage counts, or null when the event is not a usage
+ *   event, carries no usage, or sums to zero.
+ */
+export declare function parseOpenclawUsage(payload: unknown): AgentUsageCounts | null;
+/**
+ * opencode per-turn usage parser.
+ *
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
+ * adapter-matrix/opencode.md. opencode tracks usage per *assistant message*; the
+ * usage-bearing payload reaches a plugin via the `message.updated` bus event,
+ * whose `event.properties.info` is the full Message. The assistant token shape
+ * (refs platforms/opencode .../session/message.ts) is:
+ *   info.tokens = { input, output, reasoning, cache: { read, write } }
+ *   info.cost   = USD cost for this message
+ *   info.modelID / info.providerID  (older refs may expose a single info.model)
+ *
+ * Field mapping (refs message.ts):
+ *   tokens.input        -> input_tokens
+ *   tokens.output       -> output_tokens
+ *   tokens.cache.read   -> cache_read_tokens
+ *   tokens.cache.write  -> cache_creation_tokens
+ *   modelID/providerID  -> model_id (`${providerID}/${modelID}` when both present)
+ *   cost                -> native_cost_usd
+ *
+ * LAST-STEP-SNAPSHOT CAVEAT (refs processor.ts:717-718): message-level
+ * `.tokens` is OVERWRITTEN every step-finish, so it holds the LAST step's usage
+ * — not the turn total. `.cost`, however, ACCUMULATES (`cost += usage.cost`) and
+ * is the correct cumulative turn cost. We therefore pass `info.cost` through as
+ * native_cost_usd so the billed $ is exact even though the token snapshot is
+ * imprecise; the token columns remain best-effort (last-step) telemetry. A true
+ * turn-total token sum would require summing per-step Step.Ended parts, which the
+ * `message.updated` payload does not carry — out of scope for this snapshot-based
+ * capture.
+ *
+ * Accepts either the bus event (`{ properties: { info } }`), the wrapped
+ * `{ event: { properties: { info } } }`, or the bare Message (`info`) so the
+ * caller can hand us whatever the SDK surfaces. NO regex — pure algorithmic,
+ * null-safe traversal. Returns null when the payload is not an assistant
+ * message, carries no tokens object, or every token bucket is zero/absent
+ * (mirrors buildAgentUsageEvent's zero->null contract).
+ *
+ * @param payload - The `message.updated` bus event, the wrapped
+ *   `{ event: ... }` form, or the bare Message; typed `unknown`, so any value
+ *   may be passed.
+ * @returns Last-step token counts plus the cumulative native cost, or null
+ *   when there is nothing to record.
+ */
+export declare function parseOpencodeUsage(payload: unknown): AgentUsageCounts | null;
+/**
+ * Build a structured `agent_usage` event from summed per-model token counts.
+ * Emits the colon-string `data` (human/debug + back-compat) AND the structured
+ * top-level fields the forward envelope spreads to the platform. cost_usd via
+ * the pricing catalog — omitted on a price miss. Returns null when every token
+ * bucket is zero/absent (so an all-zero model emits no event).
+ *
+ * @param counts - Per-model token counts, with an optional provider-supplied
+ *   cost (see `native_cost_usd` below).
+ * @returns The `agent_usage` event, or null when every token bucket is
+ *   zero/absent.
+ */
+export declare function buildAgentUsageEvent(counts: {
+    model_id: string;
+    input_tokens: number;
+    output_tokens: number;
+    cache_creation_tokens: number;
+    cache_read_tokens: number;
+    /**
+     * Provider-supplied USD cost for this turn. When a finite number, it is
+     * preferred over the catalog computation (openclaw / pi / omp / opencode
+     * ship a native cost — trust the source over our price table). Omit/null to
+     * derive cost_usd from the pricing catalog.
+     */
+    native_cost_usd?: number | null;
+}): SessionEvent | null;
+/**
+ * gemini-cli AfterModel usage capture — parse ONE AfterModel hook payload into
+ * a builder `agent_usage` event (or null). Pure, null-safe, struct-only — NO regex.
+ *
+ * Refs (docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md):
+ *   - AfterModel fires per model call inside the gemini-cli stream loop
+ *     (geminiChat.ts:1213); the hook input carries `llm_request` + `llm_response`
+ *     (hooks/types.ts:692-695).
+ *   - `llm_response.usageMetadata` exposes promptTokenCount / candidatesTokenCount
+ *     / totalTokenCount (hookTranslator.ts:60-64).
+ *   - model_id = `response.modelVersion || req.model` (loggingContentGenerator.ts:405,553).
+ *
+ * Mapping → builder shape:
+ *   promptTokenCount        → input_tokens
+ *   candidatesTokenCount    → output_tokens
+ *   thoughtsTokenCount      → ADDED into output_tokens (Gemini bills reasoning as output)
+ *   cachedContentTokenCount → cache_read_tokens (when present)
+ *   model_id                → response.modelVersion || llm_request.model
+ *
+ * CAVEAT — the DECOUPLED AfterModel payload (hookTranslator.ts:60-64) forwards
+ * only prompt/candidates/total and DROPS cachedContentTokenCount +
+ * thoughtsTokenCount. We map those two defensively WHEN PRESENT (richer payload
+ * variant / future fix / OTel-fed input) but never depend on them — the common
+ * case is input+output only. For full cached/thoughts fidelity the OTel
+ * `api_response` exporter or the chat-recording JSON is the source of record.
+ *
+ * MULTI-CALL TURNS — one user turn that triggers tool calls spans MULTIPLE
+ * model calls, and each AfterModel payload is cumulative within its own call.
+ * This fn emits ONE priced event PER AfterModel call (each call is one billed
+ * round-trip). Per-userPromptId summation into a single per-turn total is
+ * DEFERRED — emitting per-call never double-counts, since each call's
+ * usageMetadata is the authoritative total for that call.
+ *
+ * @param afterModelPayload - One AfterModel hook payload; typed `unknown`, so
+ *   any value may be passed.
+ * @returns The `agent_usage` event for that model call, or null (presumably
+ *   when the payload carries no usable usage — confirm against the
+ *   implementation).
+ */
+export declare function parseGeminiUsage(afterModelPayload: unknown): SessionEvent | null;
+/**
+ * claude-code MAIN-turn usage capture — the dominant-spend path the Task
+ * subagent capture (extractAgentUsage) misses. Parses the session transcript
+ * JSONL char-algorithmically (NO regex): each `type:"assistant"` line carries
+ * `message.usage` + `message.model`, and usage is a per-turn DELTA, so summing
+ * the assistant turns per model = the exact billed total. `isSidechain:true`
+ * lines are Task-subagent sidechains written to a SEPARATE transcript (refs:
+ * sessionStorage.ts:1042) — excluding them keeps the main-turn sum from
+ * double-counting the separate Task-subagent capture. Emits one structured
+ * `agent_usage` event per distinct model.
+ *
+ * @param transcript - The session transcript as JSONL text.
+ * @returns One structured `agent_usage` event per distinct model found in the
+ *   non-sidechain assistant turns.
+ */
+export declare function extractTranscriptUsage(transcript: string): SessionEvent[];
+/**
+ * Cursor-aware variant of extractTranscriptUsage for the Stop hook.
+ *
+ * The transcript grows every turn and the forward loop forwards ALL passed
+ * events unconditionally, so re-running extractTranscriptUsage on the whole
+ * transcript each Stop would double-count every prior turn. This walks only
+ * the turns NEW since the last Stop, keyed by a per-session high-water cursor
+ * (the `uuid` of the last assistant turn seen).
+ *
+ *   - sinceUuid null/empty  → process ALL non-sidechain assistant turns.
+ *   - sinceUuid found       → process only turns AFTER it (exclusive).
+ *   - sinceUuid set but NOT found (transcript compaction dropped it) → process
+ *     ONLY THE LAST non-sidechain assistant turn. Bounded by design: we never
+ *     re-emit the whole history when the cursor falls off the front.
+ *
+ * `cursor` returns the uuid of the LAST non-sidechain assistant turn in the
+ * transcript (whether or not it carried usage), so the next Stop resumes
+ * exactly past it. When the transcript has no such turn, the input cursor is
+ * returned unchanged. Same char-algorithmic JSONL parse (NO regex), same
+ * sidechain exclusion, same buildAgentUsageEvent emission path.
+ *
+ * @param transcript - The session transcript as JSONL text.
+ * @param sinceUuid - High-water cursor from the previous Stop, or null/empty
+ *   to process every non-sidechain assistant turn.
+ * @returns `events`: the usage events for the newly processed turns;
+ *   `cursor`: the uuid to persist for the next Stop.
+ */
+export declare function extractTranscriptUsageSince(transcript: string, sinceUuid: string | null): {
+    events: SessionEvent[];
+    cursor: string | null;
+};
 /** Reset error-resolution state (for testing). */
 export declare function resetErrorResolutionState(): void;
 /** Reset iteration-loop state (for testing). */