npm - context-mode - Versions diffs - 1.0.166 → 1.0.168 - Mend

context-mode 1.0.166 → 1.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/.codex-plugin/plugin.json +1 -1
package/.openclaw-plugin/openclaw.plugin.json +1 -1
package/.openclaw-plugin/package.json +1 -1
package/README.md +6 -4
package/build/adapters/codex/usage.d.ts +107 -0
package/build/adapters/codex/usage.js +227 -0
package/build/adapters/gemini-cli/hooks.d.ts +7 -1
package/build/adapters/gemini-cli/hooks.js +9 -1
package/build/adapters/gemini-cli/index.js +11 -0
package/build/adapters/kimi/paths.d.ts +20 -0
package/build/adapters/kimi/paths.js +41 -1
package/build/adapters/kimi/usage.d.ts +82 -0
package/build/adapters/kimi/usage.js +217 -0
package/build/adapters/omp/plugin.d.ts +6 -0
package/build/adapters/omp/plugin.js +87 -2
package/build/adapters/omp/usage.d.ts +49 -0
package/build/adapters/omp/usage.js +110 -0
package/build/adapters/openclaw/plugin.d.ts +10 -0
package/build/adapters/openclaw/plugin.js +57 -0
package/build/adapters/openclaw/usage.d.ts +34 -0
package/build/adapters/openclaw/usage.js +52 -0
package/build/adapters/opencode/plugin.d.ts +17 -0
package/build/adapters/opencode/plugin.js +40 -1
package/build/adapters/pi/extension.js +34 -1
package/build/adapters/qwen-code/index.js +23 -1
package/build/adapters/qwen-code/usage.d.ts +90 -0
package/build/adapters/qwen-code/usage.js +222 -0
package/build/session/analytics.js +30 -0
package/build/session/db.d.ts +11 -0
package/build/session/db.js +33 -0
package/build/session/extract.d.ts +224 -0
package/build/session/extract.js +705 -62
package/build/session/model-prices.json +429 -0
package/build/session/pricing.d.ts +64 -0
package/build/session/pricing.js +151 -0
package/cli.bundle.mjs +177 -170
package/configs/antigravity-cli/plugin.json +1 -1
package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
package/configs/gemini-cli/settings.json +11 -0
package/hooks/codex/stop.mjs +91 -4
package/hooks/gemini-cli/aftermodel.mjs +70 -0
package/hooks/kimi/stop.mjs +74 -3
package/hooks/qwen-code/platform.mjs +1 -0
package/hooks/qwen-code/stop.mjs +168 -0
package/hooks/session-db.bundle.mjs +7 -7
package/hooks/session-extract.bundle.mjs +3 -2
package/hooks/session-loaders.mjs +16 -1
package/hooks/stop.mjs +35 -2
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/server.bundle.mjs +108 -101

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -6,14 +6,14 @@
   },
   "metadata": {
     "description": "Claude Code plugins by Mert Koseoğlu",
-    "version": "1.0.166"
+    "version": "1.0.168"
   },
   "plugins": [
     {
       "name": "context-mode",
       "source": "./",
       "description": "Claude Code MCP plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
-      "version": "1.0.166",
+      "version": "1.0.168",
       "author": {
         "name": "Mert Koseoğlu"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "context-mode",
-  "version": "1.0.166",
+  "version": "1.0.168",
   "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
   "author": {
     "name": "Mert Koseoğlu",

package/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "context-mode",
-  "version": "1.0.166",
+  "version": "1.0.168",
   "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
   "author": {
     "name": "Mert Koseoğlu",

package/.openclaw-plugin/openclaw.plugin.json CHANGED Viewed

@@ -3,7 +3,7 @@
   "name": "Context Mode",
   "kind": "tool",
   "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
-  "version": "1.0.166",
+  "version": "1.0.168",
   "sandbox": {
     "mode": "permissive",
     "filesystem_access": "full",

package/.openclaw-plugin/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "context-mode",
-  "version": "1.0.166",
+  "version": "1.0.168",
   "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
   "author": {
     "name": "Mert Koseoğlu",

package/README.md CHANGED Viewed

@@ -972,15 +972,15 @@ Full configs: [`configs/kiro/mcp.json`](configs/kiro/mcp.json) | [`configs/kiro/
    {
      "context_servers": {
        "context-mode": {
-         "command": {
-           "path": "context-mode"
-         }
+         "command": "context-mode",
+         "args": [],
+         "env": {}
        }
      }
    }
    ```
-   Note: Zed uses `"context_servers"` and `"command": { "path": "..." }` syntax, not `"mcpServers"` or `"command": "..."` like other platforms.
+   Note: Zed uses `"context_servers"` instead of `"mcpServers"`. `args` and `env` are optional for context-mode, but are shown here to match Zed's custom MCP server shape.
 3. Copy routing instructions (Zed has no hook support):
@@ -1067,6 +1067,8 @@ Full configs: [`configs/kiro/mcp.json`](configs/kiro/mcp.json) | [`configs/kiro/
    Both should show `context-mode` as `enabled`.
+   > The plugin self-registers its MCP server in `~/.omp/agent/mcp.json` on first load (spawned as `node <plugin>/server.bundle.mjs`, since the plugin-install package directory is not on `PATH`), so the 11 `ctx_*` tools become reachable after the restart in step 2 — no manual `mcp.json` edit needed ([#677](https://github.com/mksglu/context-mode/issues/677)). An existing `context-mode` entry is never overwritten; remove it if you want the plugin to re-register the bundled path.
 **Install — manual plugin path (if `omp plugin install` is unavailable):**
 OMP loads anything listed under `~/.omp/plugins/package.json` `dependencies` whose own `package.json` carries an `omp` (or `pi`) field. New plugins default to enabled — the lock file at `~/.omp/plugins/omp-plugins.lock.json` is only consulted when a plugin needs to be explicitly **disabled** (loader skips `runtimeState && !runtimeState.enabled` per [`extensibility/plugins/loader.ts:89-94`](https://github.com/can1357/oh-my-pi/blob/main/packages/coding-agent/src/extensibility/plugins/loader.ts)). So the manual install is two commands:

package/build/adapters/codex/usage.d.ts ADDED Viewed

@@ -0,0 +1,107 @@
+/**
+ * adapters/codex/usage — Codex CLI per-turn token + cost capture.
+ *
+ * ── Feasibility (verified empirically against live rollout files) ───────────
+ * Codex persists a SESSION ROLLOUT transcript at
+ *   $CODEX_HOME/sessions/YYYY/MM/DD/rollout-<ts>-<session_id>.jsonl
+ * (default $CODEX_HOME = ~/.codex). Each line is a JSON record
+ *   { timestamp, type, payload }
+ * with `type` ∈ { session_meta, turn_context, response_item, event_msg }.
+ *
+ * The adapter-matrix (docs/prds/2026-06-paid-observability/adapter-matrix/
+ * codex.md) correctly notes that token usage is carried by
+ * `EventMsg::TokenCount(TokenCountEvent)` (codex-rs protocol.rs:1276) and is
+ * NOT on any hook payload (hooks carry `model` only). What the matrix does not
+ * state — and what makes a file-tail feasible — is that codex ALSO PERSISTS
+ * those EventMsgs to the rollout JSONL as `type:"event_msg"` records. The
+ * `token_count` payload mirrors `TokenCountEvent` 1:1:
+ *
+ *   {
+ *     "type": "event_msg",
+ *     "payload": {
+ *       "type": "token_count",
+ *       "info": null | {                       // Option<TokenUsageInfo>
+ *         "total_token_usage": TokenUsage,      // CUMULATIVE session sum (protocol.rs:2015)
+ *         "last_token_usage":  TokenUsage,      // INCREMENTAL last turn  (protocol.rs:2016)
+ *         "model_context_window": number | null
+ *       },
+ *       "rate_limits": { ... }
+ *     }
+ *   }
+ *
+ * where TokenUsage (protocol.rs:2000) is the full OpenAI usage shape:
+ *   { input_tokens, cached_input_tokens, output_tokens,
+ *     reasoning_output_tokens, total_tokens }.
+ *
+ * `info` is `null` until a turn COMPLETES (the initial session-start
+ * token_count and any turn that is interrupted/aborted carry `info:null`);
+ * a completed turn carries a populated `info`. We therefore read
+ * `info.last_token_usage` and SKIP records whose `info` is null.
+ *
+ * ── Incremental vs Cumulative (protocol.rs:2049-2052) ───────────────────────
+ *   append_last_usage():  total_token_usage += last  (cumulative);  last_token_usage = last  (incremental, replaced each turn)
+ * We use `last_token_usage` as the per-turn delta — summing it across the new
+ * turns since the cursor gives the exact NEW spend, with no double-count. We
+ * deliberately do NOT use `total_token_usage` (it is the running cumulative sum
+ * and would re-count every prior turn on each read).
+ *
+ * ── Field mapping (codex TokenUsage → AgentUsageCounts) ─────────────────────
+ *   input_tokens             → input_tokens
+ *   cached_input_tokens      → cache_read_tokens          (== OpenAI cached_tokens)
+ *   output_tokens + reasoning_output_tokens → output_tokens
+ *                              (reasoning is billed as output; fold it in)
+ *   model_id: from the most recent turn_context.model (protocol.rs:1977 /
+ *             ThreadSettingsSnapshot.model), falling back to session_meta.model
+ *             when no turn_context precedes the event.
+ *
+ * Codex carries NO native USD on the rollout — cost is derived downstream by
+ * buildAgentUsageEvent's pricing catalog (native_cost_usd omitted).
+ *
+ * Pure, null-safe, algorithmic. NO regex.
+ */
+import { type AgentUsageCounts, type SessionEvent } from "../../session/extract.js";
+/**
+ * Map one codex `token_count` payload's `info.last_token_usage` to the
+ * buildAgentUsageEvent input shape, or null when there is nothing to record.
+ *
+ * @param payload  the `event_msg.payload` object (payload.type === "token_count")
+ * @param modelId  model resolved from the enclosing turn_context/session_meta
+ *
+ * Returns null when:
+ *   - payload is not an object, or carries a string `type` other than "token_count", or
+ *   - info or info.last_token_usage is null/absent (session-start ping or interrupted turn), or
+ *   - input, output (incl. reasoning) and cached-input counts are all zero.
+ */
+export declare function parseCodexUsage(payload: unknown, modelId: string): AgentUsageCounts | null;
+/**
+ * Cursor-aware codex rollout reader for the Stop hook.
+ *
+ * The rollout grows every turn and the forward loop forwards ALL passed events
+ * unconditionally, so re-summing the whole rollout each Stop would double-count
+ * every prior turn. This walks only the `token_count` records NEW since the
+ * last Stop, keyed by a per-session high-water cursor.
+ *
+ * The cursor is the 0-based LINE INDEX of the last non-empty, parseable line
+ * already scanned (stored as a decimal string in the usage_cursor column).
+ * Line index is a stable monotonic key here because codex APPENDS to the
+ * rollout (it never rewrites/compacts a line in place), so a prior line's
+ * position never shifts.
+ *
+ *   - sinceCursor null/empty/invalid → process ALL token_count records.
+ *   - sinceCursor = "N"      → process only token_count records at line idx > N.
+ *
+ * Model resolution: we track the most-recent `model` seen on any preceding
+ * `turn_context` (protocol.rs:1977) or `session_meta`; each token_count is
+ * attributed to that model. Sums are grouped per-model and emitted via the
+ * shared buildAgentUsageEvent path (so a session that switches models mid-run
+ * yields one agent_usage event per model for the new slice).
+ *
+ * `cursor` returns the line index of the LAST non-empty line that parsed to a
+ * JSON object (string), so the next Stop resumes strictly past it. When the
+ * rollout is empty/unparseable, the input cursor is returned unchanged. Same
+ * linear JSONL walk, JSON.parse per line, NO regex.
+ */
+export declare function extractCodexUsageSince(rollout: string, sinceCursor: string | null): {
+    events: SessionEvent[];
+    cursor: string | null;
+};

package/build/adapters/codex/usage.js ADDED Viewed

@@ -0,0 +1,227 @@
+/**
+ * adapters/codex/usage — Codex CLI per-turn token + cost capture.
+ *
+ * ── Feasibility (verified empirically against live rollout files) ───────────
+ * Codex persists a SESSION ROLLOUT transcript at
+ *   $CODEX_HOME/sessions/YYYY/MM/DD/rollout-<ts>-<session_id>.jsonl
+ * (default $CODEX_HOME = ~/.codex). Each line is a JSON record
+ *   { timestamp, type, payload }
+ * with `type` ∈ { session_meta, turn_context, response_item, event_msg }.
+ *
+ * The adapter-matrix (docs/prds/2026-06-paid-observability/adapter-matrix/
+ * codex.md) correctly notes that token usage is carried by
+ * `EventMsg::TokenCount(TokenCountEvent)` (codex-rs protocol.rs:1276) and is
+ * NOT on any hook payload (hooks carry `model` only). What the matrix does not
+ * state — and what makes a file-tail feasible — is that codex ALSO PERSISTS
+ * those EventMsgs to the rollout JSONL as `type:"event_msg"` records. The
+ * `token_count` payload mirrors `TokenCountEvent` 1:1:
+ *
+ *   {
+ *     "type": "event_msg",
+ *     "payload": {
+ *       "type": "token_count",
+ *       "info": null | {                       // Option<TokenUsageInfo>
+ *         "total_token_usage": TokenUsage,      // CUMULATIVE session sum (protocol.rs:2015)
+ *         "last_token_usage":  TokenUsage,      // INCREMENTAL last turn  (protocol.rs:2016)
+ *         "model_context_window": number | null
+ *       },
+ *       "rate_limits": { ... }
+ *     }
+ *   }
+ *
+ * where TokenUsage (protocol.rs:2000) is the full OpenAI usage shape:
+ *   { input_tokens, cached_input_tokens, output_tokens,
+ *     reasoning_output_tokens, total_tokens }.
+ *
+ * `info` is `null` until a turn COMPLETES (the initial session-start
+ * token_count and any turn that is interrupted/aborted carry `info:null`);
+ * a completed turn carries a populated `info`. We therefore read
+ * `info.last_token_usage` and SKIP records whose `info` is null.
+ *
+ * ── Incremental vs Cumulative (protocol.rs:2049-2052) ───────────────────────
+ *   append_last_usage():  total_token_usage += last  (cumulative);  last_token_usage = last  (incremental, replaced each turn)
+ * We use `last_token_usage` as the per-turn delta — summing it across the new
+ * turns since the cursor gives the exact NEW spend, with no double-count. We
+ * deliberately do NOT use `total_token_usage` (it is the running cumulative sum
+ * and would re-count every prior turn on each read).
+ *
+ * ── Field mapping (codex TokenUsage → AgentUsageCounts) ─────────────────────
+ *   input_tokens             → input_tokens
+ *   cached_input_tokens      → cache_read_tokens          (== OpenAI cached_tokens)
+ *   output_tokens + reasoning_output_tokens → output_tokens
+ *                              (reasoning is billed as output; fold it in)
+ *   model_id: from the most recent turn_context.model (protocol.rs:1977 /
+ *             ThreadSettingsSnapshot.model), falling back to session_meta.model
+ *             when no turn_context precedes the event.
+ *
+ * Codex carries NO native USD on the rollout — cost is derived downstream by
+ * buildAgentUsageEvent's pricing catalog (native_cost_usd omitted).
+ *
+ * Pure, null-safe, algorithmic. NO regex.
+ */
+import { buildAgentUsageEvent, } from "../../session/extract.js";
+/** Coerce a value to a finite number: non-numbers, NaN and ±Infinity all become 0, so absent or malformed usage fields count as zero. */
+function toNum(v) {
+    return typeof v === "number" && Number.isFinite(v) ? v : 0;
+}
+/**
+ * Map one codex `token_count` payload's `info.last_token_usage` to the
+ * buildAgentUsageEvent input shape, or null when there is nothing to record.
+ *
+ * @param payload  the `event_msg.payload` object (payload.type === "token_count")
+ * @param modelId  model resolved from the enclosing turn_context/session_meta
+ *
+ * Returns null when:
+ *   - payload is not an object, or carries a string `type` other than "token_count", or
+ *   - info or info.last_token_usage is null/absent (session-start ping or interrupted turn), or
+ *   - input, output (incl. reasoning) and cached-input counts are all zero.
+ */
+export function parseCodexUsage(payload, modelId) {
+    if (!payload || typeof payload !== "object")
+        return null;
+    const p = payload;
+    if (typeof p.type === "string" && p.type !== "token_count") // a missing or non-string type is deliberately let through
+        return null;
+    const info = p.info;
+    if (!info || typeof info !== "object")
+        return null; // no completed-turn usage
+    const last = info.last_token_usage;
+    if (!last || typeof last !== "object")
+        return null;
+    const u = last;
+    const input_tokens = toNum(u.input_tokens);
+    // OpenAI cached_tokens == codex cached_input_tokens == our cache-read bucket. NOTE(review): confirm input_tokens does not already include the cached portion, otherwise cached input is counted in both buckets.
+    const cache_read_tokens = toNum(u.cached_input_tokens);
+    // reasoning is billed as output → fold reasoning_output_tokens into output. NOTE(review): OpenAI reports reasoning tokens as a sub-count of output tokens — confirm codex's output_tokens excludes them, otherwise this sum double-counts.
+    const output_tokens = toNum(u.output_tokens) + toNum(u.reasoning_output_tokens);
+    // Codex has no separate cache-CREATION bucket (cached_input_tokens is a read
+    // hit count, not a write). Leave cache_creation_tokens at 0.
+    const cache_creation_tokens = 0;
+    if (input_tokens <= 0 &&
+        output_tokens <= 0 &&
+        cache_read_tokens <= 0) {
+        return null;
+    }
+    return {
+        model_id: typeof modelId === "string" ? modelId : "", // non-string model ids collapse to the empty string
+        input_tokens,
+        output_tokens,
+        cache_creation_tokens,
+        cache_read_tokens,
+        native_cost_usd: null, // codex rollout carries no native USD; catalog derives
+    };
+}
+/**
+ * Cursor-aware codex rollout reader for the Stop hook.
+ *
+ * The rollout grows every turn and the forward loop forwards ALL passed events
+ * unconditionally, so re-summing the whole rollout each Stop would double-count
+ * every prior turn. This walks only the `token_count` records NEW since the
+ * last Stop, keyed by a per-session high-water cursor.
+ *
+ * The cursor is the 0-based LINE INDEX of the last non-empty, parseable line
+ * already scanned (stored as a decimal string in the usage_cursor column).
+ * Line index is a stable monotonic key here because codex APPENDS to the
+ * rollout (it never rewrites/compacts a line in place), so a prior line's
+ * position never shifts.
+ *
+ *   - sinceCursor null/empty/invalid → process ALL token_count records.
+ *   - sinceCursor = "N"      → process only token_count records at line idx > N.
+ *
+ * Model resolution: we track the most-recent `model` seen on any preceding
+ * `turn_context` (protocol.rs:1977) or `session_meta`; each token_count is
+ * attributed to that model. Sums are grouped per-model and emitted via the
+ * shared buildAgentUsageEvent path (so a session that switches models mid-run
+ * yields one agent_usage event per model for the new slice).
+ *
+ * `cursor` returns the line index of the LAST non-empty line that parsed to a
+ * JSON object (string), so the next Stop resumes strictly past it. When the
+ * rollout is empty/unparseable, the input cursor is returned unchanged. Same
+ * linear JSONL walk, JSON.parse per line, NO regex.
+ */
+export function extractCodexUsageSince(rollout, sinceCursor) {
+    const inputCursor = typeof sinceCursor === "string" && sinceCursor.length > 0 ? sinceCursor : null;
+    if (typeof rollout !== "string" || rollout.length === 0) {
+        return { events: [], cursor: inputCursor };
+    }
+    // Parse the cursor as a line-index high-water mark. NaN/negative → process all (note: parseInt still accepts a numeric prefix such as "12abc").
+    let sinceIdx = -1;
+    if (inputCursor !== null) {
+        const parsed = Number.parseInt(inputCursor, 10);
+        if (Number.isInteger(parsed) && parsed >= 0)
+            sinceIdx = parsed;
+    }
+    // Split into physical lines. A trailing newline yields a final empty line we
+    // skip; the surviving line index is preserved so the cursor stays stable.
+    const lines = rollout.split("\n");
+    let currentModel = "";
+    let lastLineIdx = -1; // last NON-EMPTY line that parsed to a JSON object (the new cursor)
+    // Per-model sums over the NEW slice.
+    const sums = new Map();
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        if (line.length === 0)
+            continue;
+        let obj;
+        try {
+            obj = JSON.parse(line);
+        }
+        catch {
+            continue; // tolerate a partially-flushed final line
+        }
+        if (!obj || typeof obj !== "object") // JSON null / primitives are not records and do not move the cursor
+            continue;
+        lastLineIdx = i;
+        const rec = obj;
+        const recType = typeof rec.type === "string" ? rec.type : "";
+        const payload = rec.payload && typeof rec.payload === "object"
+            ? rec.payload
+            : null;
+        // Track model from the most recent turn_context / session_meta. Runs for pre-cursor lines too, so a resumed read still knows the active model.
+        if (recType === "turn_context" && payload) {
+            const m = payload.model;
+            if (typeof m === "string" && m.length > 0)
+                currentModel = m;
+            continue;
+        }
+        if (recType === "session_meta" && payload) {
+            const m = payload.model;
+            if (typeof m === "string" && m.length > 0)
+                currentModel = m;
+            continue;
+        }
+        if (recType !== "event_msg" || !payload)
+            continue;
+        if (payload.type !== "token_count")
+            continue;
+        // Cursor gate: only token_count records strictly past the high-water mark.
+        if (i <= sinceIdx)
+            continue;
+        const counts = parseCodexUsage(payload, currentModel);
+        if (!counts)
+            continue; // info:null (session ping / aborted) or zero usage
+        const key = counts.model_id; // "" when no model has been seen yet
+        const cur = sums.get(key) ?? { input: 0, output: 0, cacheRead: 0 };
+        cur.input += counts.input_tokens;
+        cur.output += counts.output_tokens;
+        cur.cacheRead += counts.cache_read_tokens;
+        sums.set(key, cur);
+    }
+    // Cursor advances to the last parseable line regardless of whether it carried
+    // usage, so we never re-scan settled lines next Stop. If nothing parsed, hold
+    // the input cursor.
+    const cursor = lastLineIdx >= 0 ? String(lastLineIdx) : inputCursor;
+    const events = [];
+    for (const [model, s] of sums) {
+        const ev = buildAgentUsageEvent({
+            model_id: model,
+            input_tokens: s.input,
+            output_tokens: s.output,
+            cache_creation_tokens: 0, // parseCodexUsage always reports 0 for this bucket
+            cache_read_tokens: s.cacheRead,
+        });
+        if (ev) // a falsy result from the builder is dropped rather than emitted
+            events.push(ev);
+    }
+    return { events, cursor };
+}

package/build/adapters/gemini-cli/hooks.d.ts CHANGED Viewed

@@ -10,7 +10,12 @@
  * Gemini CLI hook system reference:
  *   - Hooks are registered in ~/.gemini/settings.json under "hooks" key
  *   - Each hook type maps to an array of { matcher, hooks } entries
- *   - Hook names: BeforeAgent, BeforeTool, AfterTool, PreCompress, SessionStart
+ *   - Hook names: BeforeAgent, BeforeTool, AfterTool, AfterModel, PreCompress, SessionStart
+ *   - AfterModel fires per model call inside the stream loop
+ *     (packages/core/src/core/geminiChat.ts:1213); payload carries
+ *     llm_request + llm_response (hooks/types.ts:692-695) whose
+ *     usageMetadata + resolved model drive per-turn token/cost capture
+ *     (refs: docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md).
  *   - Input: JSON on stdin
  *   - Output: JSON on stdout (or empty for passthrough)
  *   - BeforeAgent fires when user submits a prompt — input.prompt carries
@@ -23,6 +28,7 @@ export declare const HOOK_TYPES: {
     readonly BEFORE_AGENT: "BeforeAgent";
     readonly BEFORE_TOOL: "BeforeTool";
     readonly AFTER_TOOL: "AfterTool";
+    readonly AFTER_MODEL: "AfterModel";
     readonly PRE_COMPRESS: "PreCompress";
     readonly SESSION_START: "SessionStart";
 };

package/build/adapters/gemini-cli/hooks.js CHANGED Viewed

@@ -11,7 +11,12 @@ import { buildHookRuntimeCommand } from "../types.js";
  * Gemini CLI hook system reference:
  *   - Hooks are registered in ~/.gemini/settings.json under "hooks" key
  *   - Each hook type maps to an array of { matcher, hooks } entries
- *   - Hook names: BeforeAgent, BeforeTool, AfterTool, PreCompress, SessionStart
+ *   - Hook names: BeforeAgent, BeforeTool, AfterTool, AfterModel, PreCompress, SessionStart
+ *   - AfterModel fires per model call inside the stream loop
+ *     (packages/core/src/core/geminiChat.ts:1213); payload carries
+ *     llm_request + llm_response (hooks/types.ts:692-695) whose
+ *     usageMetadata + resolved model drive per-turn token/cost capture
+ *     (refs: docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md).
  *   - Input: JSON on stdin
  *   - Output: JSON on stdout (or empty for passthrough)
  *   - BeforeAgent fires when user submits a prompt — input.prompt carries
@@ -27,6 +32,7 @@ export const HOOK_TYPES = {
     BEFORE_AGENT: "BeforeAgent",
     BEFORE_TOOL: "BeforeTool",
     AFTER_TOOL: "AfterTool",
+    AFTER_MODEL: "AfterModel",
     PRE_COMPRESS: "PreCompress",
     SESSION_START: "SessionStart",
 };
@@ -57,6 +63,7 @@ export const HOOK_SCRIPTS = {
     [HOOK_TYPES.BEFORE_AGENT]: "beforeagent.mjs",
     [HOOK_TYPES.BEFORE_TOOL]: "beforetool.mjs",
     [HOOK_TYPES.AFTER_TOOL]: "aftertool.mjs",
+    [HOOK_TYPES.AFTER_MODEL]: "aftermodel.mjs",
     [HOOK_TYPES.PRE_COMPRESS]: "precompress.mjs",
     [HOOK_TYPES.SESSION_START]: "sessionstart.mjs",
 };
@@ -71,6 +78,7 @@ export const REQUIRED_HOOKS = [
 /** Optional hooks that enhance functionality but aren't critical. */
 export const OPTIONAL_HOOKS = [
     HOOK_TYPES.AFTER_TOOL,
+    HOOK_TYPES.AFTER_MODEL,
     HOOK_TYPES.PRE_COMPRESS,
 ];
 /**

package/build/adapters/gemini-cli/index.js CHANGED Viewed

@@ -200,6 +200,17 @@ export class GeminiCLIAdapter extends BaseAdapter {
                     ],
                 },
             ],
+            [GEMINI_HOOK_NAMES.AFTER_MODEL]: [
+                {
+                    matcher: "",
+                    hooks: [
+                        {
+                            type: "command",
+                            command: buildGeminiHookCommand(GEMINI_HOOK_NAMES.AFTER_MODEL, pluginRoot),
+                        },
+                    ],
+                },
+            ],
             [GEMINI_HOOK_NAMES.PRE_COMPRESS]: [
                 {
                     matcher: "",

package/build/adapters/kimi/paths.d.ts CHANGED Viewed

@@ -1 +1,21 @@
 export declare function resolveKimiConfigDir(): string;
+/**
+ * Best-effort resolution of the `<sessionDir>/wire.jsonl` file for a given
+ * Kimi Code session id.
+ *
+ * Ground truth (adapter-matrix/kimi.md): the usage stream is persisted at
+ * `<sessionDir>/wire.jsonl` —
+ *   refs/platforms/kimi-code/packages/agent-core/src/agent/index.ts:142
+ *     new FileSystemAgentRecordPersistence(join(options.homedir, 'wire.jsonl'), ...)
+ * where `options.homedir` is the agent's per-session directory.
+ *
+ * NOTE / WIRE GAP: the exact on-disk mapping from `session_id` → `sessionDir`
+ * is NOT carried in the hook stdin payload, and the kimi-code refs are not
+ * checked out in this worktree to confirm the session-store directory layout
+ * (session/store/session-store.ts:278,316 are cited but unverifiable here). The
+ * candidate layouts (`sessions/<id>`, `agents/<id>`, `<id>` under the Kimi config dir) are tried in order; this resolver returns
+ * the FIRST candidate whose `wire.jsonl` actually exists on disk, else null (also null for an empty/non-string sessionId) —
+ * so the Stop hook degrades to a no-op rather than guessing wrong. When the
+ * refs land, pin the exact layout and drop the fallback list.
+ */
+export declare function resolveKimiWireJsonlPath(sessionId: string): string | null;

package/build/adapters/kimi/paths.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { homedir } from "node:os";
-import { resolve } from "node:path";
+import { existsSync } from "node:fs";
+import { resolve, join } from "node:path";
 export function resolveKimiConfigDir() {
     const envVal = process.env.KIMI_CODE_HOME;
     if (envVal) {
@@ -10,3 +11,42 @@ export function resolveKimiConfigDir() {
     }
     return resolve(homedir(), ".kimi-code");
 }
+/**
+ * Best-effort resolution of the `<sessionDir>/wire.jsonl` file for a given
+ * Kimi Code session id.
+ *
+ * Ground truth (adapter-matrix/kimi.md): the usage stream is persisted at
+ * `<sessionDir>/wire.jsonl` —
+ *   refs/platforms/kimi-code/packages/agent-core/src/agent/index.ts:142
+ *     new FileSystemAgentRecordPersistence(join(options.homedir, 'wire.jsonl'), ...)
+ * where `options.homedir` is the agent's per-session directory.
+ *
+ * NOTE / WIRE GAP: the exact on-disk mapping from `session_id` → `sessionDir`
+ * is NOT carried in the hook stdin payload, and the kimi-code refs are not
+ * checked out in this worktree to confirm the session-store directory layout
+ * (session/store/session-store.ts:278,316 are cited but unverifiable here). The
+ * candidate layouts below cover the documented patterns; this resolver returns
+ * the FIRST candidate whose `wire.jsonl` actually exists on disk, else null (also null for an empty/non-string sessionId) —
+ * so the Stop hook degrades to a no-op rather than guessing wrong. When the
+ * refs land, pin the exact layout and drop the fallback list.
+ */
+export function resolveKimiWireJsonlPath(sessionId) {
+    if (typeof sessionId !== "string" || sessionId.length === 0)
+        return null;
+    const configDir = resolveKimiConfigDir(); // NOTE(review): sessionId is joined into the paths below unvalidated — confirm callers never pass path separators or ".."
+    const candidates = [
+        join(configDir, "sessions", sessionId, "wire.jsonl"),
+        join(configDir, "agents", sessionId, "wire.jsonl"),
+        join(configDir, sessionId, "wire.jsonl"),
+    ];
+    for (const candidate of candidates) { // order matters: the first existing path wins
+        try {
+            if (existsSync(candidate))
+                return candidate;
+        }
+        catch {
+            // unreadable candidate — try the next. (Defensive only: existsSync is documented to return false rather than throw.)
+        }
+    }
+    return null;
+}

package/build/adapters/kimi/usage.d.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Kimi Code (kimi-code) per-turn token usage capture.
+ *
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
+ * adapter-matrix/kimi.md (+ cited refs/platforms/kimi-code/...).
+ *
+ * Kimi Code emits REAL per-turn token usage + model, but ONLY on the
+ * `wire.jsonl` records stream — NOT through any hook stdin payload. Each usage
+ * line is an AgentRecord of `type: "usage.record"` carrying a normalized
+ * four-field Moonshot/OpenAI-compatible `TokenUsage` plus the model id:
+ *
+ *   refs/platforms/kimi-code/packages/agent-core/src/agent/usage/index.ts:27-32
+ *     — this.agent.records.logRecord({ type: 'usage.record', model, usage, usageScope })
+ *   refs/platforms/kimi-code/packages/agent-core/src/agent/records/types.ts:59-63
+ *     — record shape { model: string; usage: TokenUsage; usageScope?: UsageRecordScope }
+ *   refs/platforms/kimi-code/packages/agent-core/src/agent/index.ts:142
+ *     — new FileSystemAgentRecordPersistence(join(options.homedir, 'wire.jsonl'), ...)
+ *       => the persisted file is <sessionDir>/wire.jsonl.
+ *
+ * Normalized TokenUsage (kosong/src/usage.ts:7-13; parsed by
+ * kosong/src/providers/openai-common.ts:213-241):
+ *   { inputOther, output, inputCacheRead, inputCacheCreation }
+ *
+ * Mapping → buildAgentUsageEvent input shape:
+ *   inputOther          → input_tokens     (prompt - cached)
+ *   output              → output_tokens
+ *   inputCacheRead      → cache_read_tokens
+ *   inputCacheCreation  → cache_creation_tokens
+ *   record.model        → model_id
+ *
+ * INCREMENTAL: usage.record lines are per-step deltas (summed via addUsage;
+ * usage/index.ts:34,37). The cumulative total exists only in-memory, never on
+ * disk — so cost capture sums the NEW delta lines per model since a cursor.
+ *
+ * Native cost: kimi-code's TokenUsage carries NO USD cost field (verified
+ * against the matrix doc field list — only token counts). So native_cost_usd
+ * is left null and buildAgentUsageEvent falls back to the pricing catalog.
+ *
+ * Pure, null-safe, algorithmic — NO regex.
+ */
+import { type AgentUsageCounts, type SessionEvent } from "../../session/extract.js";
+/**
+ * Parse ONE kimi-code `usage.record` line object into the buildAgentUsageEvent
+ * input shape.
+ *
+ * @param record  the parsed AgentRecord object (NOT the raw JSONL string);
+ *   accepted as the full stamped `{ type, model, usage, ... }` or a bare `{ model, usage }`.
+ * @returns the usage counts, or null when the input is not a usage record,
+ *   carries no usage, or every token bucket is zero.
+ */
+export declare function parseKimiUsage(record: unknown): AgentUsageCounts | null;
+/**
+ * Cursor-aware wire.jsonl reader for the Stop / SessionEnd hook.
+ *
+ * `wire.jsonl` is an append-only records stream that grows every turn; the
+ * forward loop forwards ALL passed events unconditionally, so re-summing the
+ * whole file each hook fire would double-count every prior turn. This sums only
+ * the `usage.record` lines NEW since the last fire, keyed by a per-session
+ * high-water cursor (the number of usage.record lines consumed so far,
+ * serialized as a decimal string in session_meta.usage_cursor).
+ *
+ *   - cursor null/empty/unparseable → process ALL usage.record lines.
+ *   - cursor = N (>= total)         → nothing new; no events, cursor unchanged.
+ *   - cursor = N (< total)          → skip the first N usage.record lines, process the rest.
+ *   - BOUNDED COMPACTION FALLBACK: if the file SHRANK below the cursor (the
+ *     stream was truncated/rotated, so prior lines are gone), process ONLY the
+ *     LAST usage.record line so we never re-emit the whole history. Mirrors
+ *     extractTranscriptUsageSince. NOTE(review): this overlaps the `N > total` half of the `>= total` rule above — confirm which one applies.
+ *
+ * `cursor` returns the decimal string count of TOTAL usage.record lines seen,
+ * so the next fire resumes exactly past it.
+ *
+ * Per-model summation: lines are bucketed by model_id and each bucket emits one
+ * agent_usage event (incremental deltas are additive — addUsage semantics).
+ *
+ * Char-algorithmic JSONL parse (split on "\n", JSON.parse each line, skip
+ * blanks/unparseable). NO regex.
+ */
+export declare function extractKimiUsageSince(wireJsonlText: string, cursor: string | null): {
+    events: SessionEvent[];
+    cursor: string | null;
+};