context-mode 1.0.166 → 1.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.codex-plugin/plugin.json +1 -1
  4. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  5. package/.openclaw-plugin/package.json +1 -1
  6. package/README.md +6 -4
  7. package/build/adapters/codex/usage.d.ts +107 -0
  8. package/build/adapters/codex/usage.js +227 -0
  9. package/build/adapters/gemini-cli/hooks.d.ts +7 -1
  10. package/build/adapters/gemini-cli/hooks.js +9 -1
  11. package/build/adapters/gemini-cli/index.js +11 -0
  12. package/build/adapters/kimi/paths.d.ts +20 -0
  13. package/build/adapters/kimi/paths.js +41 -1
  14. package/build/adapters/kimi/usage.d.ts +82 -0
  15. package/build/adapters/kimi/usage.js +217 -0
  16. package/build/adapters/omp/plugin.d.ts +6 -0
  17. package/build/adapters/omp/plugin.js +87 -2
  18. package/build/adapters/omp/usage.d.ts +49 -0
  19. package/build/adapters/omp/usage.js +110 -0
  20. package/build/adapters/openclaw/plugin.d.ts +10 -0
  21. package/build/adapters/openclaw/plugin.js +57 -0
  22. package/build/adapters/openclaw/usage.d.ts +34 -0
  23. package/build/adapters/openclaw/usage.js +52 -0
  24. package/build/adapters/opencode/plugin.d.ts +17 -0
  25. package/build/adapters/opencode/plugin.js +40 -1
  26. package/build/adapters/pi/extension.js +34 -1
  27. package/build/adapters/qwen-code/index.js +23 -1
  28. package/build/adapters/qwen-code/usage.d.ts +90 -0
  29. package/build/adapters/qwen-code/usage.js +222 -0
  30. package/build/session/analytics.js +30 -0
  31. package/build/session/db.d.ts +11 -0
  32. package/build/session/db.js +33 -0
  33. package/build/session/extract.d.ts +224 -0
  34. package/build/session/extract.js +705 -62
  35. package/build/session/model-prices.json +429 -0
  36. package/build/session/pricing.d.ts +64 -0
  37. package/build/session/pricing.js +151 -0
  38. package/cli.bundle.mjs +177 -170
  39. package/configs/antigravity-cli/plugin.json +1 -1
  40. package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
  41. package/configs/gemini-cli/settings.json +11 -0
  42. package/hooks/codex/stop.mjs +91 -4
  43. package/hooks/gemini-cli/aftermodel.mjs +70 -0
  44. package/hooks/kimi/stop.mjs +74 -3
  45. package/hooks/qwen-code/platform.mjs +1 -0
  46. package/hooks/qwen-code/stop.mjs +168 -0
  47. package/hooks/session-db.bundle.mjs +7 -7
  48. package/hooks/session-extract.bundle.mjs +3 -2
  49. package/hooks/session-loaders.mjs +16 -1
  50. package/hooks/stop.mjs +35 -2
  51. package/openclaw.plugin.json +1 -1
  52. package/package.json +1 -1
  53. package/server.bundle.mjs +108 -101
@@ -6,14 +6,14 @@
6
6
  },
7
7
  "metadata": {
8
8
  "description": "Claude Code plugins by Mert Koseoğlu",
9
- "version": "1.0.166"
9
+ "version": "1.0.168"
10
10
  },
11
11
  "plugins": [
12
12
  {
13
13
  "name": "context-mode",
14
14
  "source": "./",
15
15
  "description": "Claude Code MCP plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
16
- "version": "1.0.166",
16
+ "version": "1.0.168",
17
17
  "author": {
18
18
  "name": "Mert Koseoğlu"
19
19
  },
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.166",
3
+ "version": "1.0.168",
4
4
  "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.166",
3
+ "version": "1.0.168",
4
4
  "description": "MCP server that saves 98% of your context window with session continuity. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and automatic state restore across compactions.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
@@ -3,7 +3,7 @@
3
3
  "name": "Context Mode",
4
4
  "kind": "tool",
5
5
  "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
6
- "version": "1.0.166",
6
+ "version": "1.0.168",
7
7
  "sandbox": {
8
8
  "mode": "permissive",
9
9
  "filesystem_access": "full",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "context-mode",
3
- "version": "1.0.166",
3
+ "version": "1.0.168",
4
4
  "description": "OpenClaw plugin that saves 98% of your context window. Sandboxed code execution in 11 languages, FTS5 knowledge base with BM25 ranking, and intent-driven search.",
5
5
  "author": {
6
6
  "name": "Mert Koseoğlu",
package/README.md CHANGED
@@ -972,15 +972,15 @@ Full configs: [`configs/kiro/mcp.json`](configs/kiro/mcp.json) | [`configs/kiro/
972
972
  {
973
973
  "context_servers": {
974
974
  "context-mode": {
975
- "command": {
976
- "path": "context-mode"
977
- }
975
+ "command": "context-mode",
976
+ "args": [],
977
+ "env": {}
978
978
  }
979
979
  }
980
980
  }
981
981
  ```
982
982
 
983
- Note: Zed uses `"context_servers"` and `"command": { "path": "..." }` syntax, not `"mcpServers"` or `"command": "..."` like other platforms.
983
+ Note: Zed uses `"context_servers"` instead of `"mcpServers"`. `args` and `env` are optional for context-mode, but are shown here to match Zed's custom MCP server shape.
984
984
 
985
985
  3. Copy routing instructions (Zed has no hook support):
986
986
 
@@ -1067,6 +1067,8 @@ Full configs: [`configs/kiro/mcp.json`](configs/kiro/mcp.json) | [`configs/kiro/
1067
1067
 
1068
1068
  Both should show `context-mode` as `enabled`.
1069
1069
 
1070
+ > The plugin self-registers its MCP server in `~/.omp/agent/mcp.json` on first load (spawned as `node <plugin>/server.bundle.mjs`, since the plugin-install package directory is not on `PATH`), so the 11 `ctx_*` tools become reachable after the restart in step 2 — no manual `mcp.json` edit needed ([#677](https://github.com/mksglu/context-mode/issues/677)). An existing `context-mode` entry is never overwritten; remove it if you want the plugin to re-register the bundled path.
1071
+
1070
1072
  **Install — manual plugin path (if `omp plugin install` is unavailable):**
1071
1073
 
1072
1074
  OMP loads anything listed under `~/.omp/plugins/package.json` `dependencies` whose own `package.json` carries an `omp` (or `pi`) field. New plugins default to enabled — the lock file at `~/.omp/plugins/omp-plugins.lock.json` is only consulted when a plugin needs to be explicitly **disabled** (loader skips `runtimeState && !runtimeState.enabled` per [`extensibility/plugins/loader.ts:89-94`](https://github.com/can1357/oh-my-pi/blob/main/packages/coding-agent/src/extensibility/plugins/loader.ts)). So the manual install is two commands:
@@ -0,0 +1,107 @@
1
+ /**
2
+ * adapters/codex/usage — Codex CLI per-turn token + cost capture.
3
+ *
4
+ * ── Feasibility (verified empirically against live rollout files) ───────────
5
+ * Codex persists a SESSION ROLLOUT transcript at
6
+ * $CODEX_HOME/sessions/YYYY/MM/DD/rollout-<ts>-<session_id>.jsonl
7
+ * (default $CODEX_HOME = ~/.codex). Each line is a JSON record
8
+ * { timestamp, type, payload }
9
+ * with `type` ∈ { session_meta, turn_context, response_item, event_msg }.
10
+ *
11
+ * The adapter-matrix (docs/prds/2026-06-paid-observability/adapter-matrix/
12
+ * codex.md) correctly notes that token usage is carried by
13
+ * `EventMsg::TokenCount(TokenCountEvent)` (codex-rs protocol.rs:1276) and is
14
+ * NOT on any hook payload (hooks carry `model` only). What the matrix does not
15
+ * state — and what makes a file-tail feasible — is that codex ALSO PERSISTS
16
+ * those EventMsgs to the rollout JSONL as `type:"event_msg"` records. The
17
+ * `token_count` payload mirrors `TokenCountEvent` 1:1:
18
+ *
19
+ * {
20
+ * "type": "event_msg",
21
+ * "payload": {
22
+ * "type": "token_count",
23
+ * "info": null | { // Option<TokenUsageInfo>
24
+ * "total_token_usage": TokenUsage, // CUMULATIVE session sum (protocol.rs:2015)
25
+ * "last_token_usage": TokenUsage, // INCREMENTAL last turn (protocol.rs:2016)
26
+ * "model_context_window": number | null
27
+ * },
28
+ * "rate_limits": { ... }
29
+ * }
30
+ * }
31
+ *
32
+ * where TokenUsage (protocol.rs:2000) is the full OpenAI usage shape:
33
+ * { input_tokens, cached_input_tokens, output_tokens,
34
+ * reasoning_output_tokens, total_tokens }.
35
+ *
36
+ * `info` is `null` until a turn COMPLETES (the initial session-start
37
+ * token_count and any turn that is interrupted/aborted carry `info:null`);
38
+ * a completed turn carries a populated `info`. We therefore read
39
+ * `info.last_token_usage` and SKIP records whose `info` is null.
40
+ *
41
+ * ── Incremental vs Cumulative (protocol.rs:2049-2052) ───────────────────────
42
+ * append_last_usage(): total += last (cumulative); last = last (incremental)
43
+ * We use `last_token_usage` as the per-turn delta — summing it across the new
44
+ * turns since the cursor gives the exact NEW spend, with no double-count. We
45
+ * deliberately do NOT use `total_token_usage` (it is the running cumulative sum
46
+ * and would re-count every prior turn on each read).
47
+ *
48
+ * ── Field mapping (codex TokenUsage → AgentUsageCounts) ─────────────────────
49
+ * input_tokens → input_tokens
50
+ * cached_input_tokens → cache_read_tokens (== OpenAI cached_tokens)
51
+ * output_tokens + reasoning_output_tokens → output_tokens
52
+ * (reasoning is billed as output; fold it in)
53
+ * model_id: from the most recent turn_context.model (protocol.rs:1977 /
54
+ * ThreadSettingsSnapshot.model), falling back to session_meta.model
55
+ * when no turn_context precedes the event.
56
+ *
57
+ * Codex carries NO native USD on the rollout — cost is derived downstream by
58
+ * buildAgentUsageEvent's pricing catalog (native_cost_usd omitted).
59
+ *
60
+ * Pure, null-safe, algorithmic. NO regex.
61
+ */
62
+ import { type AgentUsageCounts, type SessionEvent } from "../../session/extract.js";
63
+ /**
64
+ * Map one codex `token_count` payload's `info.last_token_usage` to the
65
+ * buildAgentUsageEvent input shape, or null when there is nothing to record.
66
+ *
67
+ * @param payload the `event_msg.payload` object (payload.type === "token_count")
68
+ * @param modelId model resolved from the enclosing turn_context/session_meta
69
+ *
70
+ * Returns null when:
71
+ * - payload is not a token_count, or
72
+ * - info is null/absent (session-start ping or interrupted turn), or
73
+ * - last_token_usage sums to zero across every billable bucket.
74
+ */
75
+ export declare function parseCodexUsage(payload: unknown, modelId: string): AgentUsageCounts | null;
76
+ /**
77
+ * Cursor-aware codex rollout reader for the Stop hook.
78
+ *
79
+ * The rollout grows every turn and the forward loop forwards ALL passed events
80
+ * unconditionally, so re-summing the whole rollout each Stop would double-count
81
+ * every prior turn. This walks only the `token_count` records NEW since the
82
+ * last Stop, keyed by a per-session high-water cursor.
83
+ *
84
+ * The cursor is the 0-based LINE INDEX of the last parseable rollout line we have
85
+ * already scanned (stored as a decimal string in the usage_cursor column).
86
+ * Line index is a stable monotonic key here because codex APPENDS to the
87
+ * rollout (it never rewrites/compacts a line in place), so a prior line's
88
+ * position never shifts.
89
+ *
90
+ * - sinceCursor null/empty → process ALL token_count records.
91
+ * - sinceCursor = "N" → process only token_count records at line idx > N.
92
+ *
93
+ * Model resolution: we track the most-recent `model` seen on any preceding
94
+ * `turn_context` (protocol.rs:1977) or `session_meta`; each token_count is
95
+ * attributed to that model. Sums are grouped per-model and emitted via the
96
+ * shared buildAgentUsageEvent path (so a session that switches models mid-run
97
+ * yields one agent_usage event per model for the new slice).
98
+ *
99
+ * `cursor` returns the line index of the LAST parseable line in the rollout (string),
100
+ * so the next Stop resumes strictly past it. When the rollout is empty/
101
+ * unparseable, the input cursor is returned unchanged. Same linear JSONL walk,
102
+ * JSON.parse per line, NO regex.
103
+ */
104
+ export declare function extractCodexUsageSince(rollout: string, sinceCursor: string | null): {
105
+ events: SessionEvent[];
106
+ cursor: string | null;
107
+ };
@@ -0,0 +1,227 @@
1
+ /**
2
+ * adapters/codex/usage — Codex CLI per-turn token + cost capture.
3
+ *
4
+ * ── Feasibility (verified empirically against live rollout files) ───────────
5
+ * Codex persists a SESSION ROLLOUT transcript at
6
+ * $CODEX_HOME/sessions/YYYY/MM/DD/rollout-<ts>-<session_id>.jsonl
7
+ * (default $CODEX_HOME = ~/.codex). Each line is a JSON record
8
+ * { timestamp, type, payload }
9
+ * with `type` ∈ { session_meta, turn_context, response_item, event_msg }.
10
+ *
11
+ * The adapter-matrix (docs/prds/2026-06-paid-observability/adapter-matrix/
12
+ * codex.md) correctly notes that token usage is carried by
13
+ * `EventMsg::TokenCount(TokenCountEvent)` (codex-rs protocol.rs:1276) and is
14
+ * NOT on any hook payload (hooks carry `model` only). What the matrix does not
15
+ * state — and what makes a file-tail feasible — is that codex ALSO PERSISTS
16
+ * those EventMsgs to the rollout JSONL as `type:"event_msg"` records. The
17
+ * `token_count` payload mirrors `TokenCountEvent` 1:1:
18
+ *
19
+ * {
20
+ * "type": "event_msg",
21
+ * "payload": {
22
+ * "type": "token_count",
23
+ * "info": null | { // Option<TokenUsageInfo>
24
+ * "total_token_usage": TokenUsage, // CUMULATIVE session sum (protocol.rs:2015)
25
+ * "last_token_usage": TokenUsage, // INCREMENTAL last turn (protocol.rs:2016)
26
+ * "model_context_window": number | null
27
+ * },
28
+ * "rate_limits": { ... }
29
+ * }
30
+ * }
31
+ *
32
+ * where TokenUsage (protocol.rs:2000) is the full OpenAI usage shape:
33
+ * { input_tokens, cached_input_tokens, output_tokens,
34
+ * reasoning_output_tokens, total_tokens }.
35
+ *
36
+ * `info` is `null` until a turn COMPLETES (the initial session-start
37
+ * token_count and any turn that is interrupted/aborted carry `info:null`);
38
+ * a completed turn carries a populated `info`. We therefore read
39
+ * `info.last_token_usage` and SKIP records whose `info` is null.
40
+ *
41
+ * ── Incremental vs Cumulative (protocol.rs:2049-2052) ───────────────────────
42
+ * append_last_usage(): total += last (cumulative); last = last (incremental)
43
+ * We use `last_token_usage` as the per-turn delta — summing it across the new
44
+ * turns since the cursor gives the exact NEW spend, with no double-count. We
45
+ * deliberately do NOT use `total_token_usage` (it is the running cumulative sum
46
+ * and would re-count every prior turn on each read).
47
+ *
48
+ * ── Field mapping (codex TokenUsage → AgentUsageCounts) ─────────────────────
49
+ * input_tokens → input_tokens
50
+ * cached_input_tokens → cache_read_tokens (== OpenAI cached_tokens)
51
+ * output_tokens + reasoning_output_tokens → output_tokens
52
+ * (reasoning is billed as output; fold it in)
53
+ * model_id: from the most recent turn_context.model (protocol.rs:1977 /
54
+ * ThreadSettingsSnapshot.model), falling back to session_meta.model
55
+ * when no turn_context precedes the event.
56
+ *
57
+ * Codex carries NO native USD on the rollout — cost is derived downstream by
58
+ * buildAgentUsageEvent's pricing catalog (native_cost_usd omitted).
59
+ *
60
+ * Pure, null-safe, algorithmic. NO regex.
61
+ */
62
+ import { buildAgentUsageEvent, } from "../../session/extract.js";
63
/**
 * Coerce an untrusted JSON value to a finite number.
 *
 * Anything that is not a finite `number` (string, null, undefined, NaN,
 * ±Infinity, object, …) becomes 0, so callers can do arithmetic safely.
 */
function toNum(v) {
    return typeof v === "number" && Number.isFinite(v) ? v : 0;
}
/**
 * Map one codex `token_count` payload's `info.last_token_usage` to the
 * buildAgentUsageEvent input shape, or null when there is nothing to record.
 *
 * Bucket mapping (codex TokenUsage mirrors the OpenAI usage object, where the
 * "detail" counters are SUBSETS of their parent counter):
 *   cached_input_tokens                  → cache_read_tokens
 *   input_tokens - cached_input_tokens   → input_tokens  (non-cached input only)
 *   output_tokens                        → output_tokens (already includes
 *                                          reasoning_output_tokens)
 *
 * Forwarding `input_tokens` unchanged next to `cache_read_tokens`, or adding
 * `reasoning_output_tokens` on top of `output_tokens`, would count the cached
 * and reasoning tokens twice. This supersedes the 1:1 mapping sketched in the
 * file header.
 * NOTE(review): assumes AgentUsageCounts.input_tokens is the NON-cached bucket
 * (priced separately from cache_read_tokens) — confirm against the pricing module.
 *
 * @param payload the `event_msg.payload` object (payload.type === "token_count")
 * @param modelId model resolved from the enclosing turn_context/session_meta
 * @returns the usage counts, or null when:
 *  - payload is not an object, or carries an explicit non-"token_count" type, or
 *  - info is null/absent (session-start ping or interrupted turn), or
 *  - last_token_usage is absent, or every billable bucket is zero.
 */
export function parseCodexUsage(payload, modelId) {
    if (!payload || typeof payload !== "object")
        return null;
    const p = payload;
    // A missing `type` is tolerated; an explicit different type is rejected.
    if (typeof p.type === "string" && p.type !== "token_count")
        return null;
    const info = p.info;
    if (!info || typeof info !== "object")
        return null; // no completed-turn usage
    const last = info.last_token_usage;
    if (!last || typeof last !== "object")
        return null;
    // Cached prompt tokens are the cache-read bucket; clamp defensively at 0.
    const cache_read_tokens = Math.max(0, toNum(last.cached_input_tokens));
    // input_tokens INCLUDES the cached tokens, so subtract them to get the
    // non-cached remainder (never negative, even on inconsistent records).
    const input_tokens = Math.max(0, toNum(last.input_tokens) - cache_read_tokens);
    // output_tokens already INCLUDES reasoning tokens. Taking the max only
    // matters for a degenerate record that reports reasoning without output.
    const output_tokens = Math.max(0, toNum(last.output_tokens), toNum(last.reasoning_output_tokens));
    // Codex has no cache-CREATION bucket (cached_input_tokens is a read-hit
    // count, not a write), so this stays 0.
    const cache_creation_tokens = 0;
    if (input_tokens <= 0 && output_tokens <= 0 && cache_read_tokens <= 0) {
        return null;
    }
    return {
        model_id: typeof modelId === "string" ? modelId : "",
        input_tokens,
        output_tokens,
        cache_creation_tokens,
        cache_read_tokens,
        native_cost_usd: null, // codex rollout carries no native USD; catalog derives
    };
}
114
/**
 * Incremental (cursor-aware) reader of a codex rollout, used by the Stop hook.
 *
 * The rollout text is split on "\n" and each non-empty line is JSON.parse'd;
 * blank and unparseable lines (e.g. a partially flushed final line) are
 * skipped. Only `event_msg` records whose payload type is `token_count` and
 * whose line index is strictly greater than the cursor contribute usage.
 *
 * Cursor: the 0-based index of the last line that parsed to an object,
 * serialized as a decimal string. It advances even when that line carried no
 * usage. A null/empty/invalid `sinceCursor` means "process everything"; when
 * no line parses at all, the incoming cursor is returned unchanged.
 *
 * Model attribution: the most recent non-empty `model` seen on a
 * `turn_context` or `session_meta` payload (tracked across the WHOLE file, not
 * just the new slice) is attached to each following token_count. Usage is
 * summed per model and one event is built per model through
 * buildAgentUsageEvent, in first-seen order; falsy results are dropped.
 *
 * @param rollout     full rollout JSONL text
 * @param sinceCursor cursor returned by the previous call, or null
 * @returns `{ events, cursor }` for the new slice
 */
export function extractCodexUsageSince(rollout, sinceCursor) {
    const priorCursor = typeof sinceCursor === "string" && sinceCursor.length > 0 ? sinceCursor : null;
    if (typeof rollout !== "string" || rollout.length === 0) {
        return { events: [], cursor: priorCursor };
    }
    // High-water mark as a line index; anything unusable means "from the start".
    let highWater = -1;
    if (priorCursor !== null) {
        const n = Number.parseInt(priorCursor, 10);
        if (Number.isInteger(n) && n >= 0) {
            highWater = n;
        }
    }
    let activeModel = "";
    let newestParsedIdx = -1;
    // model id → running totals for the new slice (Map keeps first-seen order).
    const perModel = new Map();
    rollout.split("\n").forEach((text, idx) => {
        if (text.length === 0) {
            return;
        }
        let record;
        try {
            record = JSON.parse(text);
        }
        catch {
            return; // tolerate a partially-flushed final line
        }
        if (!record || typeof record !== "object") {
            return;
        }
        newestParsedIdx = idx;
        const kind = typeof record.type === "string" ? record.type : "";
        const body = record.payload && typeof record.payload === "object" ? record.payload : null;
        // Every branch below needs an object payload.
        if (!body) {
            return;
        }
        if (kind === "turn_context" || kind === "session_meta") {
            const candidate = body.model;
            if (typeof candidate === "string" && candidate.length > 0) {
                activeModel = candidate;
            }
            return;
        }
        if (kind !== "event_msg" || body.type !== "token_count") {
            return;
        }
        // Already accounted for by an earlier call.
        if (idx <= highWater) {
            return;
        }
        const counts = parseCodexUsage(body, activeModel);
        if (!counts) {
            return; // info:null (session ping / aborted) or zero usage
        }
        const tally = perModel.get(counts.model_id) ?? { input: 0, output: 0, cacheRead: 0 };
        tally.input += counts.input_tokens;
        tally.output += counts.output_tokens;
        tally.cacheRead += counts.cache_read_tokens;
        perModel.set(counts.model_id, tally);
    });
    const events = [];
    perModel.forEach((tally, modelId) => {
        const built = buildAgentUsageEvent({
            model_id: modelId,
            input_tokens: tally.input,
            output_tokens: tally.output,
            cache_creation_tokens: 0,
            cache_read_tokens: tally.cacheRead,
        });
        if (built) {
            events.push(built);
        }
    });
    // Resume strictly past the last parseable line; hold the old cursor if none parsed.
    const cursor = newestParsedIdx >= 0 ? String(newestParsedIdx) : priorCursor;
    return { events, cursor };
}
@@ -10,7 +10,12 @@
10
10
  * Gemini CLI hook system reference:
11
11
  * - Hooks are registered in ~/.gemini/settings.json under "hooks" key
12
12
  * - Each hook type maps to an array of { matcher, hooks } entries
13
- * - Hook names: BeforeAgent, BeforeTool, AfterTool, PreCompress, SessionStart
13
+ * - Hook names: BeforeAgent, BeforeTool, AfterTool, AfterModel, PreCompress, SessionStart
14
+ * - AfterModel fires per model call inside the stream loop
15
+ * (packages/core/src/core/geminiChat.ts:1213); payload carries
16
+ * llm_request + llm_response (hooks/types.ts:692-695) whose
17
+ * usageMetadata + resolved model drive per-turn token/cost capture
18
+ * (refs: docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md).
14
19
  * - Input: JSON on stdin
15
20
  * - Output: JSON on stdout (or empty for passthrough)
16
21
  * - BeforeAgent fires when user submits a prompt — input.prompt carries
@@ -23,6 +28,7 @@ export declare const HOOK_TYPES: {
23
28
  readonly BEFORE_AGENT: "BeforeAgent";
24
29
  readonly BEFORE_TOOL: "BeforeTool";
25
30
  readonly AFTER_TOOL: "AfterTool";
31
+ readonly AFTER_MODEL: "AfterModel";
26
32
  readonly PRE_COMPRESS: "PreCompress";
27
33
  readonly SESSION_START: "SessionStart";
28
34
  };
@@ -11,7 +11,12 @@ import { buildHookRuntimeCommand } from "../types.js";
11
11
  * Gemini CLI hook system reference:
12
12
  * - Hooks are registered in ~/.gemini/settings.json under "hooks" key
13
13
  * - Each hook type maps to an array of { matcher, hooks } entries
14
- * - Hook names: BeforeAgent, BeforeTool, AfterTool, PreCompress, SessionStart
14
+ * - Hook names: BeforeAgent, BeforeTool, AfterTool, AfterModel, PreCompress, SessionStart
15
+ * - AfterModel fires per model call inside the stream loop
16
+ * (packages/core/src/core/geminiChat.ts:1213); payload carries
17
+ * llm_request + llm_response (hooks/types.ts:692-695) whose
18
+ * usageMetadata + resolved model drive per-turn token/cost capture
19
+ * (refs: docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md).
15
20
  * - Input: JSON on stdin
16
21
  * - Output: JSON on stdout (or empty for passthrough)
17
22
  * - BeforeAgent fires when user submits a prompt — input.prompt carries
@@ -27,6 +32,7 @@ export const HOOK_TYPES = {
27
32
  BEFORE_AGENT: "BeforeAgent",
28
33
  BEFORE_TOOL: "BeforeTool",
29
34
  AFTER_TOOL: "AfterTool",
35
+ AFTER_MODEL: "AfterModel",
30
36
  PRE_COMPRESS: "PreCompress",
31
37
  SESSION_START: "SessionStart",
32
38
  };
@@ -57,6 +63,7 @@ export const HOOK_SCRIPTS = {
57
63
  [HOOK_TYPES.BEFORE_AGENT]: "beforeagent.mjs",
58
64
  [HOOK_TYPES.BEFORE_TOOL]: "beforetool.mjs",
59
65
  [HOOK_TYPES.AFTER_TOOL]: "aftertool.mjs",
66
+ [HOOK_TYPES.AFTER_MODEL]: "aftermodel.mjs",
60
67
  [HOOK_TYPES.PRE_COMPRESS]: "precompress.mjs",
61
68
  [HOOK_TYPES.SESSION_START]: "sessionstart.mjs",
62
69
  };
@@ -71,6 +78,7 @@ export const REQUIRED_HOOKS = [
71
78
  /** Optional hooks that enhance functionality but aren't critical. */
72
79
  export const OPTIONAL_HOOKS = [
73
80
  HOOK_TYPES.AFTER_TOOL,
81
+ HOOK_TYPES.AFTER_MODEL,
74
82
  HOOK_TYPES.PRE_COMPRESS,
75
83
  ];
76
84
  /**
@@ -200,6 +200,17 @@ export class GeminiCLIAdapter extends BaseAdapter {
200
200
  ],
201
201
  },
202
202
  ],
203
+ [GEMINI_HOOK_NAMES.AFTER_MODEL]: [
204
+ {
205
+ matcher: "",
206
+ hooks: [
207
+ {
208
+ type: "command",
209
+ command: buildGeminiHookCommand(GEMINI_HOOK_NAMES.AFTER_MODEL, pluginRoot),
210
+ },
211
+ ],
212
+ },
213
+ ],
203
214
  [GEMINI_HOOK_NAMES.PRE_COMPRESS]: [
204
215
  {
205
216
  matcher: "",
@@ -1 +1,21 @@
1
1
  export declare function resolveKimiConfigDir(): string;
2
+ /**
3
+ * Best-effort resolution of the `<sessionDir>/wire.jsonl` file for a given
4
+ * Kimi Code session id.
5
+ *
6
+ * Ground truth (adapter-matrix/kimi.md): the usage stream is persisted at
7
+ * `<sessionDir>/wire.jsonl` —
8
+ * refs/platforms/kimi-code/packages/agent-core/src/agent/index.ts:142
9
+ * new FileSystemAgentRecordPersistence(join(options.homedir, 'wire.jsonl'), ...)
10
+ * where `options.homedir` is the agent's per-session directory.
11
+ *
12
+ * NOTE / WIRE GAP: the exact on-disk mapping from `session_id` → `sessionDir`
13
+ * is NOT carried in the hook stdin payload, and the kimi-code refs are not
14
+ * checked out in this worktree to confirm the session-store directory layout
15
+ * (session/store/session-store.ts:278,316 are cited but unverifiable here). The
16
+ * candidate layouts below cover the documented patterns; this resolver returns
17
+ * the FIRST candidate whose `wire.jsonl` actually exists on disk, else null —
18
+ * so the Stop hook degrades to a no-op rather than guessing wrong. When the
19
+ * refs land, pin the exact layout and drop the fallback list.
20
+ */
21
+ export declare function resolveKimiWireJsonlPath(sessionId: string): string | null;
@@ -1,5 +1,6 @@
1
1
  import { homedir } from "node:os";
2
- import { resolve } from "node:path";
2
+ import { existsSync } from "node:fs";
3
+ import { resolve, join } from "node:path";
3
4
  export function resolveKimiConfigDir() {
4
5
  const envVal = process.env.KIMI_CODE_HOME;
5
6
  if (envVal) {
@@ -10,3 +11,42 @@ export function resolveKimiConfigDir() {
10
11
  }
11
12
  return resolve(homedir(), ".kimi-code");
12
13
  }
14
/**
 * Best-effort resolution of the `<sessionDir>/wire.jsonl` file for a given
 * Kimi Code session id.
 *
 * The session_id → sessionDir mapping is not confirmed (see the WIRE GAP note
 * carried by this module's declaration), so three candidate layouts under the
 * Kimi config dir are probed in order:
 *   <configDir>/sessions/<id>/wire.jsonl
 *   <configDir>/agents/<id>/wire.jsonl
 *   <configDir>/<id>/wire.jsonl
 * The FIRST candidate that exists on disk is returned; otherwise null, so the
 * caller can degrade to a no-op rather than guess.
 *
 * The id is used as a single path segment. Ids that are `.`/`..` or contain a
 * path separator or NUL are rejected up front, so a crafted id such as
 * `../../x` cannot steer the lookup outside the config directory.
 *
 * @param sessionId Kimi Code session id
 * @returns path of an existing wire.jsonl, or null when the id is
 *          empty/non-string/unsafe or no candidate exists
 */
export function resolveKimiWireJsonlPath(sessionId) {
    if (typeof sessionId !== "string" || sessionId.length === 0)
        return null;
    // Must be one plain path segment — never a traversal or nested path.
    if (sessionId === "." ||
        sessionId === ".." ||
        sessionId.includes("/") ||
        sessionId.includes("\\") ||
        sessionId.includes("\0")) {
        return null;
    }
    const configDir = resolveKimiConfigDir();
    const candidates = [
        join(configDir, "sessions", sessionId, "wire.jsonl"),
        join(configDir, "agents", sessionId, "wire.jsonl"),
        join(configDir, sessionId, "wire.jsonl"),
    ];
    for (const candidate of candidates) {
        try {
            if (existsSync(candidate))
                return candidate;
        }
        catch {
            // unreadable candidate — try the next.
        }
    }
    return null;
}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Kimi Code (kimi-code) per-turn token usage capture.
3
+ *
4
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
5
+ * adapter-matrix/kimi.md (+ cited refs/platforms/kimi-code/...).
6
+ *
7
+ * Kimi Code emits REAL per-turn token usage + model, but ONLY on the
8
+ * `wire.jsonl` records stream — NOT through any hook stdin payload. Each usage
9
+ * line is an AgentRecord of `type: "usage.record"` carrying a normalized
10
+ * four-field Moonshot/OpenAI-compatible `TokenUsage` plus the model id:
11
+ *
12
+ * refs/platforms/kimi-code/packages/agent-core/src/agent/usage/index.ts:27-32
13
+ * — this.agent.records.logRecord({ type: 'usage.record', model, usage, usageScope })
14
+ * refs/platforms/kimi-code/packages/agent-core/src/agent/records/types.ts:59-63
15
+ * — record shape { model: string; usage: TokenUsage; usageScope?: UsageRecordScope }
16
+ * refs/platforms/kimi-code/packages/agent-core/src/agent/index.ts:142
17
+ * — new FileSystemAgentRecordPersistence(join(options.homedir, 'wire.jsonl'), ...)
18
+ * => the persisted file is <sessionDir>/wire.jsonl.
19
+ *
20
+ * Normalized TokenUsage (kosong/src/usage.ts:7-13; parsed by
21
+ * kosong/src/providers/openai-common.ts:213-241):
22
+ * { inputOther, output, inputCacheRead, inputCacheCreation }
23
+ *
24
+ * Mapping → buildAgentUsageEvent input shape:
25
+ * inputOther → input_tokens (prompt - cached)
26
+ * output → output_tokens
27
+ * inputCacheRead → cache_read_tokens
28
+ * inputCacheCreation → cache_creation_tokens
29
+ * record.model → model_id
30
+ *
31
+ * INCREMENTAL: usage.record lines are per-step deltas (summed via addUsage;
32
+ * usage/index.ts:34,37). The cumulative total exists only in-memory, never on
33
+ * disk — so cost capture sums the NEW delta lines per model since a cursor.
34
+ *
35
+ * Native cost: kimi-code's TokenUsage carries NO USD cost field (verified
36
+ * against the matrix doc field list — only token counts). So native_cost_usd
37
+ * is left null and buildAgentUsageEvent falls back to the pricing catalog.
38
+ *
39
+ * Pure, null-safe, algorithmic — NO regex.
40
+ */
41
+ import { type AgentUsageCounts, type SessionEvent } from "../../session/extract.js";
42
+ /**
43
+ * Parse ONE kimi-code `usage.record` line object into the buildAgentUsageEvent
44
+ * input shape, or null when it is not a usage record / carries no usage /
45
+ * every token bucket is zero.
46
+ *
47
+ * Accepts the parsed AgentRecord object (NOT the raw JSONL string). Tolerant of
48
+ * the record being passed either as the full stamped record `{ type, model,
49
+ * usage, ... }` or a bare `{ model, usage }`.
50
+ */
51
+ export declare function parseKimiUsage(record: unknown): AgentUsageCounts | null;
52
+ /**
53
+ * Cursor-aware wire.jsonl reader for the Stop / SessionEnd hook.
54
+ *
55
+ * `wire.jsonl` is an append-only records stream that grows every turn; the
56
+ * forward loop forwards ALL passed events unconditionally, so re-summing the
57
+ * whole file each hook fire would double-count every prior turn. This sums only
58
+ * the `usage.record` lines NEW since the last fire, keyed by a per-session
59
+ * high-water cursor (a 1-based count of usage.record lines consumed so far,
60
+ * serialized as a decimal string in session_meta.usage_cursor).
61
+ *
62
+ * - cursor null/empty/unparseable → process ALL usage.record lines.
63
+ * - cursor = N (>= total) → nothing new; no events, cursor unchanged.
64
+ * - cursor = N (< total) → process usage.record lines AFTER index N.
65
+ * - BOUNDED COMPACTION FALLBACK: if the file SHRANK below the cursor (the
66
+ * stream was truncated/rotated, so prior lines are gone), the cursor has
67
+ * fallen off the front — process ONLY the LAST usage.record line so we
68
+ * never re-emit the whole history. Mirrors extractTranscriptUsageSince.
69
+ *
70
+ * `cursor` returns the decimal string count of TOTAL usage.record lines seen,
71
+ * so the next fire resumes exactly past it.
72
+ *
73
+ * Per-model summation: lines are bucketed by model_id and each bucket emits one
74
+ * agent_usage event (incremental deltas are additive — addUsage semantics).
75
+ *
76
+ * Char-algorithmic JSONL parse (split on "\n", JSON.parse each line, skip
77
+ * blanks/unparseable). NO regex.
78
+ */
79
+ export declare function extractKimiUsageSince(wireJsonlText: string, cursor: string | null): {
80
+ events: SessionEvent[];
81
+ cursor: string | null;
82
+ };