context-mode 1.0.165 → 1.0.167

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.codex-plugin/plugin.json +1 -1
  4. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  5. package/.openclaw-plugin/package.json +1 -1
  6. package/README.md +6 -4
  7. package/build/adapters/codex/usage.d.ts +107 -0
  8. package/build/adapters/codex/usage.js +227 -0
  9. package/build/adapters/gemini-cli/hooks.d.ts +7 -1
  10. package/build/adapters/gemini-cli/hooks.js +9 -1
  11. package/build/adapters/gemini-cli/index.js +11 -0
  12. package/build/adapters/kimi/paths.d.ts +20 -0
  13. package/build/adapters/kimi/paths.js +41 -1
  14. package/build/adapters/kimi/usage.d.ts +82 -0
  15. package/build/adapters/kimi/usage.js +217 -0
  16. package/build/adapters/omp/plugin.d.ts +6 -0
  17. package/build/adapters/omp/plugin.js +87 -2
  18. package/build/adapters/omp/usage.d.ts +49 -0
  19. package/build/adapters/omp/usage.js +110 -0
  20. package/build/adapters/openclaw/plugin.d.ts +10 -0
  21. package/build/adapters/openclaw/plugin.js +57 -0
  22. package/build/adapters/openclaw/usage.d.ts +34 -0
  23. package/build/adapters/openclaw/usage.js +52 -0
  24. package/build/adapters/opencode/plugin.d.ts +17 -0
  25. package/build/adapters/opencode/plugin.js +40 -1
  26. package/build/adapters/pi/extension.js +61 -10
  27. package/build/adapters/pi/mcp-bridge.d.ts +78 -1
  28. package/build/adapters/pi/mcp-bridge.js +105 -17
  29. package/build/adapters/qwen-code/index.js +23 -1
  30. package/build/adapters/qwen-code/usage.d.ts +90 -0
  31. package/build/adapters/qwen-code/usage.js +222 -0
  32. package/build/lifecycle.d.ts +10 -0
  33. package/build/lifecycle.js +16 -1
  34. package/build/session/db.d.ts +11 -0
  35. package/build/session/db.js +33 -0
  36. package/build/session/extract.d.ts +208 -0
  37. package/build/session/extract.js +670 -43
  38. package/build/session/model-prices.json +429 -0
  39. package/build/session/pricing.d.ts +64 -0
  40. package/build/session/pricing.js +151 -0
  41. package/cli.bundle.mjs +84 -84
  42. package/configs/antigravity-cli/plugin.json +1 -1
  43. package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
  44. package/configs/gemini-cli/settings.json +11 -0
  45. package/hooks/codex/stop.mjs +91 -4
  46. package/hooks/gemini-cli/aftermodel.mjs +70 -0
  47. package/hooks/kimi/stop.mjs +74 -3
  48. package/hooks/qwen-code/platform.mjs +1 -0
  49. package/hooks/qwen-code/stop.mjs +168 -0
  50. package/hooks/session-db.bundle.mjs +7 -7
  51. package/hooks/session-extract.bundle.mjs +3 -2
  52. package/hooks/session-loaders.mjs +9 -1
  53. package/hooks/stop.mjs +35 -2
  54. package/openclaw.plugin.json +1 -1
  55. package/package.json +1 -1
  56. package/server.bundle.mjs +107 -107
@@ -0,0 +1,90 @@
1
+ /**
2
+ * adapters/qwen-code/usage — per-turn token capture from the session JSONL.
3
+ *
4
+ * Qwen Code is a Gemini-CLI fork and normalizes EVERY backend (Gemini-native,
5
+ * OpenAI-compat/DashScope, Anthropic) to the same canonical token shape:
6
+ * `GenerateContentResponseUsageMetadata` { promptTokenCount, candidatesTokenCount,
7
+ * cachedContentTokenCount, thoughtsTokenCount, totalTokenCount }
8
+ * (matrix §1: turn.ts:96,417 + converter.ts:1145-1148). That metadata is
9
+ * persisted, per API call, into the session record file as a `ChatRecord`
10
+ * carrying `.usageMetadata` + `.model`
11
+ * (refs: packages/core/src/services/chatRecordingService.ts:259,261,919 file at
12
+ * ~/.qwen/tmp/<project_id>/chats/<sessionId>.jsonl — :451 location comment,
13
+ * :600,628-629 path build).
14
+ *
15
+ * CRITICAL (matrix §4): qwen-code's hook payloads carry tool I/O ONLY — token
16
+ * usage is unreachable through the hook stream (grep of hookEventHandler.ts /
17
+ * hookSystem.ts / toolHookTriggers.ts for token|usageMetadata|usage → zero
18
+ * matches). The ONLY live capture path is a tail of the session JSONL. This
19
+ * module is therefore the JSONL-tail counterpart to claude-code's
20
+ * `extractTranscriptUsageSince` (src/session/extract.ts) — same cursor-gated,
21
+ * char-algorithmic, NO-regex parse, same `buildAgentUsageEvent` emission path.
22
+ *
23
+ * Per matrix §3 each ChatRecord.usageMetadata is INCREMENTAL per API call
24
+ * (cumulative session totals are derived downstream via += in
25
+ * uiTelemetry.ts:237-241), so summing the NEW records since the cursor yields
26
+ * the exact billed delta with no double-count.
27
+ *
28
+ * No native USD — cost_usd is derived from the pricing catalog inside
29
+ * buildAgentUsageEvent (native_cost_usd omitted). Pure, null-safe, NO regex.
30
+ */
31
+ import { type AgentUsageCounts, type SessionEvent } from "../../session/extract.js";
32
+ /**
33
+ * Parse ONE qwen `ChatRecord` into the `buildAgentUsageEvent` input shape, or
34
+ * null when the record is not a plain object, has no `usageMetadata` object, or every token count is zero.
35
+ *
36
+ * Mapping → builder shape (AgentUsageCounts):
37
+ * promptTokenCount → input_tokens
38
+ * candidatesTokenCount → output_tokens
39
+ * thoughtsTokenCount → ADDED into output_tokens (Gemini-lineage bills
40
+ * reasoning/thoughts as output — same fold as
41
+ * parseGeminiUsage in src/session/extract.ts)
42
+ * cachedContentTokenCount → cache_read_tokens (when present)
43
+ * model_id → ChatRecord.model (empty string when `model` is not a string)
44
+ *
45
+ * No native cost — native_cost_usd is returned as null (catalog-derived). NO regex.
46
+ */
47
+ export declare function parseQwenUsage(record: unknown): AgentUsageCounts | null;
48
+ /**
49
+ * Cursor-aware tail of the qwen session JSONL. Emits one priced `agent_usage`
50
+ * event PER distinct model across the records NEW since `cursor`, so re-reading
51
+ * the (append-only, ever-growing) JSONL each Stop never double-counts.
52
+ *
53
+ * - cursor null/empty → process ALL records.
54
+ * - cursor found → process records STRICTLY AFTER it.
55
+ * - cursor set but NOT found → compaction/rotation dropped it: bounded
56
+ * fallback processes ONLY THE LAST record (never re-emit full history); if that record has no usage, no event is emitted.
57
+ *
58
+ * `cursor` returns the id of the LAST id-bearing record seen (whether or not it
59
+ * carried usage), so the next call resumes exactly past it. When no record
60
+ * carries an id, the input cursor is returned unchanged (an empty-string cursor comes back as null).
61
+ *
62
+ * One linear walk, JSON.parse per line (blank, malformed and non-object lines are skipped), NO regex — mirrors
63
+ * extractTranscriptUsageSince's structure exactly.
64
+ */
65
+ export declare function extractQwenUsageSince(jsonlText: string, cursor: string | null): {
66
+ events: SessionEvent[];
67
+ cursor: string | null;
68
+ };
69
+ /**
70
+ * Hash a project root into qwen-code's `<project_id>` directory segment.
71
+ *
72
+ * EXACT port of qwen's `getProjectHash`
73
+ * (refs/platforms/qwen-code/packages/core/src/utils/paths.ts:262 —
74
+ * `crypto.createHash('sha256').update(normalizedPath).digest('hex')`). On
75
+ * Windows qwen lowercases the path first (case-insensitive FS); we mirror that
76
+ * so a hook running on win32 resolves the same tmp dir qwen itself wrote.
77
+ * Deterministic for a given platform (the result depends on os.platform()), NO regex. Returns the hex-encoded SHA-256 digest.
78
+ */
79
+ export declare function qwenProjectHash(projectRoot: string): string;
80
+ /**
81
+ * Build the canonical session JSONL path qwen-code writes its ChatRecords to:
82
+ * <qwenHome>/tmp/<sha256(projectRoot)>/chats/<sessionId>.jsonl
83
+ * (refs chatRecordingService.ts:451 location + storage.ts:316-320
84
+ * getProjectTempDir → getGlobalTempDir(<qwenHome>/tmp) + getProjectHash).
85
+ *
86
+ * `qwenHome` is normally `<homedir>/.qwen`. Pure path join — does NOT touch the
87
+ * FS, so it is fully unit-testable; existence probing + the glob fallback live
88
+ * in the Stop hook (which cannot import this TS at runtime). NO regex. `sessionId` is interpolated verbatim into the file name — it is not validated or sanitised here.
89
+ */
90
+ export declare function qwenChatJsonlPath(qwenHome: string, projectRoot: string, sessionId: string): string;
@@ -0,0 +1,222 @@
1
+ /**
2
+ * adapters/qwen-code/usage — per-turn token capture from the session JSONL.
3
+ *
4
+ * Qwen Code is a Gemini-CLI fork and normalizes EVERY backend (Gemini-native,
5
+ * OpenAI-compat/DashScope, Anthropic) to the same canonical token shape:
6
+ * `GenerateContentResponseUsageMetadata` { promptTokenCount, candidatesTokenCount,
7
+ * cachedContentTokenCount, thoughtsTokenCount, totalTokenCount }
8
+ * (matrix §1: turn.ts:96,417 + converter.ts:1145-1148). That metadata is
9
+ * persisted, per API call, into the session record file as a `ChatRecord`
10
+ * carrying `.usageMetadata` + `.model`
11
+ * (refs: packages/core/src/services/chatRecordingService.ts:259,261,919 file at
12
+ * ~/.qwen/tmp/<project_id>/chats/<sessionId>.jsonl — :451 location comment,
13
+ * :600,628-629 path build).
14
+ *
15
+ * CRITICAL (matrix §4): qwen-code's hook payloads carry tool I/O ONLY — token
16
+ * usage is unreachable through the hook stream (grep of hookEventHandler.ts /
17
+ * hookSystem.ts / toolHookTriggers.ts for token|usageMetadata|usage → zero
18
+ * matches). The ONLY live capture path is a tail of the session JSONL. This
19
+ * module is therefore the JSONL-tail counterpart to claude-code's
20
+ * `extractTranscriptUsageSince` (src/session/extract.ts) — same cursor-gated,
21
+ * char-algorithmic, NO-regex parse, same `buildAgentUsageEvent` emission path.
22
+ *
23
+ * Per matrix §3 each ChatRecord.usageMetadata is INCREMENTAL per API call
24
+ * (cumulative session totals are derived downstream via += in
25
+ * uiTelemetry.ts:237-241), so summing the NEW records since the cursor yields
26
+ * the exact billed delta with no double-count.
27
+ *
28
+ * No native USD — cost_usd is derived from the pricing catalog inside
29
+ * buildAgentUsageEvent (native_cost_usd omitted). Pure, null-safe, NO regex.
30
+ */
31
+ import { createHash } from "node:crypto";
32
+ import { join } from "node:path";
33
+ import { platform } from "node:os";
34
+ import { buildAgentUsageEvent } from "../../session/extract.js";
35
+ /** Floor-and-clamp a token field to a non-negative integer; non-number, NaN or infinite values become 0 (mirrors omp/usage). */
36
+ function tokenNum(v) {
37
+ if (typeof v !== "number" || !Number.isFinite(v))
38
+ return 0;
39
+ const n = Math.floor(v);
40
+ return n > 0 ? n : 0;
41
+ }
42
+ /**
43
+ * Parse ONE qwen `ChatRecord` into the `buildAgentUsageEvent` input shape, or
44
+ * null when the record is not a plain object, has no `usageMetadata` object, or every token count is zero.
45
+ *
46
+ * Mapping → builder shape (AgentUsageCounts):
47
+ * promptTokenCount → input_tokens
48
+ * candidatesTokenCount → output_tokens
49
+ * thoughtsTokenCount → ADDED into output_tokens (Gemini-lineage bills
50
+ * reasoning/thoughts as output — same fold as
51
+ * parseGeminiUsage in src/session/extract.ts)
52
+ * cachedContentTokenCount → cache_read_tokens (when present)
53
+ * model_id → ChatRecord.model (empty string when `model` is not a string)
54
+ *
55
+ * No native cost — native_cost_usd is returned as null (catalog-derived). NO regex.
56
+ */
57
+ export function parseQwenUsage(record) {
58
+ if (!record || typeof record !== "object" || Array.isArray(record))
59
+ return null;
60
+ const rec = record;
61
+ const um = rec.usageMetadata;
62
+ if (!um || typeof um !== "object")
63
+ return null;
64
+ const usage = um;
65
+ const input = tokenNum(usage.promptTokenCount);
66
+ const candidates = tokenNum(usage.candidatesTokenCount);
67
+ const thoughts = tokenNum(usage.thoughtsTokenCount);
68
+ const cached = tokenNum(usage.cachedContentTokenCount);
69
+ // Gemini-lineage bills reasoning (thoughts) as output tokens — fold into output.
70
+ const output = candidates + thoughts;
71
+ // All token fields zero → not a billable record. buildAgentUsageEvent would
72
+ // also reject this, but short-circuit keeps the contract explicit.
73
+ if (input <= 0 && output <= 0 && cached <= 0)
74
+ return null;
75
+ const model_id = typeof rec.model === "string" ? rec.model : ""; // non-string model → empty id
76
+ return {
77
+ model_id,
78
+ input_tokens: input,
79
+ output_tokens: output,
80
+ cache_creation_tokens: 0, // qwen exposes no cache-creation field
81
+ cache_read_tokens: cached,
82
+ native_cost_usd: null, // catalog-derived (no native cost on qwen records)
83
+ };
84
+ }
85
+ /** Stable cursor identity for a ChatRecord: prefer `id`, fall back to `messageId`; null when neither is a non-empty string. NOTE(review): confirm qwen ChatRecords actually carry one of these keys — without one the usage cursor can never advance. */
86
+ function recordId(rec) {
87
+ if (typeof rec.id === "string" && rec.id.length > 0)
88
+ return rec.id;
89
+ if (typeof rec.messageId === "string" && rec.messageId.length > 0)
90
+ return rec.messageId;
91
+ return null;
92
+ }
93
+ /**
94
+ * Cursor-aware tail of the qwen session JSONL. Emits one priced `agent_usage`
95
+ * event PER distinct model across the records NEW since `cursor`, so re-reading
96
+ * the (append-only, ever-growing) JSONL each Stop never double-counts.
97
+ *
98
+ * - cursor null/empty → process ALL records.
99
+ * - cursor found → process records STRICTLY AFTER it.
100
+ * - cursor set but NOT found → compaction/rotation dropped it: bounded
101
+ * fallback processes ONLY THE LAST record (never re-emit full history); if that record has no usage, no event is emitted.
102
+ *
103
+ * `cursor` returns the id of the LAST id-bearing record seen (whether or not it
104
+ * carried usage), so the next call resumes exactly past it. When no record
105
+ * carries an id, the input cursor is returned unchanged. NOTE(review): with a null cursor and id-less records every call re-sums the whole file — confirm records always carry `id`/`messageId`.
106
+ *
107
+ * One linear walk, JSON.parse per line (blank, malformed and non-object lines are skipped), NO regex — mirrors
108
+ * extractTranscriptUsageSince's structure exactly.
109
+ */
110
+ export function extractQwenUsageSince(jsonlText, cursor) {
111
+ const inputCursor = typeof cursor === "string" && cursor.length > 0 ? cursor : null; // empty-string cursor is treated as null
112
+ if (typeof jsonlText !== "string" || jsonlText.length === 0) {
113
+ return { events: [], cursor: inputCursor };
114
+ }
115
+ const rows = [];
116
+ let start = 0;
117
+ for (let i = 0; i <= jsonlText.length; i++) { // i === length flushes the final line when the text lacks a trailing newline
118
+ if (i !== jsonlText.length && jsonlText.charCodeAt(i) !== 10 /* \n */)
119
+ continue;
120
+ const line = jsonlText.slice(start, i).trim();
121
+ start = i + 1;
122
+ if (line.length === 0)
123
+ continue;
124
+ let obj;
125
+ try {
126
+ const p = JSON.parse(line);
127
+ if (!p || typeof p !== "object" || Array.isArray(p))
128
+ continue;
129
+ obj = p;
130
+ }
131
+ catch {
132
+ continue;
133
+ }
134
+ rows.push({ id: recordId(obj), counts: parseQwenUsage(obj) });
135
+ }
136
+ if (rows.length === 0)
137
+ return { events: [], cursor: inputCursor };
138
+ // Cursor always advances to the last id-bearing record's id (or stays as the
139
+ // input cursor when no record carries an id).
140
+ let lastId = inputCursor;
141
+ for (let i = rows.length - 1; i >= 0; i--) {
142
+ if (rows[i].id !== null) {
143
+ lastId = rows[i].id;
144
+ break;
145
+ }
146
+ }
147
+ // Select the slice to sum.
148
+ let slice;
149
+ if (inputCursor === null) {
150
+ slice = rows; // all records
151
+ }
152
+ else {
153
+ let foundAt = -1;
154
+ for (let i = 0; i < rows.length; i++) {
155
+ if (rows[i].id === inputCursor) {
156
+ foundAt = i;
157
+ break;
158
+ }
159
+ }
160
+ if (foundAt >= 0) {
161
+ slice = rows.slice(foundAt + 1); // strictly after the cursor
162
+ }
163
+ else {
164
+ // Compaction/rotation: cursor fell off the front. Bounded fallback — last
165
+ // record only. Never re-emit the whole history.
166
+ slice = rows.slice(rows.length - 1);
167
+ }
168
+ }
169
+ // Sum the selected records per model, then emit via the shared builder (native_cost_usd is not forwarded; a falsy builder result is dropped).
170
+ const sums = new Map();
171
+ for (const row of slice) {
172
+ const c = row.counts;
173
+ if (!c)
174
+ continue;
175
+ const cur = sums.get(c.model_id) ?? { input: 0, output: 0, cacheCreate: 0, cacheRead: 0 };
176
+ cur.input += c.input_tokens;
177
+ cur.output += c.output_tokens;
178
+ cur.cacheCreate += c.cache_creation_tokens;
179
+ cur.cacheRead += c.cache_read_tokens;
180
+ sums.set(c.model_id, cur);
181
+ }
182
+ const events = [];
183
+ for (const [model_id, s] of sums) {
184
+ const ev = buildAgentUsageEvent({
185
+ model_id,
186
+ input_tokens: s.input,
187
+ output_tokens: s.output,
188
+ cache_creation_tokens: s.cacheCreate,
189
+ cache_read_tokens: s.cacheRead,
190
+ });
191
+ if (ev)
192
+ events.push(ev);
193
+ }
194
+ return { events, cursor: lastId };
195
+ }
196
+ /**
197
+ * Hash a project root into qwen-code's `<project_id>` directory segment.
198
+ *
199
+ * EXACT port of qwen's `getProjectHash`
200
+ * (refs/platforms/qwen-code/packages/core/src/utils/paths.ts:262 —
201
+ * `crypto.createHash('sha256').update(normalizedPath).digest('hex')`). On
202
+ * Windows qwen lowercases the path first (case-insensitive FS); we mirror that
203
+ * so a hook running on win32 resolves the same tmp dir qwen itself wrote.
204
+ * Deterministic for a given platform (the result depends on os.platform()), NO regex. Returns the hex-encoded SHA-256 digest.
205
+ */
206
+ export function qwenProjectHash(projectRoot) {
207
+ const normalized = platform() === "win32" ? projectRoot.toLowerCase() : projectRoot;
208
+ return createHash("sha256").update(normalized).digest("hex");
209
+ }
210
+ /**
211
+ * Build the canonical session JSONL path qwen-code writes its ChatRecords to:
212
+ * <qwenHome>/tmp/<sha256(projectRoot)>/chats/<sessionId>.jsonl
213
+ * (refs chatRecordingService.ts:451 location + storage.ts:316-320
214
+ * getProjectTempDir → getGlobalTempDir(<qwenHome>/tmp) + getProjectHash).
215
+ *
216
+ * `qwenHome` is normally `<homedir>/.qwen`. Pure path join — does NOT touch the
217
+ * FS, so it is fully unit-testable; existence probing + the glob fallback live
218
+ * in the Stop hook (which cannot import this TS at runtime). NO regex. `sessionId` is interpolated verbatim into the file name — it is not validated or sanitised here.
219
+ */
220
+ export function qwenChatJsonlPath(qwenHome, projectRoot, sessionId) {
221
+ return join(qwenHome, "tmp", qwenProjectHash(projectRoot), "chats", `${sessionId}.jsonl`);
222
+ }
@@ -84,6 +84,16 @@ export declare function lifecycleGuardIntervalForEnv(env?: NodeJS.ProcessEnv): n
84
84
  * Exported for unit-testing.
85
85
  */
86
86
  export declare function bridgeChildIdleTimeoutMs(env?: NodeJS.ProcessEnv): number;
87
+ /**
88
+ * #854 / #868: human-readable notice emitted when an idle bridge child is
89
+ * released. DX-tuned — human units (ms rounded to the nearest whole second, not raw ms), reassures that the
90
+ * helper reconnects automatically (it respawns on the next ctx_* call, #583),
91
+ * and drops the alarming "self-shutdown" jargon. Pure + exported so the wording
92
+ * is pinned by a test and stays grep-friendly via the #854 tag. Note: after the
93
+ * #868 fix this fires ONLY for sub-context / non-interactive children — the
94
+ * foreground interactive session's child runs with the reaper disabled.
95
+ */
96
+ export declare function idleReapMessage(idleMs: number): string;
87
97
  /**
88
98
  * #854: record MCP activity (inbound message or response). The server calls this
89
99
  * so the bridge-child idle reaper in {@link startLifecycleGuard} can distinguish
@@ -123,6 +123,19 @@ export function bridgeChildIdleTimeoutMs(env = process.env) {
123
123
  }
124
124
  return 180_000;
125
125
  }
126
+ /**
127
+ * #854 / #868: human-readable notice emitted when an idle bridge child is
128
+ * released. DX-tuned — human units (ms rounded to the nearest whole second, not raw ms), reassures that the
129
+ * helper reconnects automatically (it respawns on the next ctx_* call, #583),
130
+ * and drops the alarming "self-shutdown" jargon. Pure + exported so the wording
131
+ * is pinned by a test and stays grep-friendly via the #854 tag. Note: after the
132
+ * #868 fix this fires ONLY for sub-context / non-interactive children — the
133
+ * foreground interactive session's child runs with the reaper disabled.
134
+ */
135
+ export function idleReapMessage(idleMs) {
136
+ const seconds = Math.round(idleMs / 1000);
137
+ return `[context-mode] Released an idle MCP helper after ${seconds}s of inactivity to free memory; it reconnects automatically on next use. (#854)`;
138
+ }
126
139
  // #854 idle-reaper state, module-level by design: an MCP server is exactly one
127
140
  // process (one StdioServerTransport + one lifecycle guard), so these are never
128
141
  // shared across concurrent servers in production. Multiple startLifecycleGuard()
@@ -245,7 +258,9 @@ export function startLifecycleGuard(opts) {
245
258
  // further messages (#643 unbounded calls) — the false-reap regression the
246
259
  // adversarial review flagged.
247
260
  if (_inFlight === 0 && Date.now() - _lastMcpActivity >= idleMs) {
248
- process.stderr.write(`[context-mode] idle MCP bridge child self-shutdown after ${idleMs}ms with no activity (#854)\n`);
261
+ // Written to the child's own stderr; the pi bridge forwards it to pi.logger,
263
+ // never the TUI terminal (#868). DX-tuned wording via idleReapMessage.
263
+ process.stderr.write(idleReapMessage(idleMs) + "\n");
249
264
  shutdown();
250
265
  }
251
266
  }, Math.max(1000, Math.min(Math.floor(idleMs / 4), 30_000)));
@@ -380,6 +380,17 @@ export declare class SessionDB extends SQLiteBase {
380
380
  * Increment the compact_count for a session (tracks snapshot rebuilds).
381
381
  */
382
382
  incrementCompactCount(sessionId: string): void;
383
+ /**
384
+ * Read the per-session usage high-water cursor — the uuid of the last
385
+ * assistant turn already emitted by the Stop hook's main-turn capture.
386
+ * Returns null when the cursor column is unset/NULL (first Stop) or the session row is absent.
387
+ */
388
+ getUsageCursor(sessionId: string): string | null;
389
+ /**
390
+ * Advance the per-session usage high-water cursor to `uuid`. No-op when the
391
+ * session_meta row does not exist yet — this is a plain UPDATE, not an upsert (callers ensureSession first).
392
+ */
393
+ setUsageCursor(sessionId: string, uuid: string): void;
383
394
  /**
384
395
  * Upsert a resume snapshot for a session. Resets consumed flag on update.
385
396
  */
@@ -481,6 +481,8 @@ const S = {
481
481
  getMaxFileEdits: "getMaxFileEdits",
482
482
  getLatestCommitMessage: "getLatestCommitMessage",
483
483
  incrementCompactCount: "incrementCompactCount",
484
+ getUsageCursor: "getUsageCursor",
485
+ setUsageCursor: "setUsageCursor",
484
486
  upsertResume: "upsertResume",
485
487
  getResume: "getResume",
486
488
  markResumeConsumed: "markResumeConsumed",
@@ -662,6 +664,19 @@ export class SessionDB extends SQLiteBase {
662
664
  catch {
663
665
  // best-effort migration only
664
666
  }
667
+ // Migration: per-session usage high-water cursor for the Stop hook's
668
+ // cursor-aware main-turn capture (extractTranscriptUsageSince). Stores the
669
+ // uuid of the last assistant turn already emitted so the next Stop forwards
670
+ // only NEW spend. Idempotent — guarded by a table_xinfo column check.
671
+ try {
672
+ const metaCols = this.db.pragma("table_xinfo(session_meta)");
673
+ if (!metaCols.some((c) => c.name === "usage_cursor")) {
674
+ this.db.exec("ALTER TABLE session_meta ADD COLUMN usage_cursor TEXT");
675
+ }
676
+ }
677
+ catch {
678
+ // best-effort migration only
679
+ }
665
680
  }
666
681
  prepareStatements() {
667
682
  this.stmts = new Map();
@@ -759,6 +774,8 @@ export class SessionDB extends SQLiteBase {
759
774
  ORDER BY id DESC
760
775
  LIMIT 1`);
761
776
  p(S.incrementCompactCount, `UPDATE session_meta SET compact_count = compact_count + 1 WHERE session_id = ?`);
777
+ p(S.getUsageCursor, `SELECT usage_cursor FROM session_meta WHERE session_id = ?`);
778
+ p(S.setUsageCursor, `UPDATE session_meta SET usage_cursor = ? WHERE session_id = ?`);
762
779
  // ── Resume ──
763
780
  p(S.upsertResume, `INSERT INTO session_resume (session_id, snapshot, event_count)
764
781
  VALUES (?, ?, ?)
@@ -1127,6 +1144,22 @@ export class SessionDB extends SQLiteBase {
1127
1144
  incrementCompactCount(sessionId) {
1128
1145
  this.stmt(S.incrementCompactCount).run(sessionId);
1129
1146
  }
1147
+ /**
1148
+ * Read the per-session usage high-water cursor — the uuid of the last
1149
+ * assistant turn already emitted by the Stop hook's main-turn capture.
1150
+ * Returns null when the cursor column is unset/NULL (first Stop) or the session row is absent.
1151
+ */
1152
+ getUsageCursor(sessionId) {
1153
+ const row = this.stmt(S.getUsageCursor).get(sessionId);
1154
+ return row?.usage_cursor ?? null;
1155
+ }
1156
+ /**
1157
+ * Advance the per-session usage high-water cursor to `uuid`. No-op when the
1158
+ * session_meta row does not exist yet — this is a plain UPDATE, not an upsert (callers ensureSession first).
1159
+ */
1160
+ setUsageCursor(sessionId, uuid) {
1161
+ this.stmt(S.setUsageCursor).run(uuid, sessionId);
1162
+ }
1130
1163
  // ═══════════════════════════════════════════
1131
1164
  // Resume
1132
1165
  // ═══════════════════════════════════════════
@@ -22,6 +22,19 @@ export interface SessionEvent {
22
22
  * `Fetched and indexed N sections (XKB)` preamble.
23
23
  */
24
24
  bytes_avoided?: number;
25
+ /**
26
+ * Optional structured cost/usage fields (Wave 2b). Emitted by
27
+ * extractAgentUsage alongside the colon-string `data` so the forward
28
+ * envelope can spread them to the platform as typed columns instead of an
29
+ * opaque blob. Present only when the source signal is present; cost_usd is
30
+ * omitted on a price miss or a zero-token turn.
31
+ */
32
+ model_id?: string;
33
+ input_tokens?: number;
34
+ output_tokens?: number;
35
+ cache_read_tokens?: number;
36
+ cache_creation_tokens?: number;
37
+ cost_usd?: number;
25
38
  }
26
39
  export interface ToolCall {
27
40
  toolName: string;
@@ -43,6 +56,201 @@ export interface HookInput {
43
56
  is_error?: boolean;
44
57
  };
45
58
  }
59
+ /** Input shape `buildAgentUsageEvent` consumes — re-exported for parser typing. */
60
+ export interface AgentUsageCounts {
61
+ model_id: string;
62
+ input_tokens: number;
63
+ output_tokens: number;
64
+ cache_creation_tokens: number;
65
+ cache_read_tokens: number;
66
+ native_cost_usd?: number | null;
67
+ }
68
+ export { parseKimiUsage, extractKimiUsageSince } from "../adapters/kimi/usage.js";
69
+ export { parseQwenUsage, extractQwenUsageSince } from "../adapters/qwen-code/usage.js";
70
+ /**
71
+ * Pi (oh-my-pi) per-turn usage parser.
72
+ *
73
+ * Maps a Pi `turn_end` payload (`{ message: AssistantMessage }`) to the
74
+ * `buildAgentUsageEvent` input shape, or null when there is nothing to record.
75
+ *
76
+ * Field provenance (adapter-matrix/pi.md @320261f + cited refs):
77
+ * - usage: AssistantMessage.usage (ai/src/types.ts:521 -> catalog/src/types.ts:100-145)
78
+ * - model_id: AssistantMessage.model (ai/src/types.ts:510; kept "provider/model" — builder normalizes)
79
+ * - input: Usage.input -> input_tokens
80
+ * - output: Usage.output -> output_tokens
81
+ * - cacheWrite: Usage.cacheWrite -> cache_creation_tokens
82
+ * - cacheRead: Usage.cacheRead -> cache_read_tokens
83
+ * - native USD: Usage.cost.total -> native_cost_usd (HIGH confidence; no price-table needed)
84
+ *
85
+ * The event is per-turn incremental (per-response usage; anthropic.ts:1893-1901;
86
+ * "for the turn" catalog/types.ts:103), so each turn_end maps to exactly one
87
+ * agent_usage event with no cross-turn accumulation.
88
+ *
89
+ * Algorithmic + null-safe, NO regex. Accepts either the full TurnEndEvent
90
+ * (`{ message }`) or a bare AssistantMessage (`{ usage, model }`) so callers
91
+ * can pass `event` or `event.message` interchangeably. Returns null when the
92
+ * payload is not an assistant message, carries no usage object, or every token
93
+ * bucket is zero/absent (an all-zero turn emits no event — matches
94
+ * buildAgentUsageEvent's own zero->null contract).
95
+ */
96
+ export declare function parsePiUsage(payload: unknown): AgentUsageCounts | null;
97
+ /**
98
+ * openclaw `model.usage` diagnostic-event capture — parseOpenclawUsage.
99
+ *
100
+ * openclaw exposes a first-class `model.usage` diagnostic event
101
+ * (`DiagnosticUsageEvent`, refs/platforms/openclaw/src/infra/diagnostic-events.ts:18-47),
102
+ * emitted once per turn and consumed via `onDiagnosticEvent(listener)`
103
+ * (diagnostic-events.ts:1156) — the same bus the first-party diagnostics-otel /
104
+ * diagnostics-prometheus extensions read.
105
+ *
106
+ * Field mapping (openclaw → AgentUsageCounts):
107
+ * evt.usage.input → input_tokens
108
+ * evt.usage.output → output_tokens
109
+ * evt.usage.cacheWrite→ cache_creation_tokens (cache-creation)
110
+ * evt.usage.cacheRead → cache_read_tokens (cache-read)
111
+ * evt.costUsd → native_cost_usd (pre-computed via estimateUsageCost,
112
+ * agent-runner.ts:1995 — preferred over catalog)
113
+ * evt.model → model_id
114
+ *
115
+ * CRITICAL: read `evt.usage` (the PER-TURN TOTAL — "Last Turn Total"
116
+ * agent-runner.ts:943), NEVER `evt.lastCallUsage` (the last-model-call DELTA,
117
+ * diagnostic-events.ts:34-40). Summing both would double-count.
118
+ *
119
+ * Returns AgentUsageCounts (the buildAgentUsageEvent input shape) or null when
120
+ * the event is not a usage event / carries no usage / sums to zero. Pure,
121
+ * null-safe, algorithmic — NO regex.
122
+ */
123
+ export declare function parseOpenclawUsage(payload: unknown): AgentUsageCounts | null;
124
+ /**
125
+ * opencode per-turn usage parser.
126
+ *
127
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
128
+ * adapter-matrix/opencode.md. opencode tracks usage per *assistant message*; the
129
+ * usage-bearing payload reaches a plugin via the `message.updated` bus event,
130
+ * whose `event.properties.info` is the full Message. The assistant token shape
131
+ * (refs platforms/opencode .../session/message.ts) is:
132
+ * info.tokens = { input, output, reasoning, cache: { read, write } }
133
+ * info.cost = USD cost for this message
134
+ * info.modelID / info.providerID (older refs may expose a single info.model)
135
+ *
136
+ * Field mapping (refs message.ts):
137
+ * tokens.input -> input_tokens
138
+ * tokens.output -> output_tokens
139
+ * tokens.cache.read -> cache_read_tokens
140
+ * tokens.cache.write -> cache_creation_tokens
141
+ * modelID/providerID -> model_id (`${providerID}/${modelID}` when both present)
142
+ * cost -> native_cost_usd
143
+ *
144
+ * LAST-STEP-SNAPSHOT CAVEAT (refs processor.ts:717-718): message-level
145
+ * `.tokens` is OVERWRITTEN every step-finish, so it holds the LAST step's usage
146
+ * — not the turn total. `.cost`, however, ACCUMULATES (`cost += usage.cost`) and
147
+ * is the correct cumulative turn cost. We therefore pass `info.cost` through as
148
+ * native_cost_usd so the billed $ is exact even though the token snapshot is
149
+ * imprecise; the token columns remain best-effort (last-step) telemetry. A true
150
+ * turn-total token sum would require summing per-step Step.Ended parts, which the
151
+ * `message.updated` payload does not carry — out of scope for this snapshot-based
152
+ * capture.
153
+ *
154
+ * Accepts either the bus event (`{ properties: { info } }`), the wrapped
155
+ * `{ event: { properties: { info } } }`, or the bare Message (`info`) so the
156
+ * caller can hand us whatever the SDK surfaces. NO regex — pure algorithmic,
157
+ * null-safe traversal. Returns null when the payload is not an assistant
158
+ * message, carries no tokens object, or every token bucket is zero/absent
159
+ * (mirrors buildAgentUsageEvent's zero->null contract).
160
+ */
161
+ export declare function parseOpencodeUsage(payload: unknown): AgentUsageCounts | null;
162
+ /**
163
+ * Build a structured `agent_usage` event from summed per-model token counts.
164
+ * Emits the colon-string `data` (human/debug + back-compat) AND the structured
165
+ * top-level fields the forward envelope spreads to the platform. cost_usd via
166
+ * the pricing catalog — omitted on a price miss. Returns null when every token
167
+ * bucket is zero/absent (so an all-zero model emits no event).
168
+ */
169
+ export declare function buildAgentUsageEvent(counts: {
170
+ model_id: string;
171
+ input_tokens: number;
172
+ output_tokens: number;
173
+ cache_creation_tokens: number;
174
+ cache_read_tokens: number;
175
+ /**
176
+ * Provider-supplied USD cost for this turn. When a finite number, it is
177
+ * preferred over the catalog computation (openclaw / pi / omp / opencode
178
+ * ship a native cost — trust the source over our price table). Omit/null to
179
+ * derive cost_usd from the pricing catalog.
180
+ */
181
+ native_cost_usd?: number | null;
182
+ }): SessionEvent | null;
183
+ /**
184
+ * gemini-cli AfterModel usage capture — parse ONE AfterModel hook payload into
185
+ * a builder `agent_usage` event (or null). Pure, null-safe, struct-only — NO regex.
186
+ *
187
+ * Refs (docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md):
188
+ * - AfterModel fires per model call inside the gemini-cli stream loop
189
+ * (geminiChat.ts:1213); the hook input carries `llm_request` + `llm_response`
190
+ * (hooks/types.ts:692-695).
191
+ * - `llm_response.usageMetadata` exposes promptTokenCount / candidatesTokenCount
192
+ * / totalTokenCount (hookTranslator.ts:60-64).
193
+ * - model_id = `response.modelVersion || req.model` (loggingContentGenerator.ts:405,553).
194
+ *
195
+ * Mapping → builder shape:
196
+ * promptTokenCount → input_tokens
197
+ * candidatesTokenCount → output_tokens
198
+ * thoughtsTokenCount → ADDED into output_tokens (Gemini bills reasoning as output)
199
+ * cachedContentTokenCount → cache_read_tokens (when present)
200
+ * model_id → response.modelVersion || llm_request.model
201
+ *
202
+ * CAVEAT — the DECOUPLED AfterModel payload (hookTranslator.ts:60-64) forwards
203
+ * only prompt/candidates/total and DROPS cachedContentTokenCount +
204
+ * thoughtsTokenCount. We map those two defensively WHEN PRESENT (richer payload
205
+ * variant / future fix / OTel-fed input) but never depend on them — the common
206
+ * case is input+output only. For full cached/thoughts fidelity the OTel
207
+ * `api_response` exporter or the chat-recording JSON is the source of record.
208
+ *
209
+ * MULTI-CALL TURNS — one user turn that triggers tool calls spans MULTIPLE
210
+ * model calls, each AfterModel payload being cumulative within itself. This function emits ONE
211
+ * priced event PER AfterModel call (each call is one billed round-trip).
212
+ * Per-userPromptId summation into a single per-turn total is DEFERRED — emitting
213
+ * per-call never double-counts, since each call's usageMetadata is the
214
+ * authoritative total for that call.
215
+ */
216
+ export declare function parseGeminiUsage(afterModelPayload: unknown): SessionEvent | null;
217
+ /**
218
+ * claude-code MAIN-turn usage capture — the dominant-spend path the Task
219
+ * subagent capture (extractAgentUsage) misses. Parses the session transcript
220
+ * JSONL char-algorithmically (NO regex): each `type:"assistant"` line carries
221
+ * `message.usage` + `message.model`, and usage is a per-turn DELTA, so summing
222
+ * the assistant turns per model = the exact billed total. `isSidechain:true`
223
+ * lines are Task-subagent sidechains written to a SEPARATE transcript (refs:
224
+ * sessionStorage.ts:1042) — excluding them keeps the main-turn sum from
225
+ * double-counting the separate Task-subagent capture. Emits one structured
226
+ * `agent_usage` event per distinct model.
227
+ */
228
+ export declare function extractTranscriptUsage(transcript: string): SessionEvent[];
229
+ /**
230
+ * Cursor-aware variant of extractTranscriptUsage for the Stop hook.
231
+ *
232
+ * The transcript grows every turn and the forward loop forwards ALL passed
233
+ * events unconditionally, so re-running extractTranscriptUsage on the whole
234
+ * transcript each Stop would double-count every prior turn. This walks only
235
+ * the turns NEW since the last Stop, keyed by a per-session high-water cursor
236
+ * (the `uuid` of the last assistant turn seen).
237
+ *
238
+ * - sinceUuid null/empty → process ALL non-sidechain assistant turns.
239
+ * - sinceUuid found → process only turns AFTER it (exclusive).
240
+ * - sinceUuid set but NOT found (transcript compaction dropped it) → process
241
+ * ONLY THE LAST non-sidechain assistant turn. Bounded by design: we never
242
+ * re-emit the whole history when the cursor falls off the front.
243
+ *
244
+ * `cursor` returns the uuid of the LAST non-sidechain assistant turn in the
245
+ * transcript (whether or not it carried usage), so the next Stop resumes
246
+ * exactly past it. When the transcript has no such turn, the input cursor is
247
+ * returned unchanged. Same char-algorithmic JSONL parse (NO regex), same
248
+ * sidechain exclusion, same buildAgentUsageEvent emission path.
249
+ */
250
+ export declare function extractTranscriptUsageSince(transcript: string, sinceUuid: string | null): {
251
+ events: SessionEvent[];
252
+ cursor: string | null;
253
+ };
46
254
  /** Reset error-resolution state (for testing). */
47
255
  export declare function resetErrorResolutionState(): void;
48
256
  /** Reset iteration-loop state (for testing). */