context-mode 1.0.166 → 1.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.codex-plugin/plugin.json +1 -1
  4. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  5. package/.openclaw-plugin/package.json +1 -1
  6. package/README.md +6 -4
  7. package/build/adapters/codex/usage.d.ts +107 -0
  8. package/build/adapters/codex/usage.js +227 -0
  9. package/build/adapters/gemini-cli/hooks.d.ts +7 -1
  10. package/build/adapters/gemini-cli/hooks.js +9 -1
  11. package/build/adapters/gemini-cli/index.js +11 -0
  12. package/build/adapters/kimi/paths.d.ts +20 -0
  13. package/build/adapters/kimi/paths.js +41 -1
  14. package/build/adapters/kimi/usage.d.ts +82 -0
  15. package/build/adapters/kimi/usage.js +217 -0
  16. package/build/adapters/omp/plugin.d.ts +6 -0
  17. package/build/adapters/omp/plugin.js +87 -2
  18. package/build/adapters/omp/usage.d.ts +49 -0
  19. package/build/adapters/omp/usage.js +110 -0
  20. package/build/adapters/openclaw/plugin.d.ts +10 -0
  21. package/build/adapters/openclaw/plugin.js +57 -0
  22. package/build/adapters/openclaw/usage.d.ts +34 -0
  23. package/build/adapters/openclaw/usage.js +52 -0
  24. package/build/adapters/opencode/plugin.d.ts +17 -0
  25. package/build/adapters/opencode/plugin.js +40 -1
  26. package/build/adapters/pi/extension.js +34 -1
  27. package/build/adapters/qwen-code/index.js +23 -1
  28. package/build/adapters/qwen-code/usage.d.ts +90 -0
  29. package/build/adapters/qwen-code/usage.js +222 -0
  30. package/build/session/analytics.js +30 -0
  31. package/build/session/db.d.ts +11 -0
  32. package/build/session/db.js +33 -0
  33. package/build/session/extract.d.ts +224 -0
  34. package/build/session/extract.js +705 -62
  35. package/build/session/model-prices.json +429 -0
  36. package/build/session/pricing.d.ts +64 -0
  37. package/build/session/pricing.js +151 -0
  38. package/cli.bundle.mjs +177 -170
  39. package/configs/antigravity-cli/plugin.json +1 -1
  40. package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
  41. package/configs/gemini-cli/settings.json +11 -0
  42. package/hooks/codex/stop.mjs +91 -4
  43. package/hooks/gemini-cli/aftermodel.mjs +70 -0
  44. package/hooks/kimi/stop.mjs +74 -3
  45. package/hooks/qwen-code/platform.mjs +1 -0
  46. package/hooks/qwen-code/stop.mjs +168 -0
  47. package/hooks/session-db.bundle.mjs +7 -7
  48. package/hooks/session-extract.bundle.mjs +3 -2
  49. package/hooks/session-loaders.mjs +16 -1
  50. package/hooks/stop.mjs +35 -2
  51. package/openclaw.plugin.json +1 -1
  52. package/package.json +1 -1
  53. package/server.bundle.mjs +108 -101
@@ -481,6 +481,8 @@ const S = {
481
481
  getMaxFileEdits: "getMaxFileEdits",
482
482
  getLatestCommitMessage: "getLatestCommitMessage",
483
483
  incrementCompactCount: "incrementCompactCount",
484
+ getUsageCursor: "getUsageCursor",
485
+ setUsageCursor: "setUsageCursor",
484
486
  upsertResume: "upsertResume",
485
487
  getResume: "getResume",
486
488
  markResumeConsumed: "markResumeConsumed",
@@ -662,6 +664,19 @@ export class SessionDB extends SQLiteBase {
662
664
  catch {
663
665
  // best-effort migration only
664
666
  }
667
+ // Migration: per-session usage high-water cursor for the Stop hook's
668
+ // cursor-aware main-turn capture (extractTranscriptUsageSince). Stores the
669
+ // uuid of the last assistant turn already emitted so the next Stop forwards
670
+ // only NEW spend. Idempotent — guarded by a table_xinfo column check.
671
+ try {
672
+ const metaCols = this.db.pragma("table_xinfo(session_meta)");
673
+ if (!metaCols.some((c) => c.name === "usage_cursor")) {
674
+ this.db.exec("ALTER TABLE session_meta ADD COLUMN usage_cursor TEXT");
675
+ }
676
+ }
677
+ catch {
678
+ // best-effort migration only
679
+ }
665
680
  }
666
681
  prepareStatements() {
667
682
  this.stmts = new Map();
@@ -759,6 +774,8 @@ export class SessionDB extends SQLiteBase {
759
774
  ORDER BY id DESC
760
775
  LIMIT 1`);
761
776
  p(S.incrementCompactCount, `UPDATE session_meta SET compact_count = compact_count + 1 WHERE session_id = ?`);
777
+ p(S.getUsageCursor, `SELECT usage_cursor FROM session_meta WHERE session_id = ?`);
778
+ p(S.setUsageCursor, `UPDATE session_meta SET usage_cursor = ? WHERE session_id = ?`);
762
779
  // ── Resume ──
763
780
  p(S.upsertResume, `INSERT INTO session_resume (session_id, snapshot, event_count)
764
781
  VALUES (?, ?, ?)
@@ -1127,6 +1144,22 @@ export class SessionDB extends SQLiteBase {
1127
1144
  incrementCompactCount(sessionId) {
1128
1145
  this.stmt(S.incrementCompactCount).run(sessionId);
1129
1146
  }
1147
+ /**
1148
+ * Read the per-session usage high-water cursor — the uuid of the last
1149
+ * assistant turn already emitted by the Stop hook's main-turn capture.
1150
+ * Returns null when unset (first Stop) or the session row is absent.
1151
+ */
1152
+ getUsageCursor(sessionId) {
1153
+ const row = this.stmt(S.getUsageCursor).get(sessionId);
1154
+ return row?.usage_cursor ?? null;
1155
+ }
1156
+ /**
1157
+ * Advance the per-session usage high-water cursor to `uuid`. No-op when the
1158
+ * session_meta row does not exist yet (callers must call ensureSession first).
1159
+ */
1160
+ setUsageCursor(sessionId, uuid) {
1161
+ this.stmt(S.setUsageCursor).run(uuid, sessionId);
1162
+ }
1130
1163
  // ═══════════════════════════════════════════
1131
1164
  // Resume
1132
1165
  // ═══════════════════════════════════════════
@@ -22,6 +22,35 @@ export interface SessionEvent {
22
22
  * `Fetched and indexed N sections (XKB)` preamble.
23
23
  */
24
24
  bytes_avoided?: number;
25
+ /**
26
+ * Optional — bytes the model PAID to ACCESS kept-out content for this event:
27
+ * the tool_response byte length of a `ctx_search` / `ctx_fetch_and_index`
28
+ * call. This is the OTHER half of the with/without ratio (bytes_avoided is
29
+ * the kept-out half). Sandbox compute (ctx_execute/batch/file) is work-output
30
+ * and is excluded. Present only when the call is a retrieval call and its
31
+ * tool_response is non-empty.
32
+ */
33
+ bytes_retrieved?: number;
34
+ /**
35
+ * Optional structured cost/usage fields (Wave 2b). Emitted by
36
+ * extractAgentUsage alongside the colon-string `data` so the forward
37
+ * envelope can spread them to the platform as typed columns instead of an
38
+ * opaque blob. Present only when the source signal is present; cost_usd is
39
+ * omitted on a price miss or a zero-token turn.
40
+ */
41
+ model_id?: string;
42
+ input_tokens?: number;
43
+ output_tokens?: number;
44
+ cache_read_tokens?: number;
45
+ cache_creation_tokens?: number;
46
+ cost_usd?: number;
47
+ /**
48
+ * "task_cumulative" on agent_usage events whose tokens are a Task sub-agent's
49
+ * usage SUMMED across its whole run (not one turn). The platform buckets these
50
+ * as lifetime spend and never prices them per-turn — see
51
+ * docs/handoff/cumulative-cost-bug.md.
52
+ */
53
+ usage_scope?: string;
25
54
  }
26
55
  export interface ToolCall {
27
56
  toolName: string;
@@ -43,6 +72,201 @@ export interface HookInput {
43
72
  is_error?: boolean;
44
73
  };
45
74
  }
75
+ /** Input shape `buildAgentUsageEvent` consumes — re-exported for parser typing. */
76
+ export interface AgentUsageCounts {
77
+ model_id: string;
78
+ input_tokens: number;
79
+ output_tokens: number;
80
+ cache_creation_tokens: number;
81
+ cache_read_tokens: number;
82
+ native_cost_usd?: number | null;
83
+ }
84
+ export { parseKimiUsage, extractKimiUsageSince } from "../adapters/kimi/usage.js";
85
+ export { parseQwenUsage, extractQwenUsageSince } from "../adapters/qwen-code/usage.js";
86
+ /**
87
+ * Pi (oh-my-pi) per-turn usage parser.
88
+ *
89
+ * Maps a Pi `turn_end` payload (`{ message: AssistantMessage }`) to the
90
+ * `buildAgentUsageEvent` input shape, or null when there is nothing to record.
91
+ *
92
+ * Field provenance (adapter-matrix/pi.md @320261f + cited refs):
93
+ * - usage: AssistantMessage.usage (ai/src/types.ts:521 -> catalog/src/types.ts:100-145)
94
+ * - model_id: AssistantMessage.model (ai/src/types.ts:510; kept "provider/model" — builder normalizes)
95
+ * - input: Usage.input -> input_tokens
96
+ * - output: Usage.output -> output_tokens
97
+ * - cacheWrite: Usage.cacheWrite -> cache_creation_tokens
98
+ * - cacheRead: Usage.cacheRead -> cache_read_tokens
99
+ * - native USD: Usage.cost.total -> native_cost_usd (HIGH confidence; no price-table needed)
100
+ *
101
+ * The event is per-turn incremental (per-response usage; anthropic.ts:1893-1901;
102
+ * "for the turn" catalog/types.ts:103), so each turn_end maps to exactly one
103
+ * agent_usage event with no cross-turn accumulation.
104
+ *
105
+ * Algorithmic + null-safe, NO regex. Accepts either the full TurnEndEvent
106
+ * (`{ message }`) or a bare AssistantMessage (`{ usage, model }`) so callers
107
+ * can pass `event` or `event.message` interchangeably. Returns null when the
108
+ * payload is not an assistant message, carries no usage object, or every token
109
+ * bucket is zero/absent (an all-zero turn emits no event — matches
110
+ * buildAgentUsageEvent's own zero->null contract).
111
+ */
112
+ export declare function parsePiUsage(payload: unknown): AgentUsageCounts | null;
113
+ /**
114
+ * openclaw `model.usage` diagnostic-event capture — parseOpenclawUsage.
115
+ *
116
+ * openclaw exposes a first-class `model.usage` diagnostic event
117
+ * (`DiagnosticUsageEvent`, refs/platforms/openclaw/src/infra/diagnostic-events.ts:18-47),
118
+ * emitted once per turn and consumed via `onDiagnosticEvent(listener)`
119
+ * (diagnostic-events.ts:1156) — the same bus the first-party diagnostics-otel /
120
+ * diagnostics-prometheus extensions read.
121
+ *
122
+ * Field mapping (openclaw → AgentUsageCounts):
123
+ * evt.usage.input → input_tokens
124
+ * evt.usage.output → output_tokens
125
+ * evt.usage.cacheWrite→ cache_creation_tokens (cache-creation)
126
+ * evt.usage.cacheRead → cache_read_tokens (cache-read)
127
+ * evt.costUsd → native_cost_usd (pre-computed via estimateUsageCost,
128
+ * agent-runner.ts:1995 — preferred over catalog)
129
+ * evt.model → model_id
130
+ *
131
+ * CRITICAL: read `evt.usage` (the PER-TURN TOTAL — "Last Turn Total"
132
+ * agent-runner.ts:943), NEVER `evt.lastCallUsage` (the last-model-call DELTA,
133
+ * diagnostic-events.ts:34-40). Summing both would double-count.
134
+ *
135
+ * Returns AgentUsageCounts (the buildAgentUsageEvent input shape) or null when
136
+ * the event is not a usage event / carries no usage / sums to zero. Pure,
137
+ * null-safe, algorithmic — NO regex.
138
+ */
139
+ export declare function parseOpenclawUsage(payload: unknown): AgentUsageCounts | null;
140
+ /**
141
+ * opencode per-turn usage parser.
142
+ *
143
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
144
+ * adapter-matrix/opencode.md. opencode tracks usage per *assistant message*; the
145
+ * usage-bearing payload reaches a plugin via the `message.updated` bus event,
146
+ * whose `event.properties.info` is the full Message. The assistant token shape
147
+ * (refs platforms/opencode .../session/message.ts) is:
148
+ * info.tokens = { input, output, reasoning, cache: { read, write } }
149
+ * info.cost = USD cost for this message
150
+ * info.modelID / info.providerID (older refs may expose a single info.model)
151
+ *
152
+ * Field mapping (refs message.ts):
153
+ * tokens.input -> input_tokens
154
+ * tokens.output -> output_tokens
155
+ * tokens.cache.read -> cache_read_tokens
156
+ * tokens.cache.write -> cache_creation_tokens
157
+ * modelID/providerID -> model_id (`${providerID}/${modelID}` when both present)
158
+ * cost -> native_cost_usd
159
+ *
160
+ * LAST-STEP-SNAPSHOT CAVEAT (refs processor.ts:717-718): message-level
161
+ * `.tokens` is OVERWRITTEN every step-finish, so it holds the LAST step's usage
162
+ * — not the turn total. `.cost`, however, ACCUMULATES (`cost += usage.cost`) and
163
+ * is the correct cumulative turn cost. We therefore pass `info.cost` through as
164
+ * native_cost_usd so the billed $ is exact even though the token snapshot is
165
+ * imprecise; the token columns remain best-effort (last-step) telemetry. A true
166
+ * turn-total token sum would require summing per-step Step.Ended parts, which the
167
+ * `message.updated` payload does not carry — out of scope for this snapshot-based
168
+ * capture.
169
+ *
170
+ * Accepts either the bus event (`{ properties: { info } }`), the wrapped
171
+ * `{ event: { properties: { info } } }`, or the bare Message (`info`) so the
172
+ * caller can hand us whatever the SDK surfaces. NO regex — pure algorithmic,
173
+ * null-safe traversal. Returns null when the payload is not an assistant
174
+ * message, carries no tokens object, or every token bucket is zero/absent
175
+ * (mirrors buildAgentUsageEvent's zero->null contract).
176
+ */
177
+ export declare function parseOpencodeUsage(payload: unknown): AgentUsageCounts | null;
178
+ /**
179
+ * Build a structured `agent_usage` event from summed per-model token counts.
180
+ * Emits the colon-string `data` (human/debug + back-compat) AND the structured
181
+ * top-level fields the forward envelope spreads to the platform. cost_usd is the
182
+ * finite native_cost_usd when supplied, otherwise the pricing-catalog price — omitted on a price miss. Returns null when every token
183
+ * bucket is zero/absent (so an all-zero model emits no event).
184
+ */
185
+ export declare function buildAgentUsageEvent(counts: {
186
+ model_id: string;
187
+ input_tokens: number;
188
+ output_tokens: number;
189
+ cache_creation_tokens: number;
190
+ cache_read_tokens: number;
191
+ /**
192
+ * Provider-supplied USD cost for this turn. When a finite number, it is
193
+ * preferred over the catalog computation (openclaw / pi / omp / opencode
194
+ * ship a native cost — trust the source over our price table). Omit/null to
195
+ * derive cost_usd from the pricing catalog.
196
+ */
197
+ native_cost_usd?: number | null;
198
+ }): SessionEvent | null;
199
+ /**
200
+ * gemini-cli AfterModel usage capture — parse ONE AfterModel hook payload into
201
+ * a builder `agent_usage` event (or null). Pure, null-safe, struct-only — NO regex.
202
+ *
203
+ * Refs (docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md):
204
+ * - AfterModel fires per model call inside the gemini-cli stream loop
205
+ * (geminiChat.ts:1213); the hook input carries `llm_request` + `llm_response`
206
+ * (hooks/types.ts:692-695).
207
+ * - `llm_response.usageMetadata` exposes promptTokenCount / candidatesTokenCount
208
+ * / totalTokenCount (hookTranslator.ts:60-64).
209
+ * - model_id = `response.modelVersion || req.model` (loggingContentGenerator.ts:405,553).
210
+ *
211
+ * Mapping → builder shape:
212
+ * promptTokenCount → input_tokens
213
+ * candidatesTokenCount → output_tokens
214
+ * thoughtsTokenCount → ADDED into output_tokens (Gemini bills reasoning as output)
215
+ * cachedContentTokenCount → cache_read_tokens (when present)
216
+ * model_id → response.modelVersion || llm_request.model
217
+ *
218
+ * CAVEAT — the DECOUPLED AfterModel payload (hookTranslator.ts:60-64) forwards
219
+ * only prompt/candidates/total and DROPS cachedContentTokenCount +
220
+ * thoughtsTokenCount. We map those two defensively WHEN PRESENT (richer payload
221
+ * variant / future fix / OTel-fed input) but never depend on them — the common
222
+ * case is input+output only. For full cached/thoughts fidelity the OTel
223
+ * `api_response` exporter or the chat-recording JSON is the source of record.
224
+ *
225
+ * MULTI-CALL TURNS — one user turn that triggers tool calls spans MULTIPLE
226
+ * model calls, and each AfterModel payload carries the total for its own call. This function emits ONE
227
+ * priced event PER AfterModel call (each call is one billed round-trip).
228
+ * Per-userPromptId summation into a single per-turn total is DEFERRED — emitting
229
+ * per-call never double-counts, since each call's usageMetadata is the
230
+ * authoritative total for that call.
231
+ */
232
+ export declare function parseGeminiUsage(afterModelPayload: unknown): SessionEvent | null;
233
+ /**
234
+ * claude-code MAIN-turn usage capture — the dominant-spend path the Task
235
+ * subagent capture (extractAgentUsage) misses. Parses the session transcript
236
+ * JSONL char-algorithmically (NO regex): each `type:"assistant"` line carries
237
+ * `message.usage` + `message.model`, and usage is a per-turn DELTA, so summing
238
+ * the assistant turns per model = the exact billed total. `isSidechain:true`
239
+ * lines are Task-subagent sidechains written to a SEPARATE transcript (refs:
240
+ * sessionStorage.ts:1042) — excluding them keeps the main-turn sum from
241
+ * double-counting the separate Task-subagent capture. Emits one structured
242
+ * `agent_usage` event per distinct model.
243
+ */
244
+ export declare function extractTranscriptUsage(transcript: string): SessionEvent[];
245
+ /**
246
+ * Cursor-aware variant of extractTranscriptUsage for the Stop hook.
247
+ *
248
+ * The transcript grows every turn and the forward loop forwards ALL passed
249
+ * events unconditionally, so re-running extractTranscriptUsage on the whole
250
+ * transcript each Stop would double-count every prior turn. This walks only
251
+ * the turns NEW since the last Stop, keyed by a per-session high-water cursor
252
+ * (the `uuid` of the last assistant turn seen).
253
+ *
254
+ * - sinceUuid null/empty → process ALL non-sidechain assistant turns.
255
+ * - sinceUuid found → process only turns AFTER it (exclusive).
256
+ * - sinceUuid set but NOT found (transcript compaction dropped it) → process
257
+ * ONLY THE LAST non-sidechain assistant turn. Bounded by design: we never
258
+ * re-emit the whole history when the cursor falls off the front.
259
+ *
260
+ * The returned `cursor` is the uuid of the LAST non-sidechain assistant turn in the
261
+ * transcript (whether or not it carried usage), so the next Stop resumes
262
+ * exactly past it. When the transcript has no such turn, the input cursor is
263
+ * returned unchanged. Same char-algorithmic JSONL parse (NO regex), same
264
+ * sidechain exclusion, same buildAgentUsageEvent emission path.
265
+ */
266
+ export declare function extractTranscriptUsageSince(transcript: string, sinceUuid: string | null): {
267
+ events: SessionEvent[];
268
+ cursor: string | null;
269
+ };
46
270
  /** Reset error-resolution state (for testing). */
47
271
  export declare function resetErrorResolutionState(): void;
48
272
  /** Reset iteration-loop state (for testing). */