context-mode 1.0.166 → 1.0.168

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.codex-plugin/plugin.json +1 -1
  4. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  5. package/.openclaw-plugin/package.json +1 -1
  6. package/README.md +6 -4
  7. package/build/adapters/codex/usage.d.ts +107 -0
  8. package/build/adapters/codex/usage.js +227 -0
  9. package/build/adapters/gemini-cli/hooks.d.ts +7 -1
  10. package/build/adapters/gemini-cli/hooks.js +9 -1
  11. package/build/adapters/gemini-cli/index.js +11 -0
  12. package/build/adapters/kimi/paths.d.ts +20 -0
  13. package/build/adapters/kimi/paths.js +41 -1
  14. package/build/adapters/kimi/usage.d.ts +82 -0
  15. package/build/adapters/kimi/usage.js +217 -0
  16. package/build/adapters/omp/plugin.d.ts +6 -0
  17. package/build/adapters/omp/plugin.js +87 -2
  18. package/build/adapters/omp/usage.d.ts +49 -0
  19. package/build/adapters/omp/usage.js +110 -0
  20. package/build/adapters/openclaw/plugin.d.ts +10 -0
  21. package/build/adapters/openclaw/plugin.js +57 -0
  22. package/build/adapters/openclaw/usage.d.ts +34 -0
  23. package/build/adapters/openclaw/usage.js +52 -0
  24. package/build/adapters/opencode/plugin.d.ts +17 -0
  25. package/build/adapters/opencode/plugin.js +40 -1
  26. package/build/adapters/pi/extension.js +34 -1
  27. package/build/adapters/qwen-code/index.js +23 -1
  28. package/build/adapters/qwen-code/usage.d.ts +90 -0
  29. package/build/adapters/qwen-code/usage.js +222 -0
  30. package/build/session/analytics.js +30 -0
  31. package/build/session/db.d.ts +11 -0
  32. package/build/session/db.js +33 -0
  33. package/build/session/extract.d.ts +224 -0
  34. package/build/session/extract.js +705 -62
  35. package/build/session/model-prices.json +429 -0
  36. package/build/session/pricing.d.ts +64 -0
  37. package/build/session/pricing.js +151 -0
  38. package/cli.bundle.mjs +177 -170
  39. package/configs/antigravity-cli/plugin.json +1 -1
  40. package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
  41. package/configs/gemini-cli/settings.json +11 -0
  42. package/hooks/codex/stop.mjs +91 -4
  43. package/hooks/gemini-cli/aftermodel.mjs +70 -0
  44. package/hooks/kimi/stop.mjs +74 -3
  45. package/hooks/qwen-code/platform.mjs +1 -0
  46. package/hooks/qwen-code/stop.mjs +168 -0
  47. package/hooks/session-db.bundle.mjs +7 -7
  48. package/hooks/session-extract.bundle.mjs +3 -2
  49. package/hooks/session-loaders.mjs +16 -1
  50. package/hooks/stop.mjs +35 -2
  51. package/openclaw.plugin.json +1 -1
  52. package/package.json +1 -1
  53. package/server.bundle.mjs +108 -101
@@ -4,6 +4,7 @@
4
4
  *
5
5
  * All 13 event categories as specified in PRD Section 3.
6
6
  */
7
+ import { lookupPrice as catalogLookupPrice, computeCostUsd as catalogComputeCostUsd, } from "./pricing.js";
7
8
  // ── Internal helpers ───────────────────────────────────────────────────────
8
9
  /** Null-safe string coercion — no truncation, preserves full data. */
9
10
  function safeString(value) {
@@ -911,12 +912,40 @@ function extractMcpToolCall(input) {
911
912
  const payload = truncated
912
913
  ? `{"tool_name":${JSON.stringify(tool_name)},"params_raw":${JSON.stringify(cappedStr)},"truncated":true}`
913
914
  : `{"tool_name":${JSON.stringify(tool_name)},"params":${cappedStr}}`;
914
- return [{
915
- type: "mcp_tool_call",
916
- category: "mcp_tool_call",
917
- data: safeString(payload),
918
- priority: 4,
919
- }];
915
+ const event = {
916
+ type: "mcp_tool_call",
917
+ category: "mcp_tool_call",
918
+ data: safeString(payload),
919
+ priority: 4,
920
+ };
921
+ // Retrieval cost (the OTHER half of the with/without ratio): when this MCP
922
+ // call is a `ctx_search` or `ctx_fetch_and_index` retrieval, the tool_response
923
+ // IS the kept-out content the model paid to access — record its byte length.
924
+ // Sandbox compute (ctx_execute/batch/file) is work-output, NOT retrieval, so
925
+ // it is intentionally excluded. Match by suffix char-algorithmically (host
926
+ // prefixes the name like `mcp__plugin_…__ctx_search`); NO regex.
927
+ if (isRetrievalToolName(tool_name)) {
928
+ const response = safeString(input.tool_response);
929
+ if (response.length > 0) {
930
+ event.bytes_retrieved = Buffer.byteLength(response, "utf8");
931
+ }
932
+ }
933
+ return [event];
934
+ }
935
+ /** Tool-name suffixes that denote a RETRIEVAL call (kept-out content accessed). */
936
+ const RETRIEVAL_TOOL_SUFFIXES = ["ctx_search", "ctx_fetch_and_index"];
937
+ /**
938
+ * True when `toolName` ends with one of the retrieval suffixes. Char-level
939
+ * suffix comparison via String.prototype.endsWith — no regex. MCP host names
940
+ * arrive prefixed (e.g. `mcp__plugin_context-mode_context-mode__ctx_search`),
941
+ * so an exact-name check would miss them; suffix match is host-agnostic.
942
+ */
943
+ function isRetrievalToolName(toolName) {
944
+ for (const suffix of RETRIEVAL_TOOL_SUFFIXES) {
945
+ if (toolName.endsWith(suffix))
946
+ return true;
947
+ }
948
+ return false;
920
949
  }
921
950
  /**
922
951
  * Category 6 (tool-based): decision
@@ -1243,51 +1272,87 @@ function extractFileReadMetadata(input) {
1243
1272
  }];
1244
1273
  }
1245
1274
  /**
1246
- * Per-model USD price table — Anthropic public list pricing, $/MTok.
1247
- * Verified against platform.claude.com/docs/en/about-claude/pricing,
1248
- * cloudzero.com, finout.io 2026-06 (cache: 5-min cache_write = 1.25× input,
1249
- * cache_read = 0.10× input). Fast-mode variants (e.g. opus-4-8-fast at
1250
- * $10/$50) are intentionally NOT mapped — they ship as separate model
1251
- * ids and would dilute the standard-tier dashboards if blended here.
1275
+ * Per-model USD pricing now lives in the curated multi-vendor catalog
1276
+ * (src/pricing/catalog.ts), which prices each model from ITS OWN row across
1277
+ * Anthropic / OpenAI / Google / Chinese / other vendors. This kills the old
1278
+ * bug where the hardcoded Anthropic-only table here billed every non-Claude
1279
+ * model at Claude-Sonnet's `default` rate. Unknown ids now resolve to a null
1280
+ * cost (one console.warn) instead of a silently wrong Claude rate.
1252
1281
  *
1253
- * NOTE: 16-oss-verify-gap-prd Gap #1 quoted Opus at $15/$75 — that is
1254
- * the prior Opus 4 (non-4.7) rate. Opus 4.7 and 4.8 ship at $5/$25.
1282
+ * resolveModelId picks the first non-empty model id from the hook candidates;
1283
+ * date-suffixed ids (e.g. claude-haiku-4-5-20251001) are reduced to a catalog
1284
+ * hit by progressively dropping trailing `-segment` suffixes (NO regex).
1255
1285
  */
1256
- const MODEL_PRICING_USD_PER_MTOK = {
1257
- "claude-opus-4-8": { input: 5.00, output: 25.00, cache_write: 6.25, cache_read: 0.50 },
1258
- "claude-opus-4-7": { input: 5.00, output: 25.00, cache_write: 6.25, cache_read: 0.50 },
1259
- "claude-sonnet-4-6": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
1260
- "claude-haiku-4-5": { input: 1.00, output: 5.00, cache_write: 1.25, cache_read: 0.10 },
1261
- default: { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
1262
- };
1263
- function resolveModelKey(input, parsedResp) {
1286
+ function resolveModelId(input, parsedResp) {
1264
1287
  const candidates = [
1265
1288
  input.tool_input?.model,
1266
1289
  input.model,
1267
1290
  parsedResp.model,
1268
1291
  ];
1269
- const keys = Object.keys(MODEL_PRICING_USD_PER_MTOK).filter((k) => k !== "default");
1270
1292
  for (const c of candidates) {
1271
- if (typeof c !== "string" || c.length === 0)
1272
- continue;
1273
- if (c in MODEL_PRICING_USD_PER_MTOK)
1293
+ if (typeof c === "string" && c.length > 0)
1274
1294
  return c;
1275
- // Prefix match for date-suffixed model ids
1276
- // (e.g. claude-haiku-4-5-20251001 → claude-haiku-4-5)
1277
- for (const key of keys) {
1278
- if (c.startsWith(key))
1279
- return key;
1280
- }
1281
1295
  }
1282
- return "default";
1296
+ return "";
1283
1297
  }
1284
- function computeCostUsd(modelKey, inputTokens, outputTokens, cacheCreationTokens, cacheReadTokens) {
1285
- const price = MODEL_PRICING_USD_PER_MTOK[modelKey] ?? MODEL_PRICING_USD_PER_MTOK.default;
1286
- const totalMicroDollars = inputTokens * price.input +
1287
- outputTokens * price.output +
1288
- cacheCreationTokens * price.cache_write +
1289
- cacheReadTokens * price.cache_read;
1290
- return totalMicroDollars / 1_000_000;
1298
+ /**
1299
+ * Drop one trailing `-<segment>` from a model id, char-algorithmically (no
1300
+ * regex): walks back to the last '-' and returns the head, or null when there
1301
+ * is no usable separator. Lets a date-suffixed id fall back to its base id
1302
+ * (claude-haiku-4-5-20251001 → claude-haiku-4-5 → … ) one segment at a time.
1303
+ */
1304
+ function dropTrailingSegment(id) {
1305
+ for (let i = id.length - 1; i > 0; i--) {
1306
+ if (id.charCodeAt(i) === 45 /* '-' */)
1307
+ return id.slice(0, i);
1308
+ }
1309
+ return null;
1310
+ }
1311
+ /**
1312
+ * Resolve a model id to one the catalog can price: try the raw id, then
1313
+ * progressively trim trailing `-segment` suffixes so a date-suffixed id still
1314
+ * prices off its base model. Probes with lookupPrice (no warn) and returns the
1315
+ * first id that hits, or "" on a full miss — so cost compute warns at most once.
1316
+ */
1317
+ function resolveCatalogId(modelId) {
1318
+ let candidate = modelId;
1319
+ while (candidate && candidate.length > 0) {
1320
+ if (catalogLookupPrice(candidate) !== null)
1321
+ return candidate;
1322
+ candidate = dropTrailingSegment(candidate);
1323
+ }
1324
+ return "";
1325
+ }
1326
+ /**
1327
+ * Cost for a turn via the catalog. Returns null on a price miss (catalog emits
1328
+ * one console.warn of the unmatched id) or when all token buckets are zero.
1329
+ */
1330
+ function computeTurnCostUsd(modelId, inputTokens, outputTokens, cacheCreationTokens, cacheReadTokens) {
1331
+ const resolved = resolveCatalogId(modelId);
1332
+ // Feed the resolved id when found; otherwise pass the raw id so the catalog's
1333
+ // single miss-warning carries the id the operator actually saw.
1334
+ return catalogComputeCostUsd(resolved || modelId, {
1335
+ input_tokens: inputTokens,
1336
+ output_tokens: outputTokens,
1337
+ cache_creation_tokens: cacheCreationTokens,
1338
+ cache_read_tokens: cacheReadTokens,
1339
+ });
1340
+ }
1341
+ /**
1342
+ * Format a cost to a compact `cost_usd` string, char-algorithmically (no
1343
+ * regex). Renders 6 decimals, drops trailing zeros, and keeps a single `.0`
1344
+ * when the fraction trims to empty (e.g. 0 → "0.0"), matching the prior
1345
+ * `.toFixed(6).replace(...)` output exactly.
1346
+ */
1347
+ function formatCostUsd(cost) {
1348
+ let s = cost.toFixed(6);
1349
+ let end = s.length;
1350
+ while (end > 0 && s.charCodeAt(end - 1) === 48 /* '0' */)
1351
+ end--;
1352
+ s = s.slice(0, end);
1353
+ if (s.length > 0 && s.charCodeAt(s.length - 1) === 46 /* '.' */)
1354
+ s += "0";
1355
+ return s;
1291
1356
  }
1292
1357
  /**
1293
1358
  * AgentOutput.usage capture — fires on the Task sub-agent dispatcher.
@@ -1340,29 +1405,607 @@ function extractAgentUsage(input) {
1340
1405
  if (typeof usage.service_tier === "string") {
1341
1406
  parts.push(`tier:${usage.service_tier.slice(0, 32)}`);
1342
1407
  }
1343
- // Gap #1 (16-oss-verify-gap-prd) — derive cost_usd from per-model pricing
1344
- // when at least one token count is present. Zero-token case skips cost
1345
- // so dashboard never shows misleading "$0.00 for nothing" rows.
1346
- const inputTokens = typeof usage.input_tokens === "number" ? usage.input_tokens : 0;
1347
- const outputTokens = typeof usage.output_tokens === "number" ? usage.output_tokens : 0;
1348
- const cacheCreate = typeof usage.cache_creation_input_tokens === "number"
1349
- ? usage.cache_creation_input_tokens
1350
- : 0;
1351
- const cacheRead = typeof usage.cache_read_input_tokens === "number"
1352
- ? usage.cache_read_input_tokens
1353
- : 0;
1354
- const anyTokens = inputTokens > 0 || outputTokens > 0 || cacheCreate > 0 || cacheRead > 0;
1355
- if (anyTokens) {
1356
- const modelKey = resolveModelKey(input, out);
1357
- const cost = computeCostUsd(modelKey, inputTokens, outputTokens, cacheCreate, cacheRead);
1358
- parts.push(`cost_usd:${cost.toFixed(6).replace(/0+$/, "").replace(/\.$/, ".0")}`);
1408
+ // CUMULATIVE-USAGE GUARD (docs/handoff/cumulative-cost-bug.md): a Task
1409
+ // tool_response carries the sub-agent's usage SUMMED across its entire run —
1410
+ // every internal turn re-reads the cache, so cache_read reaches the billions.
1411
+ // Pricing that cumulative figure as a single turn produced four-figure
1412
+ // per-event costs ($3,532 with cache_read 4.7B) that poisoned every FinOps
1413
+ // aggregate. We therefore do NOT derive cost_usd here. The raw token counts
1414
+ // stay, tagged usage_scope="task_cumulative", so the platform buckets them as
1415
+ // lifetime spend; real per-turn cost comes only from per-turn signals
1416
+ // (extractTranscriptUsage + each adapter's own session).
1417
+ const modelId = resolveModelId(input, out);
1418
+ // Wave 2b — emit structured top-level fields alongside the colon-string so
1419
+ // the forward envelope (which spreads `...event`) hands the platform typed
1420
+ // columns. Each field is set only when its source signal is present, so the
1421
+ // forward payload stays minimal. cost_usd is never set on this event (see the
1422
+ // cumulative-usage guard above). The colon-string `data` stays for human/debug + back-compat.
1423
+ const event = {
1424
+ type: "agent_usage",
1425
+ category: "cost",
1426
+ data: safeString(parts.join(" ")),
1427
+ priority: 2,
1428
+ };
1429
+ if (modelId.length > 0)
1430
+ event.model_id = modelId;
1431
+ if (typeof usage.input_tokens === "number")
1432
+ event.input_tokens = usage.input_tokens;
1433
+ if (typeof usage.output_tokens === "number")
1434
+ event.output_tokens = usage.output_tokens;
1435
+ if (typeof usage.cache_read_input_tokens === "number") {
1436
+ event.cache_read_tokens = usage.cache_read_input_tokens;
1359
1437
  }
1360
- return [{
1361
- type: "agent_usage",
1362
- category: "cost",
1363
- data: safeString(parts.join(" ")),
1364
- priority: 2,
1365
- }];
1438
+ if (typeof usage.cache_creation_input_tokens === "number") {
1439
+ event.cache_creation_tokens = usage.cache_creation_input_tokens;
1440
+ }
1441
+ event.usage_scope = "task_cumulative";
1442
+ return [event];
1443
+ }
1444
+ // ── Kimi Code (kimi-code) usage parsers ────────────────────────────────────
1445
+ // Implementation lives in src/adapters/kimi/usage.ts (per adapter ownership);
1446
+ // re-exported here so the hook-reachable session-extract bundle can import the
1447
+ // cursor-gated wire.jsonl reader without a separate per-adapter bundle. The
1448
+ // import is type-only-free (runtime callees buildAgentUsageEvent are hoisted),
1449
+ // so the extract.ts <-> usage.ts cycle is load-order safe.
1450
+ export { parseKimiUsage, extractKimiUsageSince } from "../adapters/kimi/usage.js";
1451
+ // ── Qwen Code (qwen-code) usage parsers ────────────────────────────────────
1452
+ // Implementation lives in src/adapters/qwen-code/usage.ts (per adapter
1453
+ // ownership); re-exported here so the hook-reachable session-extract bundle can
1454
+ // import the cursor-gated chats/<sessionId>.jsonl reader via the shared
1455
+ // loadExtract() loader, exactly like the kimi re-export above. Same load-order
1456
+ // safety: runtime callee buildAgentUsageEvent is hoisted within this module.
1457
+ export { parseQwenUsage, extractQwenUsageSince } from "../adapters/qwen-code/usage.js";
1458
+ /**
1459
+ * Pi (oh-my-pi) per-turn usage parser.
1460
+ *
1461
+ * Maps a Pi `turn_end` payload (`{ message: AssistantMessage }`) to the
1462
+ * `buildAgentUsageEvent` input shape, or null when there is nothing to record.
1463
+ *
1464
+ * Field provenance (adapter-matrix/pi.md @320261f + cited refs):
1465
+ * - usage: AssistantMessage.usage (ai/src/types.ts:521 -> catalog/src/types.ts:100-145)
1466
+ * - model_id: AssistantMessage.model (ai/src/types.ts:510; kept "provider/model" — builder normalizes)
1467
+ * - input: Usage.input -> input_tokens
1468
+ * - output: Usage.output -> output_tokens
1469
+ * - cacheWrite: Usage.cacheWrite -> cache_creation_tokens
1470
+ * - cacheRead: Usage.cacheRead -> cache_read_tokens
1471
+ * - native USD: Usage.cost.total -> native_cost_usd (HIGH confidence; no price-table needed)
1472
+ *
1473
+ * The event is per-turn incremental (per-response usage; anthropic.ts:1893-1901;
1474
+ * "for the turn" catalog/types.ts:103), so each turn_end maps to exactly one
1475
+ * agent_usage event with no cross-turn accumulation.
1476
+ *
1477
+ * Algorithmic + null-safe, NO regex. Accepts either the full TurnEndEvent
1478
+ * (`{ message }`) or a bare AssistantMessage (`{ usage, model }`) so callers
1479
+ * can pass `event` or `event.message` interchangeably. Returns null when the
1480
+ * payload is not an assistant message, carries no usage object, or every token
1481
+ * bucket is zero/absent (an all-zero turn emits no event — matches
1482
+ * buildAgentUsageEvent's own zero->null contract).
1483
+ */
1484
+ export function parsePiUsage(payload) {
1485
+ if (!payload || typeof payload !== "object")
1486
+ return null;
1487
+ const root = payload;
1488
+ // Unwrap TurnEndEvent.message when present; otherwise treat the payload as
1489
+ // the AssistantMessage itself.
1490
+ const maybeMessage = root.message;
1491
+ const message = maybeMessage && typeof maybeMessage === "object"
1492
+ ? maybeMessage
1493
+ : root;
1494
+ // Only assistant turns carry LLM usage. Custom/non-LLM turns are skipped.
1495
+ // Tolerate a missing role (some payloads omit it) but reject an explicit
1496
+ // non-assistant role.
1497
+ if (typeof message.role === "string" && message.role !== "assistant") {
1498
+ return null;
1499
+ }
1500
+ const usageRaw = message.usage;
1501
+ if (!usageRaw || typeof usageRaw !== "object")
1502
+ return null;
1503
+ const usage = usageRaw;
1504
+ const num = (v) => typeof v === "number" && Number.isFinite(v) && v > 0 ? v : 0;
1505
+ const input_tokens = num(usage.input);
1506
+ const output_tokens = num(usage.output);
1507
+ const cache_creation_tokens = num(usage.cacheWrite);
1508
+ const cache_read_tokens = num(usage.cacheRead);
1509
+ // Zero-everything turn → null (mirrors buildAgentUsageEvent's contract; keeps
1510
+ // the DB free of no-op cost events).
1511
+ if (input_tokens <= 0 &&
1512
+ output_tokens <= 0 &&
1513
+ cache_creation_tokens <= 0 &&
1514
+ cache_read_tokens <= 0) {
1515
+ return null;
1516
+ }
1517
+ // Pi-native USD cost lives on usage.cost.total. Preserve it only when finite;
1518
+ // omit (null) on absence so the builder falls back to the pricing catalog.
1519
+ let native_cost_usd = null;
1520
+ const costRaw = usage.cost;
1521
+ if (costRaw && typeof costRaw === "object") {
1522
+ const total = costRaw.total;
1523
+ if (typeof total === "number" && Number.isFinite(total)) {
1524
+ native_cost_usd = total;
1525
+ }
1526
+ }
1527
+ const model_id = typeof message.model === "string" ? message.model : "";
1528
+ return {
1529
+ model_id,
1530
+ input_tokens,
1531
+ output_tokens,
1532
+ cache_creation_tokens,
1533
+ cache_read_tokens,
1534
+ native_cost_usd,
1535
+ };
1536
+ }
1537
+ /**
1538
+ * openclaw `model.usage` diagnostic-event capture — parseOpenclawUsage.
1539
+ *
1540
+ * openclaw exposes a first-class `model.usage` diagnostic event
1541
+ * (`DiagnosticUsageEvent`, refs/platforms/openclaw/src/infra/diagnostic-events.ts:18-47),
1542
+ * emitted once per turn and consumed via `onDiagnosticEvent(listener)`
1543
+ * (diagnostic-events.ts:1156) — the same bus the first-party diagnostics-otel /
1544
+ * diagnostics-prometheus extensions read.
1545
+ *
1546
+ * Field mapping (openclaw → AgentUsageCounts):
1547
+ * evt.usage.input → input_tokens
1548
+ * evt.usage.output → output_tokens
1549
+ * evt.usage.cacheWrite→ cache_creation_tokens (cache-creation)
1550
+ * evt.usage.cacheRead → cache_read_tokens (cache-read)
1551
+ * evt.costUsd → native_cost_usd (pre-computed via estimateUsageCost,
1552
+ * agent-runner.ts:1995 — preferred over catalog)
1553
+ * evt.model → model_id
1554
+ *
1555
+ * CRITICAL: read `evt.usage` (the PER-TURN TOTAL — "Last Turn Total"
1556
+ * agent-runner.ts:943), NEVER `evt.lastCallUsage` (the last-model-call DELTA,
1557
+ * diagnostic-events.ts:34-40). Summing both would double-count.
1558
+ *
1559
+ * Returns AgentUsageCounts (the buildAgentUsageEvent input shape) or null when
1560
+ * the event is not a usage event / carries no usage / sums to zero. Pure,
1561
+ * null-safe, algorithmic — NO regex.
1562
+ */
1563
+ export function parseOpenclawUsage(payload) {
1564
+ if (!payload || typeof payload !== "object")
1565
+ return null;
1566
+ const evt = payload;
1567
+ // Only the `model.usage` diagnostic carries token usage. Tolerate an absent
1568
+ // type (defensive against a thinner payload variant) but reject any explicit
1569
+ // non-usage diagnostic (model.failover, log.record, …).
1570
+ if (typeof evt.type === "string" && evt.type !== "model.usage") {
1571
+ return null;
1572
+ }
1573
+ // PER-TURN TOTAL lives on `usage`. `lastCallUsage` is the last-call delta and
1574
+ // must NOT be consumed — reading it instead would understate (or, when summed
1575
+ // with usage, double-count) the turn.
1576
+ const usageRaw = evt.usage;
1577
+ if (!usageRaw || typeof usageRaw !== "object")
1578
+ return null;
1579
+ const usage = usageRaw;
1580
+ const num = (v) => typeof v === "number" && Number.isFinite(v) && v > 0 ? v : 0;
1581
+ const input_tokens = num(usage.input);
1582
+ const output_tokens = num(usage.output);
1583
+ const cache_creation_tokens = num(usage.cacheWrite);
1584
+ const cache_read_tokens = num(usage.cacheRead);
1585
+ // Zero-everything turn → null (mirrors buildAgentUsageEvent's contract; keeps
1586
+ // the DB free of no-op cost events).
1587
+ if (input_tokens <= 0 &&
1588
+ output_tokens <= 0 &&
1589
+ cache_creation_tokens <= 0 &&
1590
+ cache_read_tokens <= 0) {
1591
+ return null;
1592
+ }
1593
+ // openclaw ships a pre-computed USD cost at the TOP LEVEL (`evt.costUsd`, not
1594
+ // nested under usage). Preserve it only when finite; omit (null) on absence so
1595
+ // the builder falls back to the pricing catalog.
1596
+ const costRaw = evt.costUsd;
1597
+ const native_cost_usd = typeof costRaw === "number" && Number.isFinite(costRaw) ? costRaw : null;
1598
+ const model_id = typeof evt.model === "string" ? evt.model : "";
1599
+ return {
1600
+ model_id,
1601
+ input_tokens,
1602
+ output_tokens,
1603
+ cache_creation_tokens,
1604
+ cache_read_tokens,
1605
+ native_cost_usd,
1606
+ };
1607
+ }
1608
+ /**
1609
+ * opencode per-turn usage parser.
1610
+ *
1611
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
1612
+ * adapter-matrix/opencode.md. opencode tracks usage per *assistant message*; the
1613
+ * usage-bearing payload reaches a plugin via the `message.updated` bus event,
1614
+ * whose `event.properties.info` is the full Message. The assistant token shape
1615
+ * (refs platforms/opencode .../session/message.ts) is:
1616
+ * info.tokens = { input, output, reasoning, cache: { read, write } }
1617
+ * info.cost = USD cost for this message
1618
+ * info.modelID / info.providerID (older refs may expose a single info.model)
1619
+ *
1620
+ * Field mapping (refs message.ts):
1621
+ * tokens.input -> input_tokens
1622
+ * tokens.output -> output_tokens
1623
+ * tokens.cache.read -> cache_read_tokens
1624
+ * tokens.cache.write -> cache_creation_tokens
1625
+ * modelID/providerID -> model_id (`${providerID}/${modelID}` when both present)
1626
+ * cost -> native_cost_usd
1627
+ *
1628
+ * LAST-STEP-SNAPSHOT CAVEAT (refs processor.ts:717-718): message-level
1629
+ * `.tokens` is OVERWRITTEN every step-finish, so it holds the LAST step's usage
1630
+ * — not the turn total. `.cost`, however, ACCUMULATES (`cost += usage.cost`) and
1631
+ * is the correct cumulative turn cost. We therefore pass `info.cost` through as
1632
+ * native_cost_usd so the billed $ is exact even though the token snapshot is
1633
+ * imprecise; the token columns remain best-effort (last-step) telemetry. A true
1634
+ * turn-total token sum would require summing per-step Step.Ended parts, which the
1635
+ * `message.updated` payload does not carry — out of scope for this snapshot-based
1636
+ * capture.
1637
+ *
1638
+ * Accepts either the bus event (`{ properties: { info } }`), the wrapped
1639
+ * `{ event: { properties: { info } } }`, or the bare Message (`info`) so the
1640
+ * caller can hand us whatever the SDK surfaces. NO regex — pure algorithmic,
1641
+ * null-safe traversal. Returns null when the payload is not an assistant
1642
+ * message, carries no tokens object, or every token bucket is zero/absent
1643
+ * (mirrors buildAgentUsageEvent's zero->null contract).
1644
+ */
1645
+ export function parseOpencodeUsage(payload) {
1646
+ if (!payload || typeof payload !== "object")
1647
+ return null;
1648
+ const root = payload;
1649
+ // Unwrap, most-specific first: { event: { properties: { info } } } →
1650
+ // { properties: { info } } → bare message. Each hop is guarded so a missing
1651
+ // layer simply falls through; if no `info` object is found, the original payload (root) is treated as the message.
1652
+ const eventLayer = root.event && typeof root.event === "object"
1653
+ ? root.event
1654
+ : root;
1655
+ const propsLayer = eventLayer.properties && typeof eventLayer.properties === "object"
1656
+ ? eventLayer.properties
1657
+ : eventLayer;
1658
+ const message = propsLayer.info && typeof propsLayer.info === "object"
1659
+ ? propsLayer.info
1660
+ : root;
1661
+ // Only assistant messages carry token usage. Tolerate a missing role but
1662
+ // reject an explicit non-assistant one.
1663
+ if (typeof message.role === "string" && message.role !== "assistant") {
1664
+ return null;
1665
+ }
1666
+ const tokensRaw = message.tokens;
1667
+ if (!tokensRaw || typeof tokensRaw !== "object")
1668
+ return null;
1669
+ const tokens = tokensRaw;
1670
+ const num = (v) => typeof v === "number" && Number.isFinite(v) && v > 0 ? v : 0;
1671
+ const cacheRaw = tokens.cache;
1672
+ const cache = cacheRaw && typeof cacheRaw === "object"
1673
+ ? cacheRaw
1674
+ : {};
1675
+ const input_tokens = num(tokens.input);
1676
+ const output_tokens = num(tokens.output);
1677
+ const cache_read_tokens = num(cache.read);
1678
+ const cache_creation_tokens = num(cache.write);
1679
+ // Zero-everything turn → null (keeps the DB free of no-op cost events).
1680
+ if (input_tokens <= 0 &&
1681
+ output_tokens <= 0 &&
1682
+ cache_creation_tokens <= 0 &&
1683
+ cache_read_tokens <= 0) {
1684
+ return null;
1685
+ }
1686
+ // Native cumulative USD cost (preferred — exact, immune to the last-step
1687
+ // token-snapshot imprecision). Omit (null) on absence so the builder falls
1688
+ // back to the pricing catalog over the last-step token columns.
1689
+ const costRaw = message.cost;
1690
+ const native_cost_usd = typeof costRaw === "number" && Number.isFinite(costRaw) ? costRaw : null;
1691
+ // Billed model id. Prefer the `${providerID}/${modelID}` pair (how opencode
1692
+ // itself addresses the model); fall back to a bare modelID, then a single
1693
+ // `model` string (older refs shape). Empty when none present.
1694
+ const modelID = typeof message.modelID === "string" ? message.modelID : "";
1695
+ const providerID = typeof message.providerID === "string" ? message.providerID : "";
1696
+ let model_id = "";
1697
+ if (modelID.length > 0) {
1698
+ model_id = providerID.length > 0 ? `${providerID}/${modelID}` : modelID;
1699
+ }
1700
+ else if (typeof message.model === "string") {
1701
+ model_id = message.model;
1702
+ }
1703
+ return {
1704
+ model_id,
1705
+ input_tokens,
1706
+ output_tokens,
1707
+ cache_creation_tokens,
1708
+ cache_read_tokens,
1709
+ native_cost_usd,
1710
+ };
1711
+ }
1712
+ /**
1713
+ * Build a structured `agent_usage` event from summed per-model token counts.
1714
+ * Emits the colon-string `data` (human/debug + back-compat) AND the structured
1715
+ * top-level fields the forward envelope spreads to the platform. cost_usd via
1716
+ * the pricing catalog — omitted on a price miss. Returns null when every token
1717
+ * bucket is zero/absent (so an all-zero model emits no event).
1718
+ */
1719
+ export function buildAgentUsageEvent(counts) {
1720
+ const { model_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, native_cost_usd } = counts;
1721
+ if (input_tokens <= 0 && output_tokens <= 0 && cache_creation_tokens <= 0 && cache_read_tokens <= 0) {
1722
+ return null;
1723
+ }
1724
+ const parts = [`tokens_in:${input_tokens}`, `tokens_out:${output_tokens}`];
1725
+ if (cache_creation_tokens > 0)
1726
+ parts.push(`cache_create:${cache_creation_tokens}`);
1727
+ if (cache_read_tokens > 0)
1728
+ parts.push(`cache_read:${cache_read_tokens}`);
1729
+ const cost = (typeof native_cost_usd === "number" && Number.isFinite(native_cost_usd))
1730
+ ? native_cost_usd
1731
+ : computeTurnCostUsd(model_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens);
1732
+ if (cost !== null)
1733
+ parts.push(`cost_usd:${formatCostUsd(cost)}`);
1734
+ const event = {
1735
+ type: "agent_usage",
1736
+ category: "cost",
1737
+ data: safeString(parts.join(" ")),
1738
+ priority: 2,
1739
+ };
1740
+ if (model_id.length > 0)
1741
+ event.model_id = model_id;
1742
+ event.input_tokens = input_tokens;
1743
+ event.output_tokens = output_tokens;
1744
+ if (cache_read_tokens > 0)
1745
+ event.cache_read_tokens = cache_read_tokens;
1746
+ if (cache_creation_tokens > 0)
1747
+ event.cache_creation_tokens = cache_creation_tokens;
1748
+ if (cost !== null)
1749
+ event.cost_usd = cost;
1750
+ return event;
1751
+ }
1752
/**
 * gemini-cli AfterModel usage capture — parse ONE AfterModel hook payload into
 * a builder `agent_usage` event (or null). Pure, null-safe, struct-only — NO regex.
 *
 * Refs (docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md):
 *   - AfterModel fires per model call inside the gemini-cli stream loop
 *     (geminiChat.ts:1213); the hook input carries `llm_request` + `llm_response`
 *     (hooks/types.ts:692-695).
 *   - `llm_response.usageMetadata` exposes promptTokenCount / candidatesTokenCount
 *     / totalTokenCount (hookTranslator.ts:60-64).
 *   - model_id = `response.modelVersion || req.model` (loggingContentGenerator.ts:405,553).
 *
 * Mapping → builder shape:
 *   promptTokenCount − cachedContentTokenCount → input_tokens (never below 0)
 *   candidatesTokenCount                       → output_tokens
 *   thoughtsTokenCount                         → ADDED into output_tokens (Gemini bills reasoning as output)
 *   cachedContentTokenCount                    → cache_read_tokens (when present)
 *   model_id                                   → response.modelVersion || llm_request.model
 *
 * WHY THE SUBTRACTION — the Gemini API reports promptTokenCount as the total
 * effective prompt size, which already INCLUDES the cached-content tokens.
 * Passing the full prompt count as input_tokens and the cached count as
 * cache_read_tokens would charge the cached tokens twice, so the cached share
 * is removed from the input share.
 *
 * CAVEAT — the DECOUPLED AfterModel payload (hookTranslator.ts:60-64) forwards
 * only prompt/candidates/total and DROPS cachedContentTokenCount +
 * thoughtsTokenCount. We map those two defensively WHEN PRESENT (richer payload
 * variant / future fix / OTel-fed input) but never depend on them — the common
 * case is input+output only. For full cached/thoughts fidelity the OTel
 * `api_response` exporter or the chat-recording JSON is the source of record.
 *
 * MULTI-CALL TURNS — one user turn that triggers tool calls spans MULTIPLE
 * model calls, each AfterModel cumulative within itself. This fn emits ONE
 * priced event PER AfterModel call (each call is one billed round-trip).
 * Per-userPromptId summation into a single per-turn total is DEFERRED — emitting
 * per-call never double-counts, since each call's usageMetadata is the
 * authoritative total for that call.
 *
 * @param {unknown} afterModelPayload - Raw AfterModel hook input.
 * @returns {object | null} Whatever `buildAgentUsageEvent` returns for the
 *   mapped counts, or null when the payload, `llm_response` or
 *   `usageMetadata` is missing or not an object.
 */
export function parseGeminiUsage(afterModelPayload) {
    if (!afterModelPayload || typeof afterModelPayload !== "object")
        return null;
    const response = afterModelPayload.llm_response;
    if (!response || typeof response !== "object")
        return null;
    const usage = response.usageMetadata;
    if (!usage || typeof usage !== "object")
        return null;
    // Only finite, positive numbers count; anything else (missing, string,
    // NaN, Infinity, negative) contributes 0 so a malformed field can never
    // poison or shrink the totals.
    const count = (value) => (typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0);
    const prompt = count(usage.promptTokenCount);
    const candidates = count(usage.candidatesTokenCount);
    const thoughts = count(usage.thoughtsTokenCount);
    const cached = count(usage.cachedContentTokenCount);
    // promptTokenCount already contains the cached tokens — bill them once, as
    // cache reads, and keep only the remainder as fresh input.
    const input = Math.max(0, prompt - cached);
    // Gemini bills reasoning (thoughts) as output tokens — fold into output.
    const output = candidates + thoughts;
    // model_id = response.modelVersion (server-confirmed) || llm_request.model.
    const request = afterModelPayload.llm_request;
    const requestModel = request && typeof request === "object" && typeof request.model === "string"
        ? request.model
        : "";
    const modelVersion = typeof response.modelVersion === "string" ? response.modelVersion : "";
    const modelId = modelVersion.length > 0 ? modelVersion : requestModel;
    // gemini exposes no native cost — cost_usd is derived from the pricing catalog
    // inside buildAgentUsageEvent (native_cost_usd omitted). All-zero ⇒ null.
    return buildAgentUsageEvent({
        model_id: modelId,
        input_tokens: input,
        output_tokens: output,
        cache_creation_tokens: 0,
        cache_read_tokens: cached,
    });
}
1821
/**
 * Coerce one raw usage field to a safe token count.
 *
 * Only finite, positive numbers are kept. `JSON.parse` can yield `Infinity`
 * (e.g. `1e999`), and a negative or non-numeric field must never shrink or
 * poison a running sum, so everything else becomes 0.
 *
 * @param {unknown} value - Raw field read from a transcript `usage` object.
 * @returns {number} The token count, or 0 when the field is unusable.
 */
function toTranscriptTokenCount(value) {
    return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;
}
/**
 * Parse a session transcript (JSONL) into its main-thread assistant turns, in
 * file order. Char-algorithmic — NO regex: the text is split on "\n" and each
 * non-empty trimmed line is handed to `JSON.parse`.
 *
 * A line is kept only when it is a JSON object with `type:"assistant"`, is not
 * flagged `isSidechain:true`, and carries an object `message` with a non-empty
 * string `model`.
 *
 * @param {string} transcript - Raw JSONL transcript text.
 * @returns {Array<{uuid: string|null, messageId: string|null, model: string,
 *   hasUsage: boolean, input: number, output: number, cacheCreate: number,
 *   cacheRead: number}>} One record per kept line.
 */
function readMainAssistantTurns(transcript) {
    const turns = [];
    for (const rawLine of transcript.split("\n")) {
        const line = rawLine.trim();
        if (line.length === 0)
            continue;
        let entry;
        try {
            entry = JSON.parse(line);
        }
        catch {
            // Deliberate best-effort: a malformed or half-written line is
            // skipped so one bad record cannot discard the whole transcript.
            continue;
        }
        if (!entry || typeof entry !== "object")
            continue;
        if (entry.type !== "assistant" || entry.isSidechain === true)
            continue;
        const message = entry.message;
        if (!message || typeof message !== "object")
            continue;
        if (typeof message.model !== "string" || message.model.length === 0)
            continue;
        const rawUsage = message.usage;
        const hasUsage = Boolean(rawUsage) && typeof rawUsage === "object";
        const usage = hasUsage ? rawUsage : {};
        turns.push({
            uuid: typeof entry.uuid === "string" && entry.uuid.length > 0 ? entry.uuid : null,
            messageId: typeof message.id === "string" && message.id.length > 0 ? message.id : null,
            model: message.model,
            hasUsage,
            input: toTranscriptTokenCount(usage.input_tokens),
            output: toTranscriptTokenCount(usage.output_tokens),
            cacheCreate: toTranscriptTokenCount(usage.cache_creation_input_tokens),
            cacheRead: toTranscriptTokenCount(usage.cache_read_input_tokens),
        });
    }
    return turns;
}
/**
 * Sum the given turns per model and emit one event per model through
 * `buildAgentUsageEvent`, in first-seen model order.
 *
 * `usage` describes ONE API response, identified by `message.id`. When several
 * transcript lines share a model and a `message.id` they describe the same
 * response, so they are counted once, taking the per-field maximum (the most
 * complete figure seen). Lines without a `message.id` are counted individually.
 *
 * @param {ReturnType<typeof readMainAssistantTurns>} turns - Turns to price.
 * @returns {Array<object>} The non-null events returned by the builder.
 */
function buildTranscriptUsageEvents(turns) {
    const perResponse = new Map();
    let anonymousCount = 0;
    for (const turn of turns) {
        let key;
        if (turn.messageId === null) {
            key = `line:${anonymousCount}`;
            anonymousCount += 1;
        }
        else {
            key = `id:${JSON.stringify([turn.model, turn.messageId])}`;
        }
        const seen = perResponse.get(key);
        if (seen === undefined) {
            perResponse.set(key, { ...turn });
            continue;
        }
        seen.input = Math.max(seen.input, turn.input);
        seen.output = Math.max(seen.output, turn.output);
        seen.cacheCreate = Math.max(seen.cacheCreate, turn.cacheCreate);
        seen.cacheRead = Math.max(seen.cacheRead, turn.cacheRead);
    }
    const perModel = new Map();
    for (const response of perResponse.values()) {
        const sum = perModel.get(response.model) ?? { input: 0, output: 0, cacheCreate: 0, cacheRead: 0 };
        sum.input += response.input;
        sum.output += response.output;
        sum.cacheCreate += response.cacheCreate;
        sum.cacheRead += response.cacheRead;
        perModel.set(response.model, sum);
    }
    const events = [];
    for (const [model, sum] of perModel) {
        const event = buildAgentUsageEvent({
            model_id: model,
            input_tokens: sum.input,
            output_tokens: sum.output,
            cache_creation_tokens: sum.cacheCreate,
            cache_read_tokens: sum.cacheRead,
        });
        if (event)
            events.push(event);
    }
    return events;
}
/**
 * claude-code MAIN-turn usage capture — the dominant-spend path the Task
 * subagent capture (extractAgentUsage) misses. Parses the session transcript
 * JSONL char-algorithmically (NO regex): each `type:"assistant"` line carries
 * `message.usage` + `message.model`. Usage is summed per model, counting each
 * `message.id` once so a response recorded across several lines is not billed
 * repeatedly. `isSidechain:true` lines are Task-subagent sidechains written to
 * a SEPARATE transcript (refs: sessionStorage.ts:1042) — excluding them keeps
 * the main-turn sum from double-counting the separate Task-subagent capture.
 * Emits one structured `agent_usage` event per distinct model.
 *
 * @param {unknown} transcript - Raw JSONL transcript text.
 * @returns {Array<object>} One event per model that produced a non-null
 *   builder result; `[]` for a non-string or empty transcript.
 */
export function extractTranscriptUsage(transcript) {
    if (typeof transcript !== "string" || transcript.length === 0)
        return [];
    // Lines without a usage object carry nothing to price and are left out.
    const pricedTurns = readMainAssistantTurns(transcript).filter((turn) => turn.hasUsage);
    return buildTranscriptUsageEvents(pricedTurns);
}
/**
 * Cursor-aware variant of extractTranscriptUsage for the Stop hook.
 *
 * The transcript grows every turn and the forward loop forwards ALL passed
 * events unconditionally, so re-running extractTranscriptUsage on the whole
 * transcript each Stop would double-count every prior turn. This walks only
 * the turns NEW since the last Stop, keyed by a per-session high-water cursor
 * (the `uuid` of the last assistant turn seen).
 *
 *   - sinceUuid null/empty → process ALL non-sidechain assistant turns.
 *   - sinceUuid found      → process only turns AFTER it (exclusive).
 *   - sinceUuid set but NOT found (transcript compaction dropped it) → process
 *     ONLY THE LAST non-sidechain assistant turn. Bounded by design: we never
 *     re-emit the whole history when the cursor falls off the front.
 *
 * `cursor` returns the uuid of the LAST non-sidechain assistant turn in the
 * transcript (whether or not it carried usage), so the next Stop resumes
 * exactly past it. When the transcript has no such turn, or that turn has no
 * uuid, the input cursor is returned unchanged. Same char-algorithmic JSONL
 * parse (NO regex), same sidechain exclusion, same per-`message.id`
 * de-duplication and same buildAgentUsageEvent emission path as
 * extractTranscriptUsage.
 *
 * @param {unknown} transcript - Raw JSONL transcript text.
 * @param {unknown} sinceUuid - Cursor returned by the previous call, if any.
 * @returns {{events: Array<object>, cursor: string|null}} New events plus the
 *   cursor to persist for the next call.
 */
export function extractTranscriptUsageSince(transcript, sinceUuid) {
    const inputCursor = typeof sinceUuid === "string" && sinceUuid.length > 0 ? sinceUuid : null;
    if (typeof transcript !== "string" || transcript.length === 0) {
        return { events: [], cursor: inputCursor };
    }
    const turns = readMainAssistantTurns(transcript);
    // No assistant turns at all → nothing to emit, cursor unchanged.
    if (turns.length === 0)
        return { events: [], cursor: inputCursor };
    // Cursor always advances to the last assistant turn's uuid (or stays as the
    // input cursor if that last turn has no uuid).
    const cursor = turns[turns.length - 1].uuid ?? inputCursor;
    let fresh;
    if (inputCursor === null) {
        fresh = turns;
    }
    else {
        const foundAt = turns.findIndex((turn) => turn.uuid === inputCursor);
        // Found → strictly after the cursor. Not found → compaction dropped
        // the cursor; bounded fallback to the last turn only.
        fresh = foundAt >= 0 ? turns.slice(foundAt + 1) : turns.slice(turns.length - 1);
    }
    return { events: buildTranscriptUsageEvents(fresh), cursor };
}
1367
2010
  // ── User-message extractors ────────────────────────────────────────────────
1368
2011
  /**