context-mode 1.0.166 → 1.0.167

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.codex-plugin/plugin.json +1 -1
  4. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  5. package/.openclaw-plugin/package.json +1 -1
  6. package/README.md +6 -4
  7. package/build/adapters/codex/usage.d.ts +107 -0
  8. package/build/adapters/codex/usage.js +227 -0
  9. package/build/adapters/gemini-cli/hooks.d.ts +7 -1
  10. package/build/adapters/gemini-cli/hooks.js +9 -1
  11. package/build/adapters/gemini-cli/index.js +11 -0
  12. package/build/adapters/kimi/paths.d.ts +20 -0
  13. package/build/adapters/kimi/paths.js +41 -1
  14. package/build/adapters/kimi/usage.d.ts +82 -0
  15. package/build/adapters/kimi/usage.js +217 -0
  16. package/build/adapters/omp/plugin.d.ts +6 -0
  17. package/build/adapters/omp/plugin.js +87 -2
  18. package/build/adapters/omp/usage.d.ts +49 -0
  19. package/build/adapters/omp/usage.js +110 -0
  20. package/build/adapters/openclaw/plugin.d.ts +10 -0
  21. package/build/adapters/openclaw/plugin.js +57 -0
  22. package/build/adapters/openclaw/usage.d.ts +34 -0
  23. package/build/adapters/openclaw/usage.js +52 -0
  24. package/build/adapters/opencode/plugin.d.ts +17 -0
  25. package/build/adapters/opencode/plugin.js +40 -1
  26. package/build/adapters/pi/extension.js +34 -1
  27. package/build/adapters/qwen-code/index.js +23 -1
  28. package/build/adapters/qwen-code/usage.d.ts +90 -0
  29. package/build/adapters/qwen-code/usage.js +222 -0
  30. package/build/session/db.d.ts +11 -0
  31. package/build/session/db.js +33 -0
  32. package/build/session/extract.d.ts +208 -0
  33. package/build/session/extract.js +670 -43
  34. package/build/session/model-prices.json +429 -0
  35. package/build/session/pricing.d.ts +64 -0
  36. package/build/session/pricing.js +151 -0
  37. package/cli.bundle.mjs +62 -62
  38. package/configs/antigravity-cli/plugin.json +1 -1
  39. package/configs/copilot-cli/.github/plugin/plugin.json +1 -1
  40. package/configs/gemini-cli/settings.json +11 -0
  41. package/hooks/codex/stop.mjs +91 -4
  42. package/hooks/gemini-cli/aftermodel.mjs +70 -0
  43. package/hooks/kimi/stop.mjs +74 -3
  44. package/hooks/qwen-code/platform.mjs +1 -0
  45. package/hooks/qwen-code/stop.mjs +168 -0
  46. package/hooks/session-db.bundle.mjs +7 -7
  47. package/hooks/session-extract.bundle.mjs +3 -2
  48. package/hooks/session-loaders.mjs +9 -1
  49. package/hooks/stop.mjs +35 -2
  50. package/openclaw.plugin.json +1 -1
  51. package/package.json +1 -1
  52. package/server.bundle.mjs +90 -90
@@ -4,6 +4,7 @@
4
4
  *
5
5
  * All 13 event categories as specified in PRD Section 3.
6
6
  */
7
+ import { lookupPrice as catalogLookupPrice, computeCostUsd as catalogComputeCostUsd, } from "./pricing.js";
7
8
  // ── Internal helpers ───────────────────────────────────────────────────────
8
9
  /** Null-safe string coercion — no truncation, preserves full data. */
9
10
  function safeString(value) {
@@ -1243,51 +1244,87 @@ function extractFileReadMetadata(input) {
1243
1244
  }];
1244
1245
  }
1245
1246
  /**
1246
- * Per-model USD price table — Anthropic public list pricing, $/MTok.
1247
- * Verified against platform.claude.com/docs/en/about-claude/pricing,
1248
- * cloudzero.com, finout.io 2026-06 (cache: 5-min cache_write = 1.25× input,
1249
- * cache_read = 0.10× input). Fast-mode variants (e.g. opus-4-8-fast at
1250
- * $10/$50) are intentionally NOT mapped — they ship as separate model
1251
- * ids and would dilute the standard-tier dashboards if blended here.
1247
+ * Per-model USD pricing now lives in the curated multi-vendor catalog
1248
+ * (src/pricing/catalog.ts), which prices each model from ITS OWN row across
1249
+ * Anthropic / OpenAI / Google / Chinese / other vendors. This kills the old
1250
+ * bug where the hardcoded Anthropic-only table here billed every non-Claude
1251
+ * model at Claude-Sonnet's `default` rate. Unknown ids now resolve to a null
1252
+ * cost (one console.warn) instead of a silently wrong Claude rate.
1252
1253
  *
1253
- * NOTE: 16-oss-verify-gap-prd Gap #1 quoted Opus at $15/$75 — that is
1254
- * the prior Opus 4 (non-4.7) rate. Opus 4.7 and 4.8 ship at $5/$25.
1254
+ * resolveModelId picks the first non-empty model id from the hook candidates;
1255
+ * date-suffixed ids (e.g. claude-haiku-4-5-20251001) are reduced to a catalog
1256
+ * hit by progressively dropping trailing `-segment` suffixes (NO regex).
1255
1257
  */
1256
- const MODEL_PRICING_USD_PER_MTOK = {
1257
- "claude-opus-4-8": { input: 5.00, output: 25.00, cache_write: 6.25, cache_read: 0.50 },
1258
- "claude-opus-4-7": { input: 5.00, output: 25.00, cache_write: 6.25, cache_read: 0.50 },
1259
- "claude-sonnet-4-6": { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
1260
- "claude-haiku-4-5": { input: 1.00, output: 5.00, cache_write: 1.25, cache_read: 0.10 },
1261
- default: { input: 3.00, output: 15.00, cache_write: 3.75, cache_read: 0.30 },
1262
- };
1263
- function resolveModelKey(input, parsedResp) {
1258
+ function resolveModelId(input, parsedResp) {
1264
1259
  const candidates = [
1265
1260
  input.tool_input?.model,
1266
1261
  input.model,
1267
1262
  parsedResp.model,
1268
1263
  ];
1269
- const keys = Object.keys(MODEL_PRICING_USD_PER_MTOK).filter((k) => k !== "default");
1270
1264
  for (const c of candidates) {
1271
- if (typeof c !== "string" || c.length === 0)
1272
- continue;
1273
- if (c in MODEL_PRICING_USD_PER_MTOK)
1265
+ if (typeof c === "string" && c.length > 0)
1274
1266
  return c;
1275
- // Prefix match for date-suffixed model ids
1276
- // (e.g. claude-haiku-4-5-20251001 → claude-haiku-4-5)
1277
- for (const key of keys) {
1278
- if (c.startsWith(key))
1279
- return key;
1280
- }
1281
1267
  }
1282
- return "default";
1268
+ return "";
1269
+ }
1270
+ /**
1271
+ * Drop one trailing `-<segment>` from a model id, char-algorithmically (no
1272
+ * regex): walks back to the last '-' and returns the head, or null when there
1273
+ * is no usable separator. Lets a date-suffixed id fall back to its base id
1274
+ * (claude-haiku-4-5-20251001 → claude-haiku-4-5 → … ) one segment at a time.
1275
+ */
1276
+ function dropTrailingSegment(id) {
1277
+ for (let i = id.length - 1; i > 0; i--) {
1278
+ if (id.charCodeAt(i) === 45 /* '-' */)
1279
+ return id.slice(0, i);
1280
+ }
1281
+ return null;
1283
1282
  }
1284
- function computeCostUsd(modelKey, inputTokens, outputTokens, cacheCreationTokens, cacheReadTokens) {
1285
- const price = MODEL_PRICING_USD_PER_MTOK[modelKey] ?? MODEL_PRICING_USD_PER_MTOK.default;
1286
- const totalMicroDollars = inputTokens * price.input +
1287
- outputTokens * price.output +
1288
- cacheCreationTokens * price.cache_write +
1289
- cacheReadTokens * price.cache_read;
1290
- return totalMicroDollars / 1_000_000;
1283
+ /**
1284
+ * Resolve a model id to one the catalog can price: try the raw id, then
1285
+ * progressively trim trailing `-segment` suffixes so a date-suffixed id still
1286
+ * prices off its base model. Probes with lookupPrice (no warn) and returns the
1287
+ * first id that hits, or "" on a full miss — so cost compute warns at most once.
1288
+ */
1289
+ function resolveCatalogId(modelId) {
1290
+ let candidate = modelId;
1291
+ while (candidate && candidate.length > 0) {
1292
+ if (catalogLookupPrice(candidate) !== null)
1293
+ return candidate;
1294
+ candidate = dropTrailingSegment(candidate);
1295
+ }
1296
+ return "";
1297
+ }
1298
+ /**
1299
+ * Cost for a turn via the catalog. Returns null on a price miss (catalog emits
1300
+ * one console.warn of the unmatched id) or when all token buckets are zero.
1301
+ */
1302
+ function computeTurnCostUsd(modelId, inputTokens, outputTokens, cacheCreationTokens, cacheReadTokens) {
1303
+ const resolved = resolveCatalogId(modelId);
1304
+ // Feed the resolved id when found; otherwise pass the raw id so the catalog's
1305
+ // single miss-warning carries the id the operator actually saw.
1306
+ return catalogComputeCostUsd(resolved || modelId, {
1307
+ input_tokens: inputTokens,
1308
+ output_tokens: outputTokens,
1309
+ cache_creation_tokens: cacheCreationTokens,
1310
+ cache_read_tokens: cacheReadTokens,
1311
+ });
1312
+ }
1313
+ /**
1314
+ * Format a cost to a compact `cost_usd` string, char-algorithmically (no
1315
+ * regex). Renders 6 decimals, drops trailing zeros, and keeps a single `.0`
1316
+ * when the fraction trims to empty (e.g. 0 → "0.0"), matching the prior
1317
+ * `.toFixed(6).replace(...)` output exactly.
1318
+ */
1319
+ function formatCostUsd(cost) {
1320
+ let s = cost.toFixed(6);
1321
+ let end = s.length;
1322
+ while (end > 0 && s.charCodeAt(end - 1) === 48 /* '0' */)
1323
+ end--;
1324
+ s = s.slice(0, end);
1325
+ if (s.length > 0 && s.charCodeAt(s.length - 1) === 46 /* '.' */)
1326
+ s += "0";
1327
+ return s;
1291
1328
  }
1292
1329
  /**
1293
1330
  * AgentOutput.usage capture — fires on the Task sub-agent dispatcher.
@@ -1351,18 +1388,608 @@ function extractAgentUsage(input) {
1351
1388
  const cacheRead = typeof usage.cache_read_input_tokens === "number"
1352
1389
  ? usage.cache_read_input_tokens
1353
1390
  : 0;
1391
+ const modelId = resolveModelId(input, out);
1354
1392
  const anyTokens = inputTokens > 0 || outputTokens > 0 || cacheCreate > 0 || cacheRead > 0;
1393
+ let cost = null;
1355
1394
  if (anyTokens) {
1356
- const modelKey = resolveModelKey(input, out);
1357
- const cost = computeCostUsd(modelKey, inputTokens, outputTokens, cacheCreate, cacheRead);
1358
- parts.push(`cost_usd:${cost.toFixed(6).replace(/0+$/, "").replace(/\.$/, ".0")}`);
1395
+ // null means an unmatched model id (catalog warned once) — skip the cost token
1396
+ // rather than blend a wrong Claude rate (the old non-Claude bug).
1397
+ cost = computeTurnCostUsd(modelId, inputTokens, outputTokens, cacheCreate, cacheRead);
1398
+ if (cost !== null)
1399
+ parts.push(`cost_usd:${formatCostUsd(cost)}`);
1400
+ }
1401
+ // Wave 2b — emit structured top-level fields alongside the colon-string so
1402
+ // the forward envelope (which spreads `...event`) hands the platform typed
1403
+ // columns. Each field is set only when its source signal is present, so the
1404
+ // forward payload stays minimal; cost_usd is omitted on a price miss or a
1405
+ // zero-token turn. The colon-string `data` stays for human/debug + back-compat.
1406
+ const event = {
1407
+ type: "agent_usage",
1408
+ category: "cost",
1409
+ data: safeString(parts.join(" ")),
1410
+ priority: 2,
1411
+ };
1412
+ if (modelId.length > 0)
1413
+ event.model_id = modelId;
1414
+ if (typeof usage.input_tokens === "number")
1415
+ event.input_tokens = usage.input_tokens;
1416
+ if (typeof usage.output_tokens === "number")
1417
+ event.output_tokens = usage.output_tokens;
1418
+ if (typeof usage.cache_read_input_tokens === "number") {
1419
+ event.cache_read_tokens = usage.cache_read_input_tokens;
1359
1420
  }
1360
- return [{
1361
- type: "agent_usage",
1362
- category: "cost",
1363
- data: safeString(parts.join(" ")),
1364
- priority: 2,
1365
- }];
1421
+ if (typeof usage.cache_creation_input_tokens === "number") {
1422
+ event.cache_creation_tokens = usage.cache_creation_input_tokens;
1423
+ }
1424
+ if (cost !== null)
1425
+ event.cost_usd = cost;
1426
+ return [event];
1427
+ }
1428
+ // ── Kimi Code (kimi-code) usage parsers ────────────────────────────────────
1429
+ // Implementation lives in src/adapters/kimi/usage.ts (per adapter ownership);
1430
+ // re-exported here so the hook-reachable session-extract bundle can import the
1431
+ // cursor-gated wire.jsonl reader without a separate per-adapter bundle. The
1432
+ // import is type-only-free (the runtime callee buildAgentUsageEvent is hoisted),
1433
+ // so the extract.ts <-> usage.ts cycle is load-order safe.
1434
+ export { parseKimiUsage, extractKimiUsageSince } from "../adapters/kimi/usage.js";
1435
+ // ── Qwen Code (qwen-code) usage parsers ────────────────────────────────────
1436
+ // Implementation lives in src/adapters/qwen-code/usage.ts (per adapter
1437
+ // ownership); re-exported here so the hook-reachable session-extract bundle can
1438
+ // import the cursor-gated chats/<sessionId>.jsonl reader via the shared
1439
+ // loadExtract() loader, exactly like the kimi re-export above. Same load-order
1440
+ // safety: runtime callee buildAgentUsageEvent is hoisted within this module.
1441
+ export { parseQwenUsage, extractQwenUsageSince } from "../adapters/qwen-code/usage.js";
1442
+ /**
1443
+ * Pi (oh-my-pi) per-turn usage parser.
1444
+ *
1445
+ * Maps a Pi `turn_end` payload (`{ message: AssistantMessage }`) to the
1446
+ * `buildAgentUsageEvent` input shape, or null when there is nothing to record.
1447
+ *
1448
+ * Field provenance (adapter-matrix/pi.md @320261f + cited refs):
1449
+ * - usage: AssistantMessage.usage (ai/src/types.ts:521 -> catalog/src/types.ts:100-145)
1450
+ * - model_id: AssistantMessage.model (ai/src/types.ts:510; kept "provider/model" — builder normalizes)
1451
+ * - input: Usage.input -> input_tokens
1452
+ * - output: Usage.output -> output_tokens
1453
+ * - cacheWrite: Usage.cacheWrite -> cache_creation_tokens
1454
+ * - cacheRead: Usage.cacheRead -> cache_read_tokens
1455
+ * - native USD: Usage.cost.total -> native_cost_usd (HIGH confidence; no price-table needed)
1456
+ *
1457
+ * The event is per-turn incremental (per-response usage; anthropic.ts:1893-1901;
1458
+ * "for the turn" catalog/types.ts:103), so each turn_end maps to exactly one
1459
+ * agent_usage event with no cross-turn accumulation.
1460
+ *
1461
+ * Algorithmic + null-safe, NO regex. Accepts either the full TurnEndEvent
1462
+ * (`{ message }`) or a bare AssistantMessage (`{ usage, model }`) so callers
1463
+ * can pass `event` or `event.message` interchangeably. Returns null when the
1464
+ * payload is not an assistant message, carries no usage object, or every token
1465
+ * bucket is zero/absent (an all-zero turn emits no event — matches
1466
+ * buildAgentUsageEvent's own zero->null contract).
1467
+ */
1468
+ export function parsePiUsage(payload) {
1469
+ if (!payload || typeof payload !== "object")
1470
+ return null;
1471
+ const root = payload;
1472
+ // Unwrap TurnEndEvent.message when present; otherwise treat the payload as
1473
+ // the AssistantMessage itself.
1474
+ const maybeMessage = root.message;
1475
+ const message = maybeMessage && typeof maybeMessage === "object"
1476
+ ? maybeMessage
1477
+ : root;
1478
+ // Only assistant turns carry LLM usage. Custom/non-LLM turns are skipped.
1479
+ // Tolerate a missing role (some payloads omit it) but reject an explicit
1480
+ // non-assistant role.
1481
+ if (typeof message.role === "string" && message.role !== "assistant") {
1482
+ return null;
1483
+ }
1484
+ const usageRaw = message.usage;
1485
+ if (!usageRaw || typeof usageRaw !== "object")
1486
+ return null;
1487
+ const usage = usageRaw;
1488
+ const num = (v) => typeof v === "number" && Number.isFinite(v) && v > 0 ? v : 0;
1489
+ const input_tokens = num(usage.input);
1490
+ const output_tokens = num(usage.output);
1491
+ const cache_creation_tokens = num(usage.cacheWrite);
1492
+ const cache_read_tokens = num(usage.cacheRead);
1493
+ // Zero-everything turn → null (mirrors buildAgentUsageEvent's contract; keeps
1494
+ // the DB free of no-op cost events).
1495
+ if (input_tokens <= 0 &&
1496
+ output_tokens <= 0 &&
1497
+ cache_creation_tokens <= 0 &&
1498
+ cache_read_tokens <= 0) {
1499
+ return null;
1500
+ }
1501
+ // Pi-native USD cost lives on usage.cost.total. Preserve it only when finite;
1502
+ // omit (null) on absence so the builder falls back to the pricing catalog.
1503
+ let native_cost_usd = null;
1504
+ const costRaw = usage.cost;
1505
+ if (costRaw && typeof costRaw === "object") {
1506
+ const total = costRaw.total;
1507
+ if (typeof total === "number" && Number.isFinite(total)) {
1508
+ native_cost_usd = total;
1509
+ }
1510
+ }
1511
+ const model_id = typeof message.model === "string" ? message.model : "";
1512
+ return {
1513
+ model_id,
1514
+ input_tokens,
1515
+ output_tokens,
1516
+ cache_creation_tokens,
1517
+ cache_read_tokens,
1518
+ native_cost_usd,
1519
+ };
1520
+ }
1521
+ /**
1522
+ * openclaw `model.usage` diagnostic-event capture — parseOpenclawUsage.
1523
+ *
1524
+ * openclaw exposes a first-class `model.usage` diagnostic event
1525
+ * (`DiagnosticUsageEvent`, refs/platforms/openclaw/src/infra/diagnostic-events.ts:18-47),
1526
+ * emitted once per turn and consumed via `onDiagnosticEvent(listener)`
1527
+ * (diagnostic-events.ts:1156) — the same bus the first-party diagnostics-otel /
1528
+ * diagnostics-prometheus extensions read.
1529
+ *
1530
+ * Field mapping (openclaw → AgentUsageCounts):
1531
+ * evt.usage.input → input_tokens
1532
+ * evt.usage.output → output_tokens
1533
+ * evt.usage.cacheWrite→ cache_creation_tokens (cache-creation)
1534
+ * evt.usage.cacheRead → cache_read_tokens (cache-read)
1535
+ * evt.costUsd → native_cost_usd (pre-computed via estimateUsageCost,
1536
+ * agent-runner.ts:1995 — preferred over catalog)
1537
+ * evt.model → model_id
1538
+ *
1539
+ * CRITICAL: read `evt.usage` (the PER-TURN TOTAL — "Last Turn Total"
1540
+ * agent-runner.ts:943), NEVER `evt.lastCallUsage` (the last-model-call DELTA,
1541
+ * diagnostic-events.ts:34-40). Summing both would double-count.
1542
+ *
1543
+ * Returns AgentUsageCounts (the buildAgentUsageEvent input shape) or null when
1544
+ * the event is not a usage event / carries no usage / sums to zero. Pure,
1545
+ * null-safe, algorithmic — NO regex.
1546
+ */
1547
+ export function parseOpenclawUsage(payload) {
1548
+ if (!payload || typeof payload !== "object")
1549
+ return null;
1550
+ const evt = payload;
1551
+ // Only the `model.usage` diagnostic carries token usage. Tolerate an absent
1552
+ // type (defensive against a thinner payload variant) but reject any explicit
1553
+ // non-usage diagnostic (model.failover, log.record, …).
1554
+ if (typeof evt.type === "string" && evt.type !== "model.usage") {
1555
+ return null;
1556
+ }
1557
+ // PER-TURN TOTAL lives on `usage`. `lastCallUsage` is the last-call delta and
1558
+ // must NOT be consumed — reading it instead would understate (or, when summed
1559
+ // with usage, double-count) the turn.
1560
+ const usageRaw = evt.usage;
1561
+ if (!usageRaw || typeof usageRaw !== "object")
1562
+ return null;
1563
+ const usage = usageRaw;
1564
+ const num = (v) => typeof v === "number" && Number.isFinite(v) && v > 0 ? v : 0;
1565
+ const input_tokens = num(usage.input);
1566
+ const output_tokens = num(usage.output);
1567
+ const cache_creation_tokens = num(usage.cacheWrite);
1568
+ const cache_read_tokens = num(usage.cacheRead);
1569
+ // Zero-everything turn → null (mirrors buildAgentUsageEvent's contract; keeps
1570
+ // the DB free of no-op cost events).
1571
+ if (input_tokens <= 0 &&
1572
+ output_tokens <= 0 &&
1573
+ cache_creation_tokens <= 0 &&
1574
+ cache_read_tokens <= 0) {
1575
+ return null;
1576
+ }
1577
+ // openclaw ships a pre-computed USD cost at the TOP LEVEL (`evt.costUsd`, not
1578
+ // nested under usage). Preserve it only when finite; omit (null) on absence so
1579
+ // the builder falls back to the pricing catalog.
1580
+ const costRaw = evt.costUsd;
1581
+ const native_cost_usd = typeof costRaw === "number" && Number.isFinite(costRaw) ? costRaw : null;
1582
+ const model_id = typeof evt.model === "string" ? evt.model : "";
1583
+ return {
1584
+ model_id,
1585
+ input_tokens,
1586
+ output_tokens,
1587
+ cache_creation_tokens,
1588
+ cache_read_tokens,
1589
+ native_cost_usd,
1590
+ };
1591
+ }
1592
+ /**
1593
+ * opencode per-turn usage parser.
1594
+ *
1595
+ * Ground truth: context-mode-platform/docs/prds/2026-06-paid-observability/
1596
+ * adapter-matrix/opencode.md. opencode tracks usage per *assistant message*; the
1597
+ * usage-bearing payload reaches a plugin via the `message.updated` bus event,
1598
+ * whose `event.properties.info` is the full Message. The assistant token shape
1599
+ * (refs platforms/opencode .../session/message.ts) is:
1600
+ * info.tokens = { input, output, reasoning, cache: { read, write } }
1601
+ * info.cost = USD cost for this message
1602
+ * info.modelID / info.providerID (older refs may expose a single info.model)
1603
+ *
1604
+ * Field mapping (refs message.ts):
1605
+ * tokens.input -> input_tokens
1606
+ * tokens.output -> output_tokens
1607
+ * tokens.cache.read -> cache_read_tokens
1608
+ * tokens.cache.write -> cache_creation_tokens
1609
+ * modelID/providerID -> model_id (`${providerID}/${modelID}` when both present)
1610
+ * cost -> native_cost_usd
1611
+ *
1612
+ * LAST-STEP-SNAPSHOT CAVEAT (refs processor.ts:717-718): message-level
1613
+ * `.tokens` is OVERWRITTEN every step-finish, so it holds the LAST step's usage
1614
+ * — not the turn total. `.cost`, however, ACCUMULATES (`cost += usage.cost`) and
1615
+ * is the correct cumulative turn cost. We therefore pass `info.cost` through as
1616
+ * native_cost_usd so the billed $ is exact even though the token snapshot is
1617
+ * imprecise; the token columns remain best-effort (last-step) telemetry. A true
1618
+ * turn-total token sum would require summing per-step Step.Ended parts, which the
1619
+ * `message.updated` payload does not carry — out of scope for this snapshot-based
1620
+ * capture.
1621
+ *
1622
+ * Accepts either the bus event (`{ properties: { info } }`), the wrapped
1623
+ * `{ event: { properties: { info } } }`, or the bare Message (`info`) so the
1624
+ * caller can hand us whatever the SDK surfaces. NO regex — pure algorithmic,
1625
+ * null-safe traversal. Returns null when the payload is not an assistant
1626
+ * message, carries no tokens object, or every token bucket is zero/absent
1627
+ * (mirrors buildAgentUsageEvent's zero->null contract).
1628
+ */
1629
+ export function parseOpencodeUsage(payload) {
1630
+ if (!payload || typeof payload !== "object")
1631
+ return null;
1632
+ const root = payload;
1633
+ // Unwrap, most-specific first: { event: { properties: { info } } } →
1634
+ // { properties: { info } } → bare message. Each hop is guarded so a missing
1635
+ // layer simply falls through to treating the current object as the message.
1636
+ const eventLayer = root.event && typeof root.event === "object"
1637
+ ? root.event
1638
+ : root;
1639
+ const propsLayer = eventLayer.properties && typeof eventLayer.properties === "object"
1640
+ ? eventLayer.properties
1641
+ : eventLayer;
1642
+ const message = propsLayer.info && typeof propsLayer.info === "object"
1643
+ ? propsLayer.info
1644
+ : root;
1645
+ // Only assistant messages carry token usage. Tolerate a missing role but
1646
+ // reject an explicit non-assistant one.
1647
+ if (typeof message.role === "string" && message.role !== "assistant") {
1648
+ return null;
1649
+ }
1650
+ const tokensRaw = message.tokens;
1651
+ if (!tokensRaw || typeof tokensRaw !== "object")
1652
+ return null;
1653
+ const tokens = tokensRaw;
1654
+ const num = (v) => typeof v === "number" && Number.isFinite(v) && v > 0 ? v : 0;
1655
+ const cacheRaw = tokens.cache;
1656
+ const cache = cacheRaw && typeof cacheRaw === "object"
1657
+ ? cacheRaw
1658
+ : {};
1659
+ const input_tokens = num(tokens.input);
1660
+ const output_tokens = num(tokens.output);
1661
+ const cache_read_tokens = num(cache.read);
1662
+ const cache_creation_tokens = num(cache.write);
1663
+ // Zero-everything turn → null (keeps the DB free of no-op cost events).
1664
+ if (input_tokens <= 0 &&
1665
+ output_tokens <= 0 &&
1666
+ cache_creation_tokens <= 0 &&
1667
+ cache_read_tokens <= 0) {
1668
+ return null;
1669
+ }
1670
+ // Native cumulative USD cost (preferred — exact, immune to the last-step
1671
+ // token-snapshot imprecision). Omit (null) on absence so the builder falls
1672
+ // back to the pricing catalog over the last-step token columns.
1673
+ const costRaw = message.cost;
1674
+ const native_cost_usd = typeof costRaw === "number" && Number.isFinite(costRaw) ? costRaw : null;
1675
+ // Billed model id. Prefer the `${providerID}/${modelID}` pair (how opencode
1676
+ // itself addresses the model); fall back to a bare modelID, then a single
1677
+ // `model` string (older refs shape). Empty when none present.
1678
+ const modelID = typeof message.modelID === "string" ? message.modelID : "";
1679
+ const providerID = typeof message.providerID === "string" ? message.providerID : "";
1680
+ let model_id = "";
1681
+ if (modelID.length > 0) {
1682
+ model_id = providerID.length > 0 ? `${providerID}/${modelID}` : modelID;
1683
+ }
1684
+ else if (typeof message.model === "string") {
1685
+ model_id = message.model;
1686
+ }
1687
+ return {
1688
+ model_id,
1689
+ input_tokens,
1690
+ output_tokens,
1691
+ cache_creation_tokens,
1692
+ cache_read_tokens,
1693
+ native_cost_usd,
1694
+ };
1695
+ }
1696
+ /**
1697
+ * Build a structured `agent_usage` event from summed per-model token counts.
1698
+ * Emits the colon-string `data` (human/debug + back-compat) AND the structured
1699
+ * top-level fields the forward envelope spreads to the platform. cost_usd via
1700
+ * the pricing catalog — omitted on a price miss. Returns null when every token
1701
+ * bucket is zero/absent (so an all-zero model emits no event).
1702
+ */
1703
+ export function buildAgentUsageEvent(counts) {
1704
+ const { model_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, native_cost_usd } = counts;
1705
+ if (input_tokens <= 0 && output_tokens <= 0 && cache_creation_tokens <= 0 && cache_read_tokens <= 0) {
1706
+ return null;
1707
+ }
1708
+ const parts = [`tokens_in:${input_tokens}`, `tokens_out:${output_tokens}`];
1709
+ if (cache_creation_tokens > 0)
1710
+ parts.push(`cache_create:${cache_creation_tokens}`);
1711
+ if (cache_read_tokens > 0)
1712
+ parts.push(`cache_read:${cache_read_tokens}`);
1713
+ const cost = (typeof native_cost_usd === "number" && Number.isFinite(native_cost_usd))
1714
+ ? native_cost_usd
1715
+ : computeTurnCostUsd(model_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens);
1716
+ if (cost !== null)
1717
+ parts.push(`cost_usd:${formatCostUsd(cost)}`);
1718
+ const event = {
1719
+ type: "agent_usage",
1720
+ category: "cost",
1721
+ data: safeString(parts.join(" ")),
1722
+ priority: 2,
1723
+ };
1724
+ if (model_id.length > 0)
1725
+ event.model_id = model_id;
1726
+ event.input_tokens = input_tokens;
1727
+ event.output_tokens = output_tokens;
1728
+ if (cache_read_tokens > 0)
1729
+ event.cache_read_tokens = cache_read_tokens;
1730
+ if (cache_creation_tokens > 0)
1731
+ event.cache_creation_tokens = cache_creation_tokens;
1732
+ if (cost !== null)
1733
+ event.cost_usd = cost;
1734
+ return event;
1735
+ }
1736
+ /**
1737
+ * gemini-cli AfterModel usage capture — parse ONE AfterModel hook payload into
1738
+ * a builder `agent_usage` event (or null). Pure, null-safe, struct-only — NO regex.
1739
+ *
1740
+ * Refs (docs/prds/2026-06-paid-observability/adapter-matrix/gemini-cli.md):
1741
+ * - AfterModel fires per model call inside the gemini-cli stream loop
1742
+ * (geminiChat.ts:1213); the hook input carries `llm_request` + `llm_response`
1743
+ * (hooks/types.ts:692-695).
1744
+ * - `llm_response.usageMetadata` exposes promptTokenCount / candidatesTokenCount
1745
+ * / totalTokenCount (hookTranslator.ts:60-64).
1746
+ * - model_id = `response.modelVersion || req.model` (loggingContentGenerator.ts:405,553).
1747
+ *
1748
+ * Mapping → builder shape:
1749
+ * promptTokenCount → input_tokens
1750
+ * candidatesTokenCount → output_tokens
1751
+ * thoughtsTokenCount → ADDED into output_tokens (Gemini bills reasoning as output)
1752
+ * cachedContentTokenCount → cache_read_tokens (when present)
1753
+ * model_id → response.modelVersion || llm_request.model
1754
+ *
1755
+ * CAVEAT — the DECOUPLED AfterModel payload (hookTranslator.ts:60-64) forwards
1756
+ * only prompt/candidates/total and DROPS cachedContentTokenCount +
1757
+ * thoughtsTokenCount. We map those two defensively WHEN PRESENT (richer payload
1758
+ * variant / future fix / OTel-fed input) but never depend on them — the common
1759
+ * case is input+output only. For full cached/thoughts fidelity the OTel
1760
+ * `api_response` exporter or the chat-recording JSON is the source of record.
1761
+ *
1762
+ * MULTI-CALL TURNS — one user turn that triggers tool calls spans MULTIPLE
1763
+ * model calls, each AfterModel cumulative within itself. This fn emits ONE
1764
+ * priced event PER AfterModel call (each call is one billed round-trip).
1765
+ * Per-userPromptId summation into a single per-turn total is DEFERRED — emitting
1766
+ * per-call never double-counts, since each call's usageMetadata is the
1767
+ * authoritative total for that call.
1768
+ */
1769
+ export function parseGeminiUsage(afterModelPayload) {
1770
+ if (!afterModelPayload || typeof afterModelPayload !== "object")
1771
+ return null;
1772
+ const payload = afterModelPayload;
1773
+ const resp = payload.llm_response;
1774
+ if (!resp || typeof resp !== "object")
1775
+ return null;
1776
+ const response = resp;
1777
+ const um = response.usageMetadata;
1778
+ if (!um || typeof um !== "object")
1779
+ return null;
1780
+ const usage = um;
1781
+ const num = (v) => (typeof v === "number" && Number.isFinite(v) ? v : 0);
1782
+ const input = num(usage.promptTokenCount);
1783
+ const candidates = num(usage.candidatesTokenCount);
1784
+ const thoughts = num(usage.thoughtsTokenCount);
1785
+ const cached = num(usage.cachedContentTokenCount);
1786
+ // Gemini bills reasoning (thoughts) as output tokens — fold into output.
1787
+ const output = candidates + thoughts;
1788
+ // model_id = response.modelVersion (server-confirmed) || llm_request.model.
1789
+ const req = payload.llm_request;
1790
+ const reqModel = req && typeof req === "object" && typeof req.model === "string"
1791
+ ? req.model
1792
+ : "";
1793
+ const modelVersion = typeof response.modelVersion === "string" ? response.modelVersion : "";
1794
+ const modelId = modelVersion.length > 0 ? modelVersion : reqModel;
1795
+ // gemini exposes no native cost — cost_usd is derived from the pricing catalog
1796
+ // inside buildAgentUsageEvent (native_cost_usd omitted). All-zero ⇒ null.
1797
+ return buildAgentUsageEvent({
1798
+ model_id: modelId,
1799
+ input_tokens: input,
1800
+ output_tokens: output,
1801
+ cache_creation_tokens: 0,
1802
+ cache_read_tokens: cached,
1803
+ });
1804
+ }
/**
 * Read one numeric usage counter, treating anything that is not a number
 * (missing field, string, null, ...) as 0.
 */
function usageCountOrZero(value) {
    return typeof value === "number" ? value : 0;
}
/**
 * Parse a session-transcript JSONL string into the list of main-thread
 * assistant turns, in transcript order.
 *
 * Lines are located with indexOf("\n") and trimmed (NO regex). A line is kept
 * only when it parses to an object with `type === "assistant"`, is not marked
 * `isSidechain: true`, and carries a non-empty string `message.model`.
 * Blank lines and lines that are not valid JSON are skipped silently — the
 * scan is deliberately best-effort.
 *
 * Each returned turn is `{ uuid, model, hasUsage, input, output, cacheCreate,
 * cacheRead }`:
 *  - `uuid` is the line's non-empty string `uuid`, else null;
 *  - `hasUsage` is true when `message.usage` is an object;
 *  - the four counters default to 0 when absent or not numbers.
 */
function parseMainAssistantTurns(transcript) {
    const turns = [];
    let lineStart = 0;
    // `<=` so the final segment is visited even without a trailing newline.
    while (lineStart <= transcript.length) {
        const newlineAt = transcript.indexOf("\n", lineStart);
        const lineEnd = newlineAt === -1 ? transcript.length : newlineAt;
        const line = transcript.slice(lineStart, lineEnd).trim();
        lineStart = lineEnd + 1;
        if (line.length === 0)
            continue;
        let record;
        try {
            record = JSON.parse(line);
        }
        catch {
            // Malformed line: skip it rather than abort the scan.
            continue;
        }
        if (!record || typeof record !== "object")
            continue;
        if (record.type !== "assistant" || record.isSidechain === true)
            continue;
        const message = record.message;
        if (!message || typeof message !== "object")
            continue;
        const model = typeof message.model === "string" ? message.model : "";
        if (model.length === 0)
            continue;
        const rawUsage = message.usage;
        const hasUsage = Boolean(rawUsage) && typeof rawUsage === "object";
        const usage = hasUsage ? rawUsage : {};
        turns.push({
            uuid: typeof record.uuid === "string" && record.uuid.length > 0 ? record.uuid : null,
            model,
            hasUsage,
            input: usageCountOrZero(usage.input_tokens),
            output: usageCountOrZero(usage.output_tokens),
            cacheCreate: usageCountOrZero(usage.cache_creation_input_tokens),
            cacheRead: usageCountOrZero(usage.cache_read_input_tokens),
        });
    }
    return turns;
}
/**
 * Sum the given turns per model and build one event per distinct model via
 * buildAgentUsageEvent, in order of each model's first appearance. Falsy
 * results from buildAgentUsageEvent are dropped.
 */
function sumTurnsToUsageEvents(turns) {
    const totalsByModel = new Map();
    for (const turn of turns) {
        let totals = totalsByModel.get(turn.model);
        if (totals === undefined) {
            totals = { input: 0, output: 0, cacheCreate: 0, cacheRead: 0 };
            totalsByModel.set(turn.model, totals);
        }
        totals.input += turn.input;
        totals.output += turn.output;
        totals.cacheCreate += turn.cacheCreate;
        totals.cacheRead += turn.cacheRead;
    }
    const events = [];
    for (const [model, totals] of totalsByModel) {
        const event = buildAgentUsageEvent({
            model_id: model,
            input_tokens: totals.input,
            output_tokens: totals.output,
            cache_creation_tokens: totals.cacheCreate,
            cache_read_tokens: totals.cacheRead,
        });
        if (event)
            events.push(event);
    }
    return events;
}
/**
 * claude-code MAIN-turn usage capture — the path the Task subagent capture
 * (extractAgentUsage) does not cover.
 *
 * Sums `message.usage` over every non-sidechain `type:"assistant"` transcript
 * line, grouped by `message.model`, and emits one `agent_usage` event per
 * distinct model. Usage is treated as a per-turn delta, so the per-model sum
 * is the total. `isSidechain:true` lines are excluded so this sum does not
 * overlap the separate Task-subagent capture.
 *
 * Turns whose `message.usage` is not an object are ignored entirely.
 *
 * @param {string} transcript - Session transcript as JSONL text.
 * @returns {Array} Events returned by buildAgentUsageEvent; empty when the
 *   input is not a non-empty string or holds no usable turns.
 */
export function extractTranscriptUsage(transcript) {
    if (typeof transcript !== "string" || transcript.length === 0)
        return [];
    const turnsWithUsage = parseMainAssistantTurns(transcript).filter((turn) => turn.hasUsage);
    return sumTurnsToUsageEvents(turnsWithUsage);
}
/**
 * Cursor-aware variant of extractTranscriptUsage for the Stop hook.
 *
 * The transcript grows every turn, so re-summing the whole transcript on each
 * Stop would count every earlier turn again. This processes only the turns
 * that are new since the last call, keyed by a per-session high-water cursor
 * (the `uuid` of the last assistant turn seen).
 *
 *  - sinceUuid null/empty/non-string → process ALL non-sidechain assistant
 *    turns.
 *  - sinceUuid found                 → process only turns AFTER it (exclusive).
 *  - sinceUuid set but NOT found (e.g. compaction dropped it) → process ONLY
 *    THE LAST turn. Bounded by design: the whole history is never re-emitted
 *    when the cursor falls off the front.
 *
 * Unlike extractTranscriptUsage, turns without a usage object are kept (as
 * all-zero turns) so the cursor can still advance past them.
 *
 * NOTE: when the last turn has no `uuid`, the input cursor is returned
 * unchanged, so a later call over the same transcript selects the same turns
 * again.
 *
 * @param {string} transcript - Session transcript as JSONL text.
 * @param {string|null|undefined} sinceUuid - Cursor returned by the previous call.
 * @returns {{events: Array, cursor: (string|null)}} New events plus the cursor
 *   to persist: the uuid of the last non-sidechain assistant turn, or the
 *   normalized input cursor when there is no such turn / it has no uuid.
 */
export function extractTranscriptUsageSince(transcript, sinceUuid) {
    const inputCursor = typeof sinceUuid === "string" && sinceUuid.length > 0 ? sinceUuid : null;
    if (typeof transcript !== "string" || transcript.length === 0) {
        return { events: [], cursor: inputCursor };
    }
    const turns = parseMainAssistantTurns(transcript);
    // No assistant turns at all → nothing to emit, cursor unchanged.
    if (turns.length === 0) {
        return { events: [], cursor: inputCursor };
    }
    // The cursor always advances to the last turn's uuid when it has one.
    const lastUuid = turns[turns.length - 1].uuid;
    const cursor = lastUuid !== null ? lastUuid : inputCursor;
    let selected;
    if (inputCursor === null) {
        selected = turns;
    }
    else {
        const cursorIndex = turns.findIndex((turn) => turn.uuid === inputCursor);
        selected = cursorIndex >= 0
            ? turns.slice(cursorIndex + 1) // strictly after the cursor
            : turns.slice(-1); // cursor fell off the front: last turn only
    }
    return { events: sumTurnsToUsageEvents(selected), cursor };
}
1367
1994
  // ── User-message extractors ────────────────────────────────────────────────
1368
1995
  /**