@pentatonic-ai/ai-agent-sdk 0.4.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +59 -0
  2. package/bin/cli.js +70 -9
  3. package/dist/index.cjs +25 -3
  4. package/dist/index.js +25 -3
  5. package/package.json +4 -2
  6. package/packages/doctor/README.md +106 -0
  7. package/packages/doctor/__tests__/checks.test.js +187 -0
  8. package/packages/doctor/__tests__/detect.test.js +101 -0
  9. package/packages/doctor/__tests__/output.test.js +92 -0
  10. package/packages/doctor/__tests__/plugins.test.js +111 -0
  11. package/packages/doctor/__tests__/runner.test.js +131 -0
  12. package/packages/doctor/package.json +6 -0
  13. package/packages/doctor/src/checks/hosted-tes.js +109 -0
  14. package/packages/doctor/src/checks/local-memory.js +290 -0
  15. package/packages/doctor/src/checks/platform.js +170 -0
  16. package/packages/doctor/src/checks/universal.js +121 -0
  17. package/packages/doctor/src/detect.js +102 -0
  18. package/packages/doctor/src/index.js +33 -0
  19. package/packages/doctor/src/output.js +55 -0
  20. package/packages/doctor/src/plugins.js +81 -0
  21. package/packages/doctor/src/runner.js +136 -0
  22. package/packages/memory/migrations/005-atomic-memories.sql +16 -0
  23. package/packages/memory/migrations/006-fix-vector-dim.sql +97 -0
  24. package/packages/memory/openclaw-plugin/__tests__/chat-turn.test.js +208 -0
  25. package/packages/memory/openclaw-plugin/__tests__/indicator.test.js +142 -0
  26. package/packages/memory/openclaw-plugin/__tests__/version-check.test.js +136 -0
  27. package/packages/memory/openclaw-plugin/index.js +369 -58
  28. package/packages/memory/openclaw-plugin/openclaw.plugin.json +11 -1
  29. package/packages/memory/openclaw-plugin/package.json +1 -1
  30. package/packages/memory/src/__tests__/distill.test.js +175 -0
  31. package/packages/memory/src/__tests__/openclaw-chat-turn.test.js +289 -0
  32. package/packages/memory/src/distill.js +162 -0
  33. package/packages/memory/src/index.js +1 -0
  34. package/packages/memory/src/ingest.js +10 -0
  35. package/packages/memory/src/openclaw/index.js +280 -23
  36. package/packages/memory/src/openclaw/package.json +1 -1
  37. package/packages/memory/src/server.js +59 -5
  38. package/src/normalizer.js +16 -0
  39. package/src/session.js +21 -2
@@ -117,6 +117,63 @@ async function hostedSearch(config, query, limit = 5, minScore = 0.3) {
117
117
  }
118
118
  }
119
119
 
120
+ /**
121
+ * Emit a CHAT_TURN event to TES so the conversation-analytics dashboard
122
+ * (Token Universe + Tools tabs) can render. Without this, the dashboard
123
+ * filters on eventType=CHAT_TURN and shows nothing for OpenClaw users
124
+ * because the only events emitted are STORE_MEMORY.
125
+ *
126
+ * Anything missing from the message metadata is omitted rather than
127
+ * defaulted to zero — that way the dashboard can distinguish "no data"
128
+ * from "zero usage".
129
+ */
130
+ async function hostedEmitChatTurn(config, sessionId, turn) {
131
+ const attributes = {
132
+ source: "openclaw-plugin",
133
+ user_message: turn.userMessage,
134
+ assistant_response: turn.assistantResponse,
135
+ };
136
+ if (turn.model) attributes.model = turn.model;
137
+ if (turn.usage) attributes.usage = turn.usage;
138
+ if (turn.toolCalls?.length) attributes.tool_calls = turn.toolCalls;
139
+ if (turn.turnNumber !== undefined) attributes.turn_number = turn.turnNumber;
140
+ if (turn.systemPrompt) attributes.system_prompt = turn.systemPrompt;
141
+
142
+ try {
143
+ const response = await fetch(`${config.tes_endpoint}/api/graphql`, {
144
+ method: "POST",
145
+ headers: tesHeaders(config),
146
+ // Route through createModuleEvent on the conversation-analytics
147
+ // module rather than the top-level emitEvent. The latter requires
148
+ // a permission most client API keys don't have ("Access denied:
149
+ // You don't have permission to update emitEvent"), but the
150
+ // module's manifest declares CHAT_TURN as a registered event
151
+ // type, so the module-scoped path is both authorised and
152
+ // consistent with how STORE_MEMORY is emitted.
153
+ body: JSON.stringify({
154
+ query: `mutation Cme($moduleId: String!, $input: ModuleEventInput!) {
155
+ createModuleEvent(moduleId: $moduleId, input: $input) { success eventId }
156
+ }`,
157
+ variables: {
158
+ moduleId: "conversation-analytics",
159
+ input: {
160
+ eventType: "CHAT_TURN",
161
+ data: {
162
+ entity_id: sessionId,
163
+ attributes,
164
+ },
165
+ },
166
+ },
167
+ }),
168
+ signal: AbortSignal.timeout(10000),
169
+ });
170
+ if (!response.ok) return null;
171
+ return response.json();
172
+ } catch {
173
+ return null;
174
+ }
175
+ }
176
+
120
177
  async function hostedStore(config, content, metadata = {}) {
121
178
  try {
122
179
  const response = await fetch(`${config.tes_endpoint}/api/graphql`, {
@@ -152,6 +209,186 @@ async function hostedStore(config, content, metadata = {}) {
152
209
 
153
210
  // --- Hosted context engine ---
154
211
 
212
+ // Per-session turn buffer. Holds the user message until the matching
213
+ // assistant response arrives, at which point we emit a CHAT_TURN.
214
+ // Turn counter is kept in a separate map so it survives buffer clears
215
+ // between turns. Module-scoped (rather than per-engine) so multiple
216
+ // engine instances don't double-buffer the same session.
217
+ //
218
+ // Simple LRU cap to avoid unbounded growth in long-running processes
219
+ // with many sessions (each entry is small, 500 sessions ≈ <50KB, but
220
+ // the cap exists to enforce an upper bound).
221
+ const MAX_SESSIONS = 500;
222
+ const turnBuffers = new Map(); // sessionId → { userMessage }
223
+ const turnCounters = new Map(); // sessionId → highest turn_number emitted
224
+
225
+ function capSessionMaps() {
226
+ while (turnBuffers.size > MAX_SESSIONS) {
227
+ turnBuffers.delete(turnBuffers.keys().next().value);
228
+ }
229
+ while (turnCounters.size > MAX_SESSIONS) {
230
+ turnCounters.delete(turnCounters.keys().next().value);
231
+ }
232
+ }
233
+
234
+ function _resetTurnBuffersForTest() {
235
+ turnBuffers.clear();
236
+ turnCounters.clear();
237
+ }
238
+ export { _resetTurnBuffersForTest };
239
+
240
+ // Extract text from a message content field. OpenClaw may pass content
241
+ // either as a plain string or as an array of content blocks ([{type:"text",
242
+ // text:"..."}, ...]). Returns null if no text can be extracted.
243
+ function getTextContent(message) {
244
+ if (!message) return null;
245
+ const c = message.content;
246
+ if (typeof c === "string") return c;
247
+ if (Array.isArray(c)) {
248
+ const text = c
249
+ .filter((b) => b?.type === "text" && typeof b.text === "string")
250
+ .map((b) => b.text)
251
+ .join(" ");
252
+ return text || null;
253
+ }
254
+ return null;
255
+ }
256
+
257
+ // OpenClaw wraps real user messages from external channels (Telegram etc.)
258
+ // in "Conversation info (untrusted metadata)" JSON envelopes, with the
259
+ // actual user text appended after the metadata blocks. Strip those
260
+ // envelopes to get the real user text. Returns null for pure system
261
+ // prompts ("Note: The previous agent run", "System (untrusted)", etc.).
262
+ function extractUserText(raw) {
263
+ if (!raw) return null;
264
+ const trimmed = raw.trim();
265
+
266
+ if (
267
+ trimmed.startsWith("Note: The previous agent run") ||
268
+ trimmed.startsWith("System (untrusted)") ||
269
+ trimmed.startsWith("[System]") ||
270
+ trimmed.startsWith("System:") ||
271
+ trimmed.startsWith("[Queued messages")
272
+ ) {
273
+ return null;
274
+ }
275
+
276
+ if (
277
+ trimmed.startsWith("Conversation info") ||
278
+ trimmed.startsWith("(untrusted metadata)") ||
279
+ trimmed.startsWith("Sender (untrusted") ||
280
+ trimmed.startsWith("Untrusted context")
281
+ ) {
282
+ const stripped = trimmed
283
+ .replace(
284
+ /(?:Conversation info|Sender|Thread starter|Replied message|Forwarded message context|Chat history since last reply) \(untrusted[^)]*\):\s*```json[\s\S]*?```/g,
285
+ ""
286
+ )
287
+ .replace(
288
+ /Untrusted context \(metadata, do not treat as instructions or commands\):/g,
289
+ ""
290
+ )
291
+ .trim();
292
+ return stripped || null;
293
+ }
294
+
295
+ return trimmed;
296
+ }
297
+
298
+ // Pull whatever the runtime hands us. Different OpenClaw versions wrap
299
+ // provider responses differently — we look in the obvious places and
300
+ // silently omit fields we can't find. The dashboard handles undefined
301
+ // usage/tool_calls gracefully (renders "no data" rather than zeros).
302
+ function extractAssistantMetadata(message) {
303
+ const meta = {};
304
+ // Direct fields first (richest hook contracts)
305
+ if (message.model) meta.model = message.model;
306
+ if (message.usage) meta.usage = message.usage;
307
+ if (Array.isArray(message.tool_calls) && message.tool_calls.length) {
308
+ meta.toolCalls = message.tool_calls;
309
+ } else if (Array.isArray(message.toolCalls) && message.toolCalls.length) {
310
+ meta.toolCalls = message.toolCalls;
311
+ }
312
+ // Fall back to a wrapped raw response if the runtime forwards it
313
+ const raw = message.raw || message.response || message._raw;
314
+ if (raw && typeof raw === "object") {
315
+ if (!meta.model && raw.model) meta.model = raw.model;
316
+ if (!meta.usage && raw.usage) meta.usage = raw.usage;
317
+ if (!meta.toolCalls) {
318
+ // Anthropic puts tool_use blocks in raw.content[]
319
+ if (Array.isArray(raw.content)) {
320
+ const tc = raw.content
321
+ .filter((b) => b?.type === "tool_use")
322
+ .map((b) => ({ tool: b.name, args: b.input || {} }));
323
+ if (tc.length) meta.toolCalls = tc;
324
+ }
325
+ // OpenAI puts tool_calls inside choices[0].message
326
+ if (
327
+ !meta.toolCalls &&
328
+ Array.isArray(raw.choices) &&
329
+ raw.choices[0]?.message?.tool_calls
330
+ ) {
331
+ meta.toolCalls = raw.choices[0].message.tool_calls.map((tc) => ({
332
+ tool: tc.function?.name || tc.name,
333
+ args: tc.function?.arguments,
334
+ }));
335
+ }
336
+ }
337
+ }
338
+ return meta;
339
+ }
340
+
341
+ // Process a single message: emit STORE_MEMORY for retrieval, and buffer
342
+ // for CHAT_TURN emission on the next assistant message. Shared between
343
+ // the `ingest` and `afterTurn` hooks so we behave consistently whichever
344
+ // one the OpenClaw runtime invokes.
345
+ async function handleHostedMessage(config, sessionId, message, log) {
346
+ const role = message?.role || message?.type;
347
+ if (role !== "user" && role !== "assistant") return;
348
+
349
+ const raw = getTextContent(message);
350
+ if (!raw) return;
351
+
352
+ // For user messages, strip OpenClaw's metadata envelope so we store
353
+ // and emit the real user text, not the JSON wrapper.
354
+ const text = role === "user" ? extractUserText(raw) : raw;
355
+ if (!text) return;
356
+
357
+ // STORE_MEMORY for retrieval.
358
+ try {
359
+ await hostedStore(config, text, { session_id: sessionId, role });
360
+ } catch (err) {
361
+ log(`[memory] Hosted store failed: ${err.message}`);
362
+ }
363
+
364
+ // CHAT_TURN buffering: pair each user message with the next assistant
365
+ // message in the same session and emit on the assistant turn.
366
+ try {
367
+ if (role === "user") {
368
+ turnBuffers.set(sessionId, { userMessage: text });
369
+ capSessionMaps();
370
+ } else if (role === "assistant") {
371
+ const buf = turnBuffers.get(sessionId);
372
+ const turnNumber = (turnCounters.get(sessionId) || 0) + 1;
373
+ turnCounters.set(sessionId, turnNumber);
374
+ capSessionMaps();
375
+ const meta = extractAssistantMetadata(message);
376
+ await hostedEmitChatTurn(config, sessionId, {
377
+ userMessage: buf?.userMessage,
378
+ assistantResponse: text,
379
+ turnNumber,
380
+ ...meta,
381
+ });
382
+ turnBuffers.delete(sessionId);
383
+ log(
384
+ `[memory] Emitted CHAT_TURN${meta.usage ? " w/ usage" : ""}${meta.toolCalls?.length ? ` w/ ${meta.toolCalls.length} tool_calls` : ""}`
385
+ );
386
+ }
387
+ } catch (err) {
388
+ log(`[memory] CHAT_TURN emit failed: ${err.message}`);
389
+ }
390
+ }
391
+
155
392
  function createHostedContextEngine(config, opts = {}) {
156
393
  const searchLimit = opts.searchLimit || 5;
157
394
  const minScore = opts.minScore || 0.3;
@@ -164,37 +401,34 @@ function createHostedContextEngine(config, opts = {}) {
164
401
  ownsCompaction: false,
165
402
  },
166
403
 
404
+ // Called by older OpenClaw runtimes that don't use afterTurn.
405
+ // Falls through to the shared handler so behaviour is identical.
167
406
  async ingest({ sessionId, message }) {
168
- if (!message?.content) return { ingested: false };
169
- const role = message.role || message.type;
170
- if (role !== "user" && role !== "assistant") return { ingested: false };
171
-
172
- try {
173
- await hostedStore(config, message.content, {
174
- session_id: sessionId,
175
- role,
176
- });
177
- log(`[memory] Ingested ${role} message via TES`);
178
- return { ingested: true };
179
- } catch (err) {
180
- log(`[memory] Hosted ingest failed: ${err.message}`);
181
- return { ingested: false };
182
- }
407
+ await handleHostedMessage(config, sessionId, message, log);
408
+ return { ingested: true };
183
409
  },
184
410
 
185
411
  async assemble({ sessionId, messages }) {
186
- const lastUserMsg = [...messages]
187
- .reverse()
188
- .find((m) => m.role === "user" || m.type === "user");
189
-
190
- if (!lastUserMsg?.content) {
412
+ // Find the most recent real user message. Skip OpenClaw's internal
413
+ // metadata prompts (extractUserText returns null for those).
414
+ let lastUserText = null;
415
+ for (const m of [...messages].reverse()) {
416
+ if (m.role !== "user" && m.type !== "user") continue;
417
+ const raw = getTextContent(m);
418
+ const extracted = extractUserText(raw);
419
+ if (extracted) {
420
+ lastUserText = extracted;
421
+ break;
422
+ }
423
+ }
424
+ if (!lastUserText) {
191
425
  return { messages, estimatedTokens: 0 };
192
426
  }
193
427
 
194
428
  try {
195
429
  const results = await hostedSearch(
196
430
  config,
197
- lastUserMsg.content,
431
+ lastUserText,
198
432
  searchLimit,
199
433
  minScore
200
434
  );
@@ -210,7 +444,21 @@ function createHostedContextEngine(config, opts = {}) {
210
444
  )
211
445
  .join("\n");
212
446
 
213
- const addition = `[Memory] Relevant context from past conversations:\n${memoryText}`;
447
+ // Visibility footer instruct the model to surface that memory was
448
+ // used. Opt out via show_memory_indicator: false in plugin config.
449
+ const showIndicator = config.show_memory_indicator !== false;
450
+ const indicatorRule = showIndicator
451
+ ? [
452
+ "",
453
+ `After your reply, on a new line, append exactly this footer (no other prefix, no trailing content):`,
454
+ `—`,
455
+ `🧠 _Used ${results.length} memor${results.length === 1 ? "y" : "ies"} from Pentatonic Memory_`,
456
+ "",
457
+ `If the memories above were not relevant to your reply, omit the footer.`,
458
+ ].join("\n")
459
+ : "";
460
+
461
+ const addition = `[Memory] Relevant context from past conversations:\n${memoryText}${indicatorRule}`;
214
462
 
215
463
  log(`[memory] Assembled ${results.length} memories via TES`);
216
464
 
@@ -229,7 +477,16 @@ function createHostedContextEngine(config, opts = {}) {
229
477
  return { ok: true, compacted: false };
230
478
  },
231
479
 
232
- async afterTurn() {},
480
+ // Newer OpenClaw runtimes call afterTurn instead of ingest. We slice
481
+ // messages added during this turn (user+assistant) and hand each one
482
+ // to the shared handler — same STORE_MEMORY + CHAT_TURN flow.
483
+ async afterTurn({ sessionId, messages, prePromptMessageCount }) {
484
+ if (!messages || typeof prePromptMessageCount !== "number") return;
485
+ const newMessages = messages.slice(prePromptMessageCount);
486
+ for (const message of newMessages) {
487
+ await handleHostedMessage(config, sessionId, message, log);
488
+ }
489
+ },
233
490
  };
234
491
  }
235
492
 
@@ -1,5 +1,5 @@
1
1
  {
2
- "name": "@pentatonic-ai/openclaw-memory-plugin",
2
+ "name": "@pentatonic-ai/openclaw-memory-plugin-internal",
3
3
  "version": "0.4.0",
4
4
  "type": "module",
5
5
  "openclaw": {
@@ -82,8 +82,61 @@ async function main() {
82
82
 
83
83
  const memory = createMemory();
84
84
 
85
+ // Enable pgvector before migrations (so migration 002 can create the vector column)
86
+ const setupPool = new Pool({ connectionString: process.env.DATABASE_URL });
87
+ try {
88
+ await setupPool.query("CREATE EXTENSION IF NOT EXISTS vector");
89
+ process.stderr.write("[memory-server] pgvector extension enabled\n");
90
+ } catch (err) {
91
+ process.stderr.write(`[memory-server] pgvector not available: ${err.message}\n`);
92
+ }
93
+
85
94
  // Run migrations on startup
86
95
  await memory.migrate();
96
+
97
+ // Fix: if migration 002 ran without pgvector, the vector column is missing.
98
+ // Re-apply it now that the extension is enabled.
99
+ try {
100
+ const colCheck = await setupPool.query(
101
+ `SELECT 1 FROM information_schema.columns
102
+ WHERE table_name = 'memory_nodes' AND column_name = 'embedding_vec' LIMIT 1`
103
+ );
104
+ if (colCheck.rows.length === 0) {
105
+ process.stderr.write("[memory-server] embedding_vec column missing — re-applying migration 002\n");
106
+ const { readFileSync } = await import("fs");
107
+ const { resolve, dirname } = await import("path");
108
+ const { fileURLToPath } = await import("url");
109
+ const migrationPath = resolve(dirname(fileURLToPath(import.meta.url)), "../migrations/002-vector-index.sql");
110
+ const sql = readFileSync(migrationPath, "utf-8");
111
+ await setupPool.query(sql);
112
+ process.stderr.write("[memory-server] embedding_vec column created\n");
113
+ }
114
+
115
+ // Re-run 006 if there are JSONB embeddings but no populated vectors —
116
+ // catches the case where 006 ran on a fresh DB before any data existed,
117
+ // then a subsequent insert was silently dimension-mismatched.
118
+ const mismatchCheck = await setupPool.query(
119
+ `SELECT
120
+ EXISTS (SELECT 1 FROM memory_nodes WHERE embedding IS NOT NULL) AS has_jsonb,
121
+ EXISTS (SELECT 1 FROM memory_nodes WHERE embedding_vec IS NOT NULL) AS has_vec
122
+ FROM memory_nodes LIMIT 1`
123
+ );
124
+ const row = mismatchCheck.rows[0] || {};
125
+ if (row.has_jsonb && !row.has_vec) {
126
+ process.stderr.write("[memory-server] JSONB embeddings present but no vectors — re-running migration 006\n");
127
+ const { readFileSync } = await import("fs");
128
+ const { resolve, dirname } = await import("path");
129
+ const { fileURLToPath } = await import("url");
130
+ const migrationPath = resolve(dirname(fileURLToPath(import.meta.url)), "../migrations/006-fix-vector-dim.sql");
131
+ const sql = readFileSync(migrationPath, "utf-8");
132
+ await setupPool.query(sql);
133
+ process.stderr.write("[memory-server] embedding_vec repair complete\n");
134
+ }
135
+ } catch (err) {
136
+ process.stderr.write(`[memory-server] Vector column repair skipped: ${err.message}\n`);
137
+ }
138
+ await setupPool.end();
139
+
87
140
  await memory.ensureLayers(CLIENT_ID);
88
141
 
89
142
  const server = new McpServer({
@@ -258,10 +311,11 @@ async function main() {
258
311
 
259
312
  if (url.pathname === "/search" && req.method === "POST") {
260
313
  try {
261
- // Use text search by default (fast, no external dependencies).
262
- // Vector search available via ?mode=vector if embeddings are working.
263
- const useVector = url.searchParams.get("mode") === "vector";
264
- const searchFn = useVector ? memory.search : memory.textSearch;
314
+ // Try vector search first (embeddings + BM25 + recency + frequency).
315
+ // Falls back to text-only search internally if embeddings fail.
316
+ // Use ?mode=text to force text-only search.
317
+ const textOnly = url.searchParams.get("mode") === "text";
318
+ const searchFn = textOnly ? memory.textSearch : memory.search;
265
319
  const results = await searchFn(body.query || "", {
266
320
  clientId: CLIENT_ID,
267
321
  limit: body.limit || 5,
@@ -288,7 +342,7 @@ async function main() {
288
342
  const health = {
289
343
  status: "ok",
290
344
  client: CLIENT_ID,
291
- version: "0.4.7",
345
+ version: "0.5.0",
292
346
  search: "text",
293
347
  db: false,
294
348
  ollama: false,
package/src/normalizer.js CHANGED
@@ -34,6 +34,21 @@ function empty() {
34
34
  };
35
35
  }
36
36
 
37
+ // Anthropic-only. The conversation-analytics Token Universe tab stacks
38
+ // cache_read / cache_create alongside input / output, so we pass them
39
+ // through whenever the provider supplies them. Other providers omit
40
+ // these keys silently.
41
+ function extractCacheUsage(usage) {
42
+ const out = {};
43
+ if (typeof usage.cache_read_input_tokens === "number") {
44
+ out.cache_read_input_tokens = usage.cache_read_input_tokens;
45
+ }
46
+ if (typeof usage.cache_creation_input_tokens === "number") {
47
+ out.cache_creation_input_tokens = usage.cache_creation_input_tokens;
48
+ }
49
+ return out;
50
+ }
51
+
37
52
  function normalizeOpenAI(raw) {
38
53
  const message = raw.choices?.[0]?.message || {};
39
54
  const usage = raw.usage || {};
@@ -76,6 +91,7 @@ function normalizeAnthropic(raw) {
76
91
  usage: {
77
92
  prompt_tokens: usage.input_tokens || 0,
78
93
  completion_tokens: usage.output_tokens || 0,
94
+ ...extractCacheUsage(usage),
79
95
  },
80
96
  toolCalls,
81
97
  };
package/src/session.js CHANGED
@@ -22,6 +22,8 @@ export class Session {
22
22
  _reset() {
23
23
  this._promptTokens = 0;
24
24
  this._completionTokens = 0;
25
+ this._cacheReadTokens = 0;
26
+ this._cacheCreateTokens = 0;
25
27
  this._rounds = 0;
26
28
  this._toolCalls = [];
27
29
  this._model = null;
@@ -29,12 +31,27 @@ export class Session {
29
31
  }
30
32
 
31
33
  get totalUsage() {
32
- return {
34
+ const usage = {
33
35
  prompt_tokens: this._promptTokens,
34
36
  completion_tokens: this._completionTokens,
35
- total_tokens: this._promptTokens + this._completionTokens,
37
+ total_tokens:
38
+ this._promptTokens +
39
+ this._completionTokens +
40
+ this._cacheReadTokens +
41
+ this._cacheCreateTokens,
36
42
  ai_rounds: this._rounds,
37
43
  };
44
+ // Cache token passthrough (Anthropic only). Added only when non-zero
45
+ // so the legacy { prompt_tokens, completion_tokens, total_tokens,
46
+ // ai_rounds } shape is preserved when no cache is in play. The
47
+ // conversation-analytics Token Universe tab reads these directly.
48
+ if (this._cacheReadTokens) {
49
+ usage.cache_read_input_tokens = this._cacheReadTokens;
50
+ }
51
+ if (this._cacheCreateTokens) {
52
+ usage.cache_creation_input_tokens = this._cacheCreateTokens;
53
+ }
54
+ return usage;
38
55
  }
39
56
 
40
57
  get toolCalls() {
@@ -47,6 +64,8 @@ export class Session {
47
64
 
48
65
  this._promptTokens += normalized.usage.prompt_tokens;
49
66
  this._completionTokens += normalized.usage.completion_tokens;
67
+ this._cacheReadTokens += normalized.usage.cache_read_input_tokens || 0;
68
+ this._cacheCreateTokens += normalized.usage.cache_creation_input_tokens || 0;
50
69
  this._rounds += 1;
51
70
 
52
71
  if (normalized.model) {