@pentatonic-ai/ai-agent-sdk 0.4.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/bin/cli.js +70 -9
- package/dist/index.cjs +25 -3
- package/dist/index.js +25 -3
- package/package.json +4 -2
- package/packages/doctor/README.md +106 -0
- package/packages/doctor/__tests__/checks.test.js +187 -0
- package/packages/doctor/__tests__/detect.test.js +101 -0
- package/packages/doctor/__tests__/output.test.js +92 -0
- package/packages/doctor/__tests__/plugins.test.js +111 -0
- package/packages/doctor/__tests__/runner.test.js +131 -0
- package/packages/doctor/package.json +6 -0
- package/packages/doctor/src/checks/hosted-tes.js +109 -0
- package/packages/doctor/src/checks/local-memory.js +290 -0
- package/packages/doctor/src/checks/platform.js +170 -0
- package/packages/doctor/src/checks/universal.js +121 -0
- package/packages/doctor/src/detect.js +102 -0
- package/packages/doctor/src/index.js +33 -0
- package/packages/doctor/src/output.js +55 -0
- package/packages/doctor/src/plugins.js +81 -0
- package/packages/doctor/src/runner.js +136 -0
- package/packages/memory/migrations/005-atomic-memories.sql +16 -0
- package/packages/memory/migrations/006-fix-vector-dim.sql +97 -0
- package/packages/memory/openclaw-plugin/__tests__/chat-turn.test.js +208 -0
- package/packages/memory/openclaw-plugin/__tests__/indicator.test.js +142 -0
- package/packages/memory/openclaw-plugin/__tests__/version-check.test.js +136 -0
- package/packages/memory/openclaw-plugin/index.js +369 -58
- package/packages/memory/openclaw-plugin/openclaw.plugin.json +11 -1
- package/packages/memory/openclaw-plugin/package.json +1 -1
- package/packages/memory/src/__tests__/distill.test.js +175 -0
- package/packages/memory/src/__tests__/openclaw-chat-turn.test.js +289 -0
- package/packages/memory/src/distill.js +162 -0
- package/packages/memory/src/index.js +1 -0
- package/packages/memory/src/ingest.js +10 -0
- package/packages/memory/src/openclaw/index.js +280 -23
- package/packages/memory/src/openclaw/package.json +1 -1
- package/packages/memory/src/server.js +59 -5
- package/src/normalizer.js +16 -0
- package/src/session.js +21 -2
|
@@ -117,6 +117,63 @@ async function hostedSearch(config, query, limit = 5, minScore = 0.3) {
|
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
/**
 * Emit a CHAT_TURN event to TES so the conversation-analytics dashboard
 * (Token Universe + Tools tabs) can render. Without this, the dashboard
 * filters on eventType=CHAT_TURN and shows nothing for OpenClaw users
 * because the only events emitted are STORE_MEMORY.
 *
 * Anything missing from the turn is omitted rather than defaulted to
 * zero — that way the dashboard can distinguish "no data" from
 * "zero usage".
 *
 * Best-effort: this function never rejects. It resolves to `null` when
 * the request throws (network error, 10 s timeout), when the response
 * status is not OK, or when the response body cannot be parsed as JSON.
 *
 * @param {object} config - Plugin config; `tes_endpoint` is used as the
 *   base URL and the whole object is handed to `tesHeaders`.
 * @param {string} sessionId - Sent as the event's `entity_id`.
 * @param {object} turn - Turn payload. `userMessage` and
 *   `assistantResponse` are always forwarded; `model`, `usage`,
 *   `toolCalls`, `turnNumber` and `systemPrompt` only when present.
 * @returns {Promise<object|null>} Parsed JSON response body, or `null`
 *   on any failure.
 */
async function hostedEmitChatTurn(config, sessionId, turn) {
  const attributes = {
    source: "openclaw-plugin",
    user_message: turn.userMessage,
    assistant_response: turn.assistantResponse,
  };
  if (turn.model) attributes.model = turn.model;
  if (turn.usage) attributes.usage = turn.usage;
  if (turn.toolCalls?.length) attributes.tool_calls = turn.toolCalls;
  // Compared against undefined (not truthiness) so turn number 0 is kept.
  if (turn.turnNumber !== undefined) attributes.turn_number = turn.turnNumber;
  if (turn.systemPrompt) attributes.system_prompt = turn.systemPrompt;

  try {
    const response = await fetch(`${config.tes_endpoint}/api/graphql`, {
      method: "POST",
      headers: tesHeaders(config),
      // Route through createModuleEvent on the conversation-analytics
      // module rather than the top-level emitEvent. The latter requires
      // a permission most client API keys don't have ("Access denied:
      // You don't have permission to update emitEvent"), but the
      // module's manifest declares CHAT_TURN as a registered event
      // type, so the module-scoped path is both authorised and
      // consistent with how STORE_MEMORY is emitted.
      body: JSON.stringify({
        query: `mutation Cme($moduleId: String!, $input: ModuleEventInput!) {
          createModuleEvent(moduleId: $moduleId, input: $input) { success eventId }
        }`,
        variables: {
          moduleId: "conversation-analytics",
          input: {
            eventType: "CHAT_TURN",
            data: {
              entity_id: sessionId,
              attributes,
            },
          },
        },
      }),
      signal: AbortSignal.timeout(10000),
    });
    if (!response.ok) return null;
    // Await inside the try block: returning the bare promise would let a
    // body-parse rejection bypass the catch below and reach the caller.
    return await response.json();
  } catch {
    return null;
  }
}
|
|
176
|
+
|
|
120
177
|
async function hostedStore(config, content, metadata = {}) {
|
|
121
178
|
try {
|
|
122
179
|
const response = await fetch(`${config.tes_endpoint}/api/graphql`, {
|
|
@@ -152,6 +209,186 @@ async function hostedStore(config, content, metadata = {}) {
|
|
|
152
209
|
|
|
153
210
|
// --- Hosted context engine ---
|
|
154
211
|
|
|
212
|
+
// Session-keyed state used to pair messages into CHAT_TURN events.
//
// `turnBuffers` parks a user message until the matching assistant
// response shows up; `turnCounters` lives in its own map so the running
// turn number outlasts the buffer being cleared between turns. Both are
// module-scoped (not per-engine) so several engine instances never
// double-buffer the same session.
//
// Both maps are bounded by MAX_SESSIONS so a long-running process with
// many sessions cannot grow them without limit (entries are small,
// 500 sessions ≈ <50KB — the cap is there purely as an upper bound).
const MAX_SESSIONS = 500;
const turnBuffers = new Map(); // sessionId → { userMessage }
const turnCounters = new Map(); // sessionId → highest turn_number emitted

// Shrink each session map back to at most MAX_SESSIONS entries. A Map
// iterates its keys in insertion order, so the first key is always the
// entry that was inserted longest ago — that one is dropped first.
function capSessionMaps() {
  for (const sessionMap of [turnBuffers, turnCounters]) {
    while (sessionMap.size > MAX_SESSIONS) {
      const oldestKey = sessionMap.keys().next().value;
      sessionMap.delete(oldestKey);
    }
  }
}
|
|
233
|
+
|
|
234
|
+
// Test-only hook: empties both module-level session maps so each test
// starts without buffered messages or turn counts from earlier tests.
function _resetTurnBuffersForTest() {
  for (const sessionMap of [turnBuffers, turnCounters]) {
    sessionMap.clear();
  }
}
export { _resetTurnBuffersForTest };
|
|
239
|
+
|
|
240
|
+
// Pull the plain text out of a message's `content` field. OpenClaw may
// hand over content as a bare string or as an array of content blocks
// ([{type:"text", text:"..."}, ...]). For the array form, the `text` of
// every text block is joined with single spaces; all other blocks are
// ignored. Returns null when there is no message, when content is
// neither a string nor an array, or when the blocks yield no text.
// A string content is returned unchanged (including the empty string).
function getTextContent(message) {
  if (!message) return null;

  const content = message.content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return null;

  const pieces = [];
  for (const block of content) {
    if (block?.type === "text" && typeof block.text === "string") {
      pieces.push(block.text);
    }
  }

  const joined = pieces.join(" ");
  return joined || null;
}
|
|
256
|
+
|
|
257
|
+
// Recover what the user actually typed from a raw user-message string.
//
// OpenClaw wraps real user messages from external channels (Telegram
// etc.) in "Conversation info (untrusted metadata)" JSON envelopes and
// appends the actual user text after the metadata blocks. This strips
// those envelopes and returns what is left.
//
// Returns null for empty input, for pure system prompts ("Note: The
// previous agent run", "System (untrusted)", etc.), and for envelopes
// that contain no user text once stripped. Any other input is returned
// trimmed.
function extractUserText(raw) {
  if (!raw) return null;
  const text = raw.trim();

  // Runtime-generated prompts: never treated as user text.
  const systemPrefixes = [
    "Note: The previous agent run",
    "System (untrusted)",
    "[System]",
    "System:",
    "[Queued messages",
  ];
  if (systemPrefixes.some((prefix) => text.startsWith(prefix))) {
    return null;
  }

  // Only messages that open with an envelope marker get stripped;
  // everything else passes through as-is (trimmed).
  const envelopePrefixes = [
    "Conversation info",
    "(untrusted metadata)",
    "Sender (untrusted",
    "Untrusted context",
  ];
  if (!envelopePrefixes.some((prefix) => text.startsWith(prefix))) {
    return text;
  }

  // A labelled "(untrusted …):" header followed by a fenced json block.
  const metadataBlocks =
    /(?:Conversation info|Sender|Thread starter|Replied message|Forwarded message context|Chat history since last reply) \(untrusted[^)]*\):\s*```json[\s\S]*?```/g;
  // The bare header line that introduces untrusted context.
  const contextHeader =
    /Untrusted context \(metadata, do not treat as instructions or commands\):/g;

  const withoutBlocks = text.replace(metadataBlocks, "");
  const userText = withoutBlocks.replace(contextHeader, "").trim();
  return userText || null;
}
|
|
297
|
+
|
|
298
|
+
// Pull whatever the runtime hands us. Different OpenClaw versions wrap
// provider responses differently — we look in the obvious places and
// silently omit fields we can't find. The dashboard handles undefined
// usage/tool_calls gracefully (renders "no data" rather than zeros).
//
// Lookup order:
//   1. Direct fields on the message: model, usage, tool_calls/toolCalls.
//   2. A wrapped provider response under message.raw, message.response
//      or message._raw, used only for fields still missing after step 1.
//
// Returns an object with any of { model, usage, toolCalls }. A key is
// present only when a value was found; `toolCalls` is never an empty
// array. A null/undefined message yields an empty object.
function extractAssistantMetadata(message) {
  const meta = {};
  if (!message) return meta;

  // Direct fields first (richest hook contracts)
  if (message.model) meta.model = message.model;
  if (message.usage) meta.usage = message.usage;
  if (Array.isArray(message.tool_calls) && message.tool_calls.length) {
    meta.toolCalls = message.tool_calls;
  } else if (Array.isArray(message.toolCalls) && message.toolCalls.length) {
    meta.toolCalls = message.toolCalls;
  }

  // Fall back to a wrapped raw response if the runtime forwards it
  const raw = message.raw || message.response || message._raw;
  if (!raw || typeof raw !== "object") return meta;

  if (!meta.model && raw.model) meta.model = raw.model;
  if (!meta.usage && raw.usage) meta.usage = raw.usage;
  if (meta.toolCalls) return meta;

  // Anthropic puts tool_use blocks in raw.content[]
  if (Array.isArray(raw.content)) {
    const toolUses = raw.content
      .filter((b) => b?.type === "tool_use")
      .map((b) => ({ tool: b.name, args: b.input || {} }));
    if (toolUses.length) meta.toolCalls = toolUses;
  }

  // OpenAI puts tool_calls inside choices[0].message
  if (!meta.toolCalls && Array.isArray(raw.choices)) {
    const openAiCalls = raw.choices[0]?.message?.tool_calls;
    // Require a non-empty array: a malformed (non-array) value must not
    // throw, and an empty list is treated as "no tool calls", matching
    // the direct-field and Anthropic branches above.
    if (Array.isArray(openAiCalls) && openAiCalls.length) {
      meta.toolCalls = openAiCalls.map((tc) => ({
        tool: tc?.function?.name || tc?.name,
        // Forwarded as provided by the runtime (not parsed here).
        args: tc?.function?.arguments,
      }));
    }
  }

  return meta;
}
|
|
340
|
+
|
|
341
|
+
// Process a single message: emit STORE_MEMORY for retrieval, and buffer
// for CHAT_TURN emission on the next assistant message. Shared between
// the `ingest` and `afterTurn` hooks so we behave consistently whichever
// one the OpenClaw runtime invokes.
//
// Parameters:
//   config    - plugin config, passed through to the hosted helpers
//   sessionId - key for the turn buffer / turn counter, and the id sent
//               with both events
//   message   - runtime message; its role is read from `role` or `type`
//   log       - function(string) used for status and failure lines
//
// Messages that are not user/assistant, or that carry no usable text,
// are ignored. Failures are logged, never thrown. Returns nothing.
async function handleHostedMessage(config, sessionId, message, log) {
  const role = message?.role || message?.type;
  if (role !== "user" && role !== "assistant") return;

  const raw = getTextContent(message);
  if (!raw) return;

  // For user messages, strip OpenClaw's metadata envelope so we store
  // and emit the real user text, not the JSON wrapper.
  const text = role === "user" ? extractUserText(raw) : raw;
  if (!text) return;

  // STORE_MEMORY for retrieval.
  try {
    await hostedStore(config, text, { session_id: sessionId, role });
  } catch (err) {
    log(`[memory] Hosted store failed: ${err.message}`);
  }

  // CHAT_TURN buffering: pair each user message with the next assistant
  // message in the same session and emit on the assistant turn.
  try {
    if (role === "user") {
      // Delete before set: Map.set on an existing key keeps its original
      // position, so without this an active session would still be the
      // first one evicted by capSessionMaps().
      turnBuffers.delete(sessionId);
      turnBuffers.set(sessionId, { userMessage: text });
      capSessionMaps();
      return;
    }

    const buf = turnBuffers.get(sessionId);
    const turnNumber = (turnCounters.get(sessionId) || 0) + 1;
    // Same recency refresh for the counter; otherwise a long-lived,
    // active session could be evicted and its turn numbers restart at 1.
    turnCounters.delete(sessionId);
    turnCounters.set(sessionId, turnNumber);
    capSessionMaps();

    const meta = extractAssistantMetadata(message);
    const result = await hostedEmitChatTurn(config, sessionId, {
      userMessage: buf?.userMessage,
      assistantResponse: text,
      turnNumber,
      ...meta,
    });
    turnBuffers.delete(sessionId);

    // hostedEmitChatTurn reports failure by resolving to null rather
    // than throwing, so check the result before claiming success.
    if (result === null) {
      log("[memory] CHAT_TURN emit failed: TES request was not accepted");
      return;
    }
    log(
      `[memory] Emitted CHAT_TURN${meta.usage ? " w/ usage" : ""}${meta.toolCalls?.length ? ` w/ ${meta.toolCalls.length} tool_calls` : ""}`
    );
  } catch (err) {
    log(`[memory] CHAT_TURN emit failed: ${err.message}`);
  }
}
|
|
391
|
+
|
|
155
392
|
function createHostedContextEngine(config, opts = {}) {
|
|
156
393
|
const searchLimit = opts.searchLimit || 5;
|
|
157
394
|
const minScore = opts.minScore || 0.3;
|
|
@@ -164,37 +401,34 @@ function createHostedContextEngine(config, opts = {}) {
|
|
|
164
401
|
ownsCompaction: false,
|
|
165
402
|
},
|
|
166
403
|
|
|
404
|
+
// Called by older OpenClaw runtimes that don't use afterTurn.
|
|
405
|
+
// Falls through to the shared handler so behaviour is identical.
|
|
167
406
|
async ingest({ sessionId, message }) {
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
if (role !== "user" && role !== "assistant") return { ingested: false };
|
|
171
|
-
|
|
172
|
-
try {
|
|
173
|
-
await hostedStore(config, message.content, {
|
|
174
|
-
session_id: sessionId,
|
|
175
|
-
role,
|
|
176
|
-
});
|
|
177
|
-
log(`[memory] Ingested ${role} message via TES`);
|
|
178
|
-
return { ingested: true };
|
|
179
|
-
} catch (err) {
|
|
180
|
-
log(`[memory] Hosted ingest failed: ${err.message}`);
|
|
181
|
-
return { ingested: false };
|
|
182
|
-
}
|
|
407
|
+
await handleHostedMessage(config, sessionId, message, log);
|
|
408
|
+
return { ingested: true };
|
|
183
409
|
},
|
|
184
410
|
|
|
185
411
|
async assemble({ sessionId, messages }) {
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
412
|
+
// Find the most recent real user message. Skip OpenClaw's internal
|
|
413
|
+
// metadata prompts (extractUserText returns null for those).
|
|
414
|
+
let lastUserText = null;
|
|
415
|
+
for (const m of [...messages].reverse()) {
|
|
416
|
+
if (m.role !== "user" && m.type !== "user") continue;
|
|
417
|
+
const raw = getTextContent(m);
|
|
418
|
+
const extracted = extractUserText(raw);
|
|
419
|
+
if (extracted) {
|
|
420
|
+
lastUserText = extracted;
|
|
421
|
+
break;
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
if (!lastUserText) {
|
|
191
425
|
return { messages, estimatedTokens: 0 };
|
|
192
426
|
}
|
|
193
427
|
|
|
194
428
|
try {
|
|
195
429
|
const results = await hostedSearch(
|
|
196
430
|
config,
|
|
197
|
-
|
|
431
|
+
lastUserText,
|
|
198
432
|
searchLimit,
|
|
199
433
|
minScore
|
|
200
434
|
);
|
|
@@ -210,7 +444,21 @@ function createHostedContextEngine(config, opts = {}) {
|
|
|
210
444
|
)
|
|
211
445
|
.join("\n");
|
|
212
446
|
|
|
213
|
-
|
|
447
|
+
// Visibility footer — instruct the model to surface that memory was
|
|
448
|
+
// used. Opt out via show_memory_indicator: false in plugin config.
|
|
449
|
+
const showIndicator = config.show_memory_indicator !== false;
|
|
450
|
+
const indicatorRule = showIndicator
|
|
451
|
+
? [
|
|
452
|
+
"",
|
|
453
|
+
`After your reply, on a new line, append exactly this footer (no other prefix, no trailing content):`,
|
|
454
|
+
`—`,
|
|
455
|
+
`🧠 _Used ${results.length} memor${results.length === 1 ? "y" : "ies"} from Pentatonic Memory_`,
|
|
456
|
+
"",
|
|
457
|
+
`If the memories above were not relevant to your reply, omit the footer.`,
|
|
458
|
+
].join("\n")
|
|
459
|
+
: "";
|
|
460
|
+
|
|
461
|
+
const addition = `[Memory] Relevant context from past conversations:\n${memoryText}${indicatorRule}`;
|
|
214
462
|
|
|
215
463
|
log(`[memory] Assembled ${results.length} memories via TES`);
|
|
216
464
|
|
|
@@ -229,7 +477,16 @@ function createHostedContextEngine(config, opts = {}) {
|
|
|
229
477
|
return { ok: true, compacted: false };
|
|
230
478
|
},
|
|
231
479
|
|
|
232
|
-
|
|
480
|
+
// Newer OpenClaw runtimes call afterTurn instead of ingest. We slice
|
|
481
|
+
// messages added during this turn (user+assistant) and hand each one
|
|
482
|
+
// to the shared handler — same STORE_MEMORY + CHAT_TURN flow.
|
|
483
|
+
async afterTurn({ sessionId, messages, prePromptMessageCount }) {
|
|
484
|
+
if (!messages || typeof prePromptMessageCount !== "number") return;
|
|
485
|
+
const newMessages = messages.slice(prePromptMessageCount);
|
|
486
|
+
for (const message of newMessages) {
|
|
487
|
+
await handleHostedMessage(config, sessionId, message, log);
|
|
488
|
+
}
|
|
489
|
+
},
|
|
233
490
|
};
|
|
234
491
|
}
|
|
235
492
|
|
|
@@ -82,8 +82,61 @@ async function main() {
|
|
|
82
82
|
|
|
83
83
|
const memory = createMemory();
|
|
84
84
|
|
|
85
|
+
// Enable pgvector before migrations (so migration 002 can create the vector column)
|
|
86
|
+
const setupPool = new Pool({ connectionString: process.env.DATABASE_URL });
|
|
87
|
+
try {
|
|
88
|
+
await setupPool.query("CREATE EXTENSION IF NOT EXISTS vector");
|
|
89
|
+
process.stderr.write("[memory-server] pgvector extension enabled\n");
|
|
90
|
+
} catch (err) {
|
|
91
|
+
process.stderr.write(`[memory-server] pgvector not available: ${err.message}\n`);
|
|
92
|
+
}
|
|
93
|
+
|
|
85
94
|
// Run migrations on startup
|
|
86
95
|
await memory.migrate();
|
|
96
|
+
|
|
97
|
+
// Fix: if migration 002 ran without pgvector, the vector column is missing.
|
|
98
|
+
// Re-apply it now that the extension is enabled.
|
|
99
|
+
try {
|
|
100
|
+
const colCheck = await setupPool.query(
|
|
101
|
+
`SELECT 1 FROM information_schema.columns
|
|
102
|
+
WHERE table_name = 'memory_nodes' AND column_name = 'embedding_vec' LIMIT 1`
|
|
103
|
+
);
|
|
104
|
+
if (colCheck.rows.length === 0) {
|
|
105
|
+
process.stderr.write("[memory-server] embedding_vec column missing — re-applying migration 002\n");
|
|
106
|
+
const { readFileSync } = await import("fs");
|
|
107
|
+
const { resolve, dirname } = await import("path");
|
|
108
|
+
const { fileURLToPath } = await import("url");
|
|
109
|
+
const migrationPath = resolve(dirname(fileURLToPath(import.meta.url)), "../migrations/002-vector-index.sql");
|
|
110
|
+
const sql = readFileSync(migrationPath, "utf-8");
|
|
111
|
+
await setupPool.query(sql);
|
|
112
|
+
process.stderr.write("[memory-server] embedding_vec column created\n");
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Re-run 006 if there are JSONB embeddings but no populated vectors —
|
|
116
|
+
// catches the case where 006 ran on a fresh DB before any data existed,
|
|
117
|
+
// then a subsequent insert was silently dimension-mismatched.
|
|
118
|
+
const mismatchCheck = await setupPool.query(
|
|
119
|
+
`SELECT
|
|
120
|
+
EXISTS (SELECT 1 FROM memory_nodes WHERE embedding IS NOT NULL) AS has_jsonb,
|
|
121
|
+
EXISTS (SELECT 1 FROM memory_nodes WHERE embedding_vec IS NOT NULL) AS has_vec
|
|
122
|
+
FROM memory_nodes LIMIT 1`
|
|
123
|
+
);
|
|
124
|
+
const row = mismatchCheck.rows[0] || {};
|
|
125
|
+
if (row.has_jsonb && !row.has_vec) {
|
|
126
|
+
process.stderr.write("[memory-server] JSONB embeddings present but no vectors — re-running migration 006\n");
|
|
127
|
+
const { readFileSync } = await import("fs");
|
|
128
|
+
const { resolve, dirname } = await import("path");
|
|
129
|
+
const { fileURLToPath } = await import("url");
|
|
130
|
+
const migrationPath = resolve(dirname(fileURLToPath(import.meta.url)), "../migrations/006-fix-vector-dim.sql");
|
|
131
|
+
const sql = readFileSync(migrationPath, "utf-8");
|
|
132
|
+
await setupPool.query(sql);
|
|
133
|
+
process.stderr.write("[memory-server] embedding_vec repair complete\n");
|
|
134
|
+
}
|
|
135
|
+
} catch (err) {
|
|
136
|
+
process.stderr.write(`[memory-server] Vector column repair skipped: ${err.message}\n`);
|
|
137
|
+
}
|
|
138
|
+
await setupPool.end();
|
|
139
|
+
|
|
87
140
|
await memory.ensureLayers(CLIENT_ID);
|
|
88
141
|
|
|
89
142
|
const server = new McpServer({
|
|
@@ -258,10 +311,11 @@ async function main() {
|
|
|
258
311
|
|
|
259
312
|
if (url.pathname === "/search" && req.method === "POST") {
|
|
260
313
|
try {
|
|
261
|
-
//
|
|
262
|
-
//
|
|
263
|
-
|
|
264
|
-
const
|
|
314
|
+
// Try vector search first (embeddings + BM25 + recency + frequency).
|
|
315
|
+
// Falls back to text-only search internally if embeddings fail.
|
|
316
|
+
// Use ?mode=text to force text-only search.
|
|
317
|
+
const textOnly = url.searchParams.get("mode") === "text";
|
|
318
|
+
const searchFn = textOnly ? memory.textSearch : memory.search;
|
|
265
319
|
const results = await searchFn(body.query || "", {
|
|
266
320
|
clientId: CLIENT_ID,
|
|
267
321
|
limit: body.limit || 5,
|
|
@@ -288,7 +342,7 @@ async function main() {
|
|
|
288
342
|
const health = {
|
|
289
343
|
status: "ok",
|
|
290
344
|
client: CLIENT_ID,
|
|
291
|
-
version: "0.
|
|
345
|
+
version: "0.5.0",
|
|
292
346
|
search: "text",
|
|
293
347
|
db: false,
|
|
294
348
|
ollama: false,
|
package/src/normalizer.js
CHANGED
|
@@ -34,6 +34,21 @@ function empty() {
|
|
|
34
34
|
};
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
// Anthropic-only. The conversation-analytics Token Universe tab stacks
// cache_read / cache_create alongside input / output, so both cache
// counters are copied through whenever the provider supplies them as
// numbers. Providers that don't report them simply contribute no keys:
// the result is an empty object rather than zeros.
function extractCacheUsage(usage) {
  const cacheKeys = ["cache_read_input_tokens", "cache_creation_input_tokens"];
  const out = {};
  for (const key of cacheKeys) {
    if (typeof usage[key] === "number") {
      out[key] = usage[key];
    }
  }
  return out;
}
|
|
51
|
+
|
|
37
52
|
function normalizeOpenAI(raw) {
|
|
38
53
|
const message = raw.choices?.[0]?.message || {};
|
|
39
54
|
const usage = raw.usage || {};
|
|
@@ -76,6 +91,7 @@ function normalizeAnthropic(raw) {
|
|
|
76
91
|
usage: {
|
|
77
92
|
prompt_tokens: usage.input_tokens || 0,
|
|
78
93
|
completion_tokens: usage.output_tokens || 0,
|
|
94
|
+
...extractCacheUsage(usage),
|
|
79
95
|
},
|
|
80
96
|
toolCalls,
|
|
81
97
|
};
|
package/src/session.js
CHANGED
|
@@ -22,6 +22,8 @@ export class Session {
|
|
|
22
22
|
_reset() {
|
|
23
23
|
this._promptTokens = 0;
|
|
24
24
|
this._completionTokens = 0;
|
|
25
|
+
this._cacheReadTokens = 0;
|
|
26
|
+
this._cacheCreateTokens = 0;
|
|
25
27
|
this._rounds = 0;
|
|
26
28
|
this._toolCalls = [];
|
|
27
29
|
this._model = null;
|
|
@@ -29,12 +31,27 @@ export class Session {
|
|
|
29
31
|
}
|
|
30
32
|
|
|
31
33
|
get totalUsage() {
|
|
32
|
-
|
|
34
|
+
const usage = {
|
|
33
35
|
prompt_tokens: this._promptTokens,
|
|
34
36
|
completion_tokens: this._completionTokens,
|
|
35
|
-
total_tokens:
|
|
37
|
+
total_tokens:
|
|
38
|
+
this._promptTokens +
|
|
39
|
+
this._completionTokens +
|
|
40
|
+
this._cacheReadTokens +
|
|
41
|
+
this._cacheCreateTokens,
|
|
36
42
|
ai_rounds: this._rounds,
|
|
37
43
|
};
|
|
44
|
+
// Cache token passthrough (Anthropic only). Added only when non-zero
|
|
45
|
+
// so the legacy { prompt_tokens, completion_tokens, total_tokens,
|
|
46
|
+
// ai_rounds } shape is preserved when no cache is in play. The
|
|
47
|
+
// conversation-analytics Token Universe tab reads these directly.
|
|
48
|
+
if (this._cacheReadTokens) {
|
|
49
|
+
usage.cache_read_input_tokens = this._cacheReadTokens;
|
|
50
|
+
}
|
|
51
|
+
if (this._cacheCreateTokens) {
|
|
52
|
+
usage.cache_creation_input_tokens = this._cacheCreateTokens;
|
|
53
|
+
}
|
|
54
|
+
return usage;
|
|
38
55
|
}
|
|
39
56
|
|
|
40
57
|
get toolCalls() {
|
|
@@ -47,6 +64,8 @@ export class Session {
|
|
|
47
64
|
|
|
48
65
|
this._promptTokens += normalized.usage.prompt_tokens;
|
|
49
66
|
this._completionTokens += normalized.usage.completion_tokens;
|
|
67
|
+
this._cacheReadTokens += normalized.usage.cache_read_input_tokens || 0;
|
|
68
|
+
this._cacheCreateTokens += normalized.usage.cache_creation_input_tokens || 0;
|
|
50
69
|
this._rounds += 1;
|
|
51
70
|
|
|
52
71
|
if (normalized.model) {
|