@pentatonic-ai/ai-agent-sdk 0.4.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +59 -0
  2. package/bin/cli.js +70 -9
  3. package/dist/index.cjs +25 -3
  4. package/dist/index.js +25 -3
  5. package/package.json +4 -2
  6. package/packages/doctor/README.md +106 -0
  7. package/packages/doctor/__tests__/checks.test.js +187 -0
  8. package/packages/doctor/__tests__/detect.test.js +101 -0
  9. package/packages/doctor/__tests__/output.test.js +92 -0
  10. package/packages/doctor/__tests__/plugins.test.js +111 -0
  11. package/packages/doctor/__tests__/runner.test.js +131 -0
  12. package/packages/doctor/package.json +6 -0
  13. package/packages/doctor/src/checks/hosted-tes.js +109 -0
  14. package/packages/doctor/src/checks/local-memory.js +290 -0
  15. package/packages/doctor/src/checks/platform.js +170 -0
  16. package/packages/doctor/src/checks/universal.js +121 -0
  17. package/packages/doctor/src/detect.js +102 -0
  18. package/packages/doctor/src/index.js +33 -0
  19. package/packages/doctor/src/output.js +55 -0
  20. package/packages/doctor/src/plugins.js +81 -0
  21. package/packages/doctor/src/runner.js +136 -0
  22. package/packages/memory/migrations/005-atomic-memories.sql +16 -0
  23. package/packages/memory/migrations/006-fix-vector-dim.sql +97 -0
  24. package/packages/memory/openclaw-plugin/__tests__/chat-turn.test.js +208 -0
  25. package/packages/memory/openclaw-plugin/__tests__/indicator.test.js +142 -0
  26. package/packages/memory/openclaw-plugin/__tests__/version-check.test.js +136 -0
  27. package/packages/memory/openclaw-plugin/index.js +369 -58
  28. package/packages/memory/openclaw-plugin/openclaw.plugin.json +11 -1
  29. package/packages/memory/openclaw-plugin/package.json +1 -1
  30. package/packages/memory/src/__tests__/distill.test.js +175 -0
  31. package/packages/memory/src/__tests__/openclaw-chat-turn.test.js +289 -0
  32. package/packages/memory/src/distill.js +162 -0
  33. package/packages/memory/src/index.js +1 -0
  34. package/packages/memory/src/ingest.js +10 -0
  35. package/packages/memory/src/openclaw/index.js +280 -23
  36. package/packages/memory/src/openclaw/package.json +1 -1
  37. package/packages/memory/src/server.js +59 -5
  38. package/src/normalizer.js +16 -0
  39. package/src/session.js +21 -2
@@ -117,6 +117,63 @@ async function hostedSearch(config, query, limit = 5, minScore = 0.3) {
117
117
  }
118
118
  }
119
119
 
120
+ /**
121
+ * Emit a CHAT_TURN event to TES so the conversation-analytics dashboard
122
+ * (Token Universe + Tools tabs) can render. Without this, the dashboard
123
+ * filters on eventType=CHAT_TURN and shows nothing for OpenClaw users
124
+ * because the only events emitted are STORE_MEMORY.
125
+ *
126
+ * Anything missing from the message metadata is omitted rather than
127
+ * defaulted to zero — that way the dashboard can distinguish "no data"
128
+ * from "zero usage".
129
+ */
130
+ async function hostedEmitChatTurn(config, sessionId, turn) {
131
+ const attributes = {
132
+ source: "openclaw-plugin",
133
+ user_message: turn.userMessage,
134
+ assistant_response: turn.assistantResponse,
135
+ };
136
+ if (turn.model) attributes.model = turn.model;
137
+ if (turn.usage) attributes.usage = turn.usage;
138
+ if (turn.toolCalls?.length) attributes.tool_calls = turn.toolCalls;
139
+ if (turn.turnNumber !== undefined) attributes.turn_number = turn.turnNumber;
140
+ if (turn.systemPrompt) attributes.system_prompt = turn.systemPrompt;
141
+
142
+ try {
143
+ const response = await fetch(`${config.tes_endpoint}/api/graphql`, {
144
+ method: "POST",
145
+ headers: tesHeaders(config),
146
+ // Route through createModuleEvent on the conversation-analytics
147
+ // module rather than the top-level emitEvent. The latter requires
148
+ // a permission most client API keys don't have ("Access denied:
149
+ // You don't have permission to update emitEvent"), but the
150
+ // module's manifest declares CHAT_TURN as a registered event
151
+ // type, so the module-scoped path is both authorised and
152
+ // consistent with how STORE_MEMORY is emitted.
153
+ body: JSON.stringify({
154
+ query: `mutation Cme($moduleId: String!, $input: ModuleEventInput!) {
155
+ createModuleEvent(moduleId: $moduleId, input: $input) { success eventId }
156
+ }`,
157
+ variables: {
158
+ moduleId: "conversation-analytics",
159
+ input: {
160
+ eventType: "CHAT_TURN",
161
+ data: {
162
+ entity_id: sessionId,
163
+ attributes,
164
+ },
165
+ },
166
+ },
167
+ }),
168
+ signal: AbortSignal.timeout(10000),
169
+ });
170
+ if (!response.ok) return null;
171
+ return response.json();
172
+ } catch {
173
+ return null;
174
+ }
175
+ }
176
+
120
177
  async function hostedStore(config, content, metadata = {}) {
121
178
  try {
122
179
  const response = await fetch(`${config.tes_endpoint}/api/graphql`, {
@@ -152,6 +209,186 @@ async function hostedStore(config, content, metadata = {}) {
152
209
 
153
210
  // --- Hosted context engine ---
154
211
 
212
+ // Per-session turn buffer. Holds the user message until the matching
213
+ // assistant response arrives, at which point we emit a CHAT_TURN.
214
+ // Turn counter is kept in a separate map so it survives buffer clears
215
+ // between turns. Module-scoped (rather than per-engine) so multiple
216
+ // engine instances don't double-buffer the same session.
217
+ //
218
+ // Simple LRU cap to avoid unbounded growth in long-running processes
219
+ // with many sessions (each entry is small, 500 sessions ≈ <50KB, but
220
+ // the cap exists to enforce an upper bound).
221
+ const MAX_SESSIONS = 500;
222
+ const turnBuffers = new Map(); // sessionId → { userMessage }
223
+ const turnCounters = new Map(); // sessionId → highest turn_number emitted
224
+
225
+ function capSessionMaps() {
226
+ while (turnBuffers.size > MAX_SESSIONS) {
227
+ turnBuffers.delete(turnBuffers.keys().next().value);
228
+ }
229
+ while (turnCounters.size > MAX_SESSIONS) {
230
+ turnCounters.delete(turnCounters.keys().next().value);
231
+ }
232
+ }
233
+
234
+ function _resetTurnBuffersForTest() {
235
+ turnBuffers.clear();
236
+ turnCounters.clear();
237
+ }
238
+ export { _resetTurnBuffersForTest };
239
+
240
+ // Extract text from a message content field. OpenClaw may pass content
241
+ // either as a plain string or as an array of content blocks ([{type:"text",
242
+ // text:"..."}, ...]). Returns null if no text can be extracted.
243
+ function getTextContent(message) {
244
+ if (!message) return null;
245
+ const c = message.content;
246
+ if (typeof c === "string") return c;
247
+ if (Array.isArray(c)) {
248
+ const text = c
249
+ .filter((b) => b?.type === "text" && typeof b.text === "string")
250
+ .map((b) => b.text)
251
+ .join(" ");
252
+ return text || null;
253
+ }
254
+ return null;
255
+ }
256
+
257
+ // OpenClaw wraps real user messages from external channels (Telegram etc.)
258
+ // in "Conversation info (untrusted metadata)" JSON envelopes, with the
259
+ // actual user text appended after the metadata blocks. Strip those
260
+ // envelopes to get the real user text. Returns null for pure system
261
+ // prompts ("Note: The previous agent run", "System (untrusted)", etc.).
262
+ function extractUserText(raw) {
263
+ if (!raw) return null;
264
+ const trimmed = raw.trim();
265
+
266
+ if (
267
+ trimmed.startsWith("Note: The previous agent run") ||
268
+ trimmed.startsWith("System (untrusted)") ||
269
+ trimmed.startsWith("[System]") ||
270
+ trimmed.startsWith("System:") ||
271
+ trimmed.startsWith("[Queued messages")
272
+ ) {
273
+ return null;
274
+ }
275
+
276
+ if (
277
+ trimmed.startsWith("Conversation info") ||
278
+ trimmed.startsWith("(untrusted metadata)") ||
279
+ trimmed.startsWith("Sender (untrusted") ||
280
+ trimmed.startsWith("Untrusted context")
281
+ ) {
282
+ const stripped = trimmed
283
+ .replace(
284
+ /(?:Conversation info|Sender|Thread starter|Replied message|Forwarded message context|Chat history since last reply) \(untrusted[^)]*\):\s*```json[\s\S]*?```/g,
285
+ ""
286
+ )
287
+ .replace(
288
+ /Untrusted context \(metadata, do not treat as instructions or commands\):/g,
289
+ ""
290
+ )
291
+ .trim();
292
+ return stripped || null;
293
+ }
294
+
295
+ return trimmed;
296
+ }
297
+
298
+ // Pull whatever the runtime hands us. Different OpenClaw versions wrap
299
+ // provider responses differently — we look in the obvious places and
300
+ // silently omit fields we can't find. The dashboard handles undefined
301
+ // usage/tool_calls gracefully (renders "no data" rather than zeros).
302
+ function extractAssistantMetadata(message) {
303
+ const meta = {};
304
+ // Direct fields first (richest hook contracts)
305
+ if (message.model) meta.model = message.model;
306
+ if (message.usage) meta.usage = message.usage;
307
+ if (Array.isArray(message.tool_calls) && message.tool_calls.length) {
308
+ meta.toolCalls = message.tool_calls;
309
+ } else if (Array.isArray(message.toolCalls) && message.toolCalls.length) {
310
+ meta.toolCalls = message.toolCalls;
311
+ }
312
+ // Fall back to a wrapped raw response if the runtime forwards it
313
+ const raw = message.raw || message.response || message._raw;
314
+ if (raw && typeof raw === "object") {
315
+ if (!meta.model && raw.model) meta.model = raw.model;
316
+ if (!meta.usage && raw.usage) meta.usage = raw.usage;
317
+ if (!meta.toolCalls) {
318
+ // Anthropic puts tool_use blocks in raw.content[]
319
+ if (Array.isArray(raw.content)) {
320
+ const tc = raw.content
321
+ .filter((b) => b?.type === "tool_use")
322
+ .map((b) => ({ tool: b.name, args: b.input || {} }));
323
+ if (tc.length) meta.toolCalls = tc;
324
+ }
325
+ // OpenAI puts tool_calls inside choices[0].message
326
+ if (
327
+ !meta.toolCalls &&
328
+ Array.isArray(raw.choices) &&
329
+ raw.choices[0]?.message?.tool_calls
330
+ ) {
331
+ meta.toolCalls = raw.choices[0].message.tool_calls.map((tc) => ({
332
+ tool: tc.function?.name || tc.name,
333
+ args: tc.function?.arguments,
334
+ }));
335
+ }
336
+ }
337
+ }
338
+ return meta;
339
+ }
340
+
341
+ // Process a single message: emit STORE_MEMORY for retrieval, and buffer
342
+ // for CHAT_TURN emission on the next assistant message. Shared between
343
+ // the `ingest` and `afterTurn` hooks so we behave consistently whichever
344
+ // one the OpenClaw runtime invokes.
345
+ async function handleHostedMessage(config, sessionId, message, log) {
346
+ const role = message?.role || message?.type;
347
+ if (role !== "user" && role !== "assistant") return;
348
+
349
+ const raw = getTextContent(message);
350
+ if (!raw) return;
351
+
352
+ // For user messages, strip OpenClaw's metadata envelope so we store
353
+ // and emit the real user text, not the JSON wrapper.
354
+ const text = role === "user" ? extractUserText(raw) : raw;
355
+ if (!text) return;
356
+
357
+ // STORE_MEMORY for retrieval.
358
+ try {
359
+ await hostedStore(config, text, { session_id: sessionId, role });
360
+ } catch (err) {
361
+ log(`[memory] Hosted store failed: ${err.message}`);
362
+ }
363
+
364
+ // CHAT_TURN buffering: pair each user message with the next assistant
365
+ // message in the same session and emit on the assistant turn.
366
+ try {
367
+ if (role === "user") {
368
+ turnBuffers.set(sessionId, { userMessage: text });
369
+ capSessionMaps();
370
+ } else if (role === "assistant") {
371
+ const buf = turnBuffers.get(sessionId);
372
+ const turnNumber = (turnCounters.get(sessionId) || 0) + 1;
373
+ turnCounters.set(sessionId, turnNumber);
374
+ capSessionMaps();
375
+ const meta = extractAssistantMetadata(message);
376
+ await hostedEmitChatTurn(config, sessionId, {
377
+ userMessage: buf?.userMessage,
378
+ assistantResponse: text,
379
+ turnNumber,
380
+ ...meta,
381
+ });
382
+ turnBuffers.delete(sessionId);
383
+ log(
384
+ `[memory] Emitted CHAT_TURN${meta.usage ? " w/ usage" : ""}${meta.toolCalls?.length ? ` w/ ${meta.toolCalls.length} tool_calls` : ""}`
385
+ );
386
+ }
387
+ } catch (err) {
388
+ log(`[memory] CHAT_TURN emit failed: ${err.message}`);
389
+ }
390
+ }
391
+
155
392
  function createHostedContextEngine(config, opts = {}) {
156
393
  const searchLimit = opts.searchLimit || 5;
157
394
  const minScore = opts.minScore || 0.3;
@@ -164,37 +401,34 @@ function createHostedContextEngine(config, opts = {}) {
164
401
  ownsCompaction: false,
165
402
  },
166
403
 
404
+ // Called by older OpenClaw runtimes that don't use afterTurn.
405
+ // Falls through to the shared handler so behaviour is identical.
167
406
  async ingest({ sessionId, message }) {
168
- if (!message?.content) return { ingested: false };
169
- const role = message.role || message.type;
170
- if (role !== "user" && role !== "assistant") return { ingested: false };
171
-
172
- try {
173
- await hostedStore(config, message.content, {
174
- session_id: sessionId,
175
- role,
176
- });
177
- log(`[memory] Ingested ${role} message via TES`);
178
- return { ingested: true };
179
- } catch (err) {
180
- log(`[memory] Hosted ingest failed: ${err.message}`);
181
- return { ingested: false };
182
- }
407
+ await handleHostedMessage(config, sessionId, message, log);
408
+ return { ingested: true };
183
409
  },
184
410
 
185
411
  async assemble({ sessionId, messages }) {
186
- const lastUserMsg = [...messages]
187
- .reverse()
188
- .find((m) => m.role === "user" || m.type === "user");
189
-
190
- if (!lastUserMsg?.content) {
412
+ // Find the most recent real user message. Skip OpenClaw's internal
413
+ // metadata prompts (extractUserText returns null for those).
414
+ let lastUserText = null;
415
+ for (const m of [...messages].reverse()) {
416
+ if (m.role !== "user" && m.type !== "user") continue;
417
+ const raw = getTextContent(m);
418
+ const extracted = extractUserText(raw);
419
+ if (extracted) {
420
+ lastUserText = extracted;
421
+ break;
422
+ }
423
+ }
424
+ if (!lastUserText) {
191
425
  return { messages, estimatedTokens: 0 };
192
426
  }
193
427
 
194
428
  try {
195
429
  const results = await hostedSearch(
196
430
  config,
197
- lastUserMsg.content,
431
+ lastUserText,
198
432
  searchLimit,
199
433
  minScore
200
434
  );
@@ -210,7 +444,21 @@ function createHostedContextEngine(config, opts = {}) {
210
444
  )
211
445
  .join("\n");
212
446
 
213
- const addition = `[Memory] Relevant context from past conversations:\n${memoryText}`;
447
+ // Visibility footer instruct the model to surface that memory was
448
+ // used. Opt out via show_memory_indicator: false in plugin config.
449
+ const showIndicator = config.show_memory_indicator !== false;
450
+ const indicatorRule = showIndicator
451
+ ? [
452
+ "",
453
+ `After your reply, on a new line, append exactly this footer (no other prefix, no trailing content):`,
454
+ `—`,
455
+ `🧠 _Used ${results.length} memor${results.length === 1 ? "y" : "ies"} from Pentatonic Memory_`,
456
+ "",
457
+ `If the memories above were not relevant to your reply, omit the footer.`,
458
+ ].join("\n")
459
+ : "";
460
+
461
+ const addition = `[Memory] Relevant context from past conversations:\n${memoryText}${indicatorRule}`;
214
462
 
215
463
  log(`[memory] Assembled ${results.length} memories via TES`);
216
464
 
@@ -229,7 +477,16 @@ function createHostedContextEngine(config, opts = {}) {
229
477
  return { ok: true, compacted: false };
230
478
  },
231
479
 
232
- async afterTurn() {},
480
+ // Newer OpenClaw runtimes call afterTurn instead of ingest. We slice
481
+ // messages added during this turn (user+assistant) and hand each one
482
+ // to the shared handler — same STORE_MEMORY + CHAT_TURN flow.
483
+ async afterTurn({ sessionId, messages, prePromptMessageCount }) {
484
+ if (!messages || typeof prePromptMessageCount !== "number") return;
485
+ const newMessages = messages.slice(prePromptMessageCount);
486
+ for (const message of newMessages) {
487
+ await handleHostedMessage(config, sessionId, message, log);
488
+ }
489
+ },
233
490
  };
234
491
  }
235
492
 
@@ -1,5 +1,5 @@
1
1
  {
2
- "name": "@pentatonic-ai/openclaw-memory-plugin",
2
+ "name": "@pentatonic-ai/openclaw-memory-plugin-internal",
3
3
  "version": "0.4.0",
4
4
  "type": "module",
5
5
  "openclaw": {
@@ -82,8 +82,61 @@ async function main() {
82
82
 
83
83
  const memory = createMemory();
84
84
 
85
+ // Enable pgvector before migrations (so migration 002 can create the vector column)
86
+ const setupPool = new Pool({ connectionString: process.env.DATABASE_URL });
87
+ try {
88
+ await setupPool.query("CREATE EXTENSION IF NOT EXISTS vector");
89
+ process.stderr.write("[memory-server] pgvector extension enabled\n");
90
+ } catch (err) {
91
+ process.stderr.write(`[memory-server] pgvector not available: ${err.message}\n`);
92
+ }
93
+
85
94
  // Run migrations on startup
86
95
  await memory.migrate();
96
+
97
+ // Fix: if migration 002 ran without pgvector, the vector column is missing.
98
+ // Re-apply it now that the extension is enabled.
99
+ try {
100
+ const colCheck = await setupPool.query(
101
+ `SELECT 1 FROM information_schema.columns
102
+ WHERE table_name = 'memory_nodes' AND column_name = 'embedding_vec' LIMIT 1`
103
+ );
104
+ if (colCheck.rows.length === 0) {
105
+ process.stderr.write("[memory-server] embedding_vec column missing — re-applying migration 002\n");
106
+ const { readFileSync } = await import("fs");
107
+ const { resolve, dirname } = await import("path");
108
+ const { fileURLToPath } = await import("url");
109
+ const migrationPath = resolve(dirname(fileURLToPath(import.meta.url)), "../migrations/002-vector-index.sql");
110
+ const sql = readFileSync(migrationPath, "utf-8");
111
+ await setupPool.query(sql);
112
+ process.stderr.write("[memory-server] embedding_vec column created\n");
113
+ }
114
+
115
+ // Re-run 006 if there are JSONB embeddings but no populated vectors —
116
+ // catches the case where 006 ran on a fresh DB before any data existed,
117
+ // then a subsequent insert was silently dimension-mismatched.
118
+ const mismatchCheck = await setupPool.query(
119
+ `SELECT
120
+ EXISTS (SELECT 1 FROM memory_nodes WHERE embedding IS NOT NULL) AS has_jsonb,
121
+ EXISTS (SELECT 1 FROM memory_nodes WHERE embedding_vec IS NOT NULL) AS has_vec
122
+ FROM memory_nodes LIMIT 1`
123
+ );
124
+ const row = mismatchCheck.rows[0] || {};
125
+ if (row.has_jsonb && !row.has_vec) {
126
+ process.stderr.write("[memory-server] JSONB embeddings present but no vectors — re-running migration 006\n");
127
+ const { readFileSync } = await import("fs");
128
+ const { resolve, dirname } = await import("path");
129
+ const { fileURLToPath } = await import("url");
130
+ const migrationPath = resolve(dirname(fileURLToPath(import.meta.url)), "../migrations/006-fix-vector-dim.sql");
131
+ const sql = readFileSync(migrationPath, "utf-8");
132
+ await setupPool.query(sql);
133
+ process.stderr.write("[memory-server] embedding_vec repair complete\n");
134
+ }
135
+ } catch (err) {
136
+ process.stderr.write(`[memory-server] Vector column repair skipped: ${err.message}\n`);
137
+ }
138
+ await setupPool.end();
139
+
87
140
  await memory.ensureLayers(CLIENT_ID);
88
141
 
89
142
  const server = new McpServer({
@@ -258,10 +311,11 @@ async function main() {
258
311
 
259
312
  if (url.pathname === "/search" && req.method === "POST") {
260
313
  try {
261
- // Use text search by default (fast, no external dependencies).
262
- // Vector search available via ?mode=vector if embeddings are working.
263
- const useVector = url.searchParams.get("mode") === "vector";
264
- const searchFn = useVector ? memory.search : memory.textSearch;
314
+ // Try vector search first (embeddings + BM25 + recency + frequency).
315
+ // Falls back to text-only search internally if embeddings fail.
316
+ // Use ?mode=text to force text-only search.
317
+ const textOnly = url.searchParams.get("mode") === "text";
318
+ const searchFn = textOnly ? memory.textSearch : memory.search;
265
319
  const results = await searchFn(body.query || "", {
266
320
  clientId: CLIENT_ID,
267
321
  limit: body.limit || 5,
@@ -288,7 +342,7 @@ async function main() {
288
342
  const health = {
289
343
  status: "ok",
290
344
  client: CLIENT_ID,
291
- version: "0.4.7",
345
+ version: "0.5.0",
292
346
  search: "text",
293
347
  db: false,
294
348
  ollama: false,
package/src/normalizer.js CHANGED
@@ -34,6 +34,21 @@ function empty() {
34
34
  };
35
35
  }
36
36
 
37
+ // Anthropic-only. The conversation-analytics Token Universe tab stacks
38
+ // cache_read / cache_create alongside input / output, so we pass them
39
+ // through whenever the provider supplies them. Other providers omit
40
+ // these keys silently.
41
+ function extractCacheUsage(usage) {
42
+ const out = {};
43
+ if (typeof usage.cache_read_input_tokens === "number") {
44
+ out.cache_read_input_tokens = usage.cache_read_input_tokens;
45
+ }
46
+ if (typeof usage.cache_creation_input_tokens === "number") {
47
+ out.cache_creation_input_tokens = usage.cache_creation_input_tokens;
48
+ }
49
+ return out;
50
+ }
51
+
37
52
  function normalizeOpenAI(raw) {
38
53
  const message = raw.choices?.[0]?.message || {};
39
54
  const usage = raw.usage || {};
@@ -76,6 +91,7 @@ function normalizeAnthropic(raw) {
76
91
  usage: {
77
92
  prompt_tokens: usage.input_tokens || 0,
78
93
  completion_tokens: usage.output_tokens || 0,
94
+ ...extractCacheUsage(usage),
79
95
  },
80
96
  toolCalls,
81
97
  };
package/src/session.js CHANGED
@@ -22,6 +22,8 @@ export class Session {
22
22
  _reset() {
23
23
  this._promptTokens = 0;
24
24
  this._completionTokens = 0;
25
+ this._cacheReadTokens = 0;
26
+ this._cacheCreateTokens = 0;
25
27
  this._rounds = 0;
26
28
  this._toolCalls = [];
27
29
  this._model = null;
@@ -29,12 +31,27 @@ export class Session {
29
31
  }
30
32
 
31
33
  get totalUsage() {
32
- return {
34
+ const usage = {
33
35
  prompt_tokens: this._promptTokens,
34
36
  completion_tokens: this._completionTokens,
35
- total_tokens: this._promptTokens + this._completionTokens,
37
+ total_tokens:
38
+ this._promptTokens +
39
+ this._completionTokens +
40
+ this._cacheReadTokens +
41
+ this._cacheCreateTokens,
36
42
  ai_rounds: this._rounds,
37
43
  };
44
+ // Cache token passthrough (Anthropic only). Added only when non-zero
45
+ // so the legacy { prompt_tokens, completion_tokens, total_tokens,
46
+ // ai_rounds } shape is preserved when no cache is in play. The
47
+ // conversation-analytics Token Universe tab reads these directly.
48
+ if (this._cacheReadTokens) {
49
+ usage.cache_read_input_tokens = this._cacheReadTokens;
50
+ }
51
+ if (this._cacheCreateTokens) {
52
+ usage.cache_creation_input_tokens = this._cacheCreateTokens;
53
+ }
54
+ return usage;
38
55
  }
39
56
 
40
57
  get toolCalls() {
@@ -47,6 +64,8 @@ export class Session {
47
64
 
48
65
  this._promptTokens += normalized.usage.prompt_tokens;
49
66
  this._completionTokens += normalized.usage.completion_tokens;
67
+ this._cacheReadTokens += normalized.usage.cache_read_input_tokens || 0;
68
+ this._cacheCreateTokens += normalized.usage.cache_creation_input_tokens || 0;
50
69
  this._rounds += 1;
51
70
 
52
71
  if (normalized.model) {