@pentatonic-ai/ai-agent-sdk 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -70,6 +70,7 @@ That's it. The plugin hooks automatically search memories on every prompt and st
 - **Automatic memory** -- every conversation turn is stored with embeddings and HyDE query expansion
 - **Semantic search** -- multi-signal retrieval combining vector similarity, BM25 full-text, recency decay, and access frequency
 - **Memory layers** -- episodic (recent), semantic (consolidated), procedural (how-to), working (temporary)
+- **Distilled memory** -- a background LLM pass extracts atomic facts from each raw turn and stores each as its own node in the semantic layer, linked back to the source. A query like *"what does Phil drink?"* matches *"Phil drinks cortado"* more reliably than a mixed paragraph covering food, drinks, and hobbies. On by default; the raw turn is still preserved.
 - **Decay and consolidation** -- memories fade over time; frequently accessed ones get promoted
 
 ### Change models
@@ -195,7 +196,7 @@ openclaw pentatonic-memory local
 
 OpenClaw's context engine hooks fire on every lifecycle event:
 
-- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion
+- **Ingest** -- every user and assistant message is stored with embeddings and HyDE query expansion, then distilled into atomic facts in the background (see [Distilled memory](#what-you-get))
 - **Assemble** -- relevant memories are injected as system prompt context before every model run
 - **Compact** -- decay cycle runs when the context window fills
 - **After turn** -- high-access memories get consolidated to the semantic layer
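
The bullets above compress a lot of behaviour. As a hedged sketch, here is how one raw turn flows through the `ingest` entry point that appears later in this diff; the import path is hypothetical, and the mock `db`/`ai`/`llm` handles copy the shapes the SDK's own test fixtures use, so this illustrates the linking behaviour rather than a documented API.

import { ingest } from "@pentatonic-ai/ai-agent-sdk"; // hypothetical import path

// Minimal mocks, same shape as the SDK test fixtures further down.
const db = async (sql) =>
  sql.includes("SELECT id FROM memory_layers")
    ? { rows: [{ id: "layer-1" }] }
    : { rows: [] };
const ai = { embed: async () => null };
const llm = { chat: async () => "[]" }; // the distill pass reads the LLM reply

const raw = await ingest(db, ai, llm,
  "Phil drinks cortados, likes rye bread, and climbs on weekends.",
  { clientId: "demo" });
// In the background, distill() stores one semantic-layer node per atomic
// fact (e.g. "Phil drinks cortados"), each linked back to raw.id via
// sourceId, which is why "what does Phil drink?" can hit a single clean fact.
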
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
@@ -0,0 +1,193 @@
+/**
+ * Query expansion fallback tests.
+ *
+ * When the raw user prompt returns no memories, the plugin retries once
+ * with a keyword-distilled form. This recovers matches for verbose
+ * natural-language prompts that fall below the semantic threshold.
+ */
+
+import plugin, { extractSearchKeywords } from "../index.js";
+
+const realFetch = globalThis.fetch;
+
+afterEach(() => {
+  globalThis.fetch = realFetch;
+});
+
+function makeEngine(extraConfig = {}) {
+  let factory;
+  plugin.register({
+    pluginConfig: {
+      tes_endpoint: "https://x.test",
+      tes_client_id: "c",
+      tes_api_key: "tes_c_xyz",
+      ...extraConfig,
+    },
+    registerTool: () => {},
+    registerContextEngine: (_name, fn) => {
+      factory = fn;
+    },
+  });
+  if (!factory) throw new Error("plugin did not register a context engine");
+  return factory();
+}
+
+describe("extractSearchKeywords", () => {
+  it("strips stopwords from verbose prompts", () => {
+    const out = extractSearchKeywords(
+      "when I was working in the thing-event-system, I copied migrations, what were they?"
+    );
+    expect(out).toMatch(/thing-event-system/);
+    expect(out).toMatch(/migrations/);
+    expect(out).not.toMatch(/\bwhen\b/);
+    expect(out).not.toMatch(/\bwhat\b/);
+  });
+
+  it("preserves hyphenated compounds", () => {
+    expect(extractSearchKeywords("where is thing-event-system?")).toMatch(
+      /thing-event-system/
+    );
+  });
+
+  it("returns null when the distilled form equals the input", () => {
+    expect(extractSearchKeywords("deep-memory migrations")).toBeNull();
+  });
+
+  it("returns null when the prompt is only stopwords", () => {
+    expect(extractSearchKeywords("what were they?")).toBeNull();
+  });
+
+  it("returns null for non-string input", () => {
+    expect(extractSearchKeywords(null)).toBeNull();
+    expect(extractSearchKeywords(undefined)).toBeNull();
+  });
+});
+
+describe("assemble — keyword retry fallback (hosted mode)", () => {
+  it("retries with distilled keywords when raw prompt misses", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      const body = JSON.parse(init.body);
+      const q = body.variables.query;
+      queries.push(q);
+      const isFirst = queries.length === 1;
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({
+          data: {
+            semanticSearchMemories: isFirst
+              ? []
+              : [{ id: "m1", content: "matched on retry", similarity: 0.7 }],
+          },
+        }),
+      };
+    };
+
+    const engine = makeEngine();
+    const result = await engine.assemble({
+      sessionId: "s",
+      messages: [
+        {
+          role: "user",
+          content:
+            "when I was working in the thing-event-system, what were those migration changes again?",
+        },
+      ],
+    });
+
+    expect(queries).toHaveLength(2);
+    expect(queries[0]).toMatch(/when I was working/);
+    expect(queries[1]).not.toMatch(/\bwhen\b/);
+    expect(queries[1]).toMatch(/thing-event-system/);
+    expect(queries[1]).toMatch(/migration/);
+    expect(result.systemPromptAddition).toMatch(/matched on retry/);
+  });
+
+  it("does not retry when the raw prompt already returns results", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      queries.push(JSON.parse(init.body).variables.query);
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({
+          data: {
+            semanticSearchMemories: [
+              { id: "m1", content: "hit", similarity: 0.9 },
+            ],
+          },
+        }),
+      };
+    };
+
+    const engine = makeEngine();
+    await engine.assemble({
+      sessionId: "s",
+      messages: [{ role: "user", content: "thing-event-system migrations" }],
+    });
+
+    expect(queries).toHaveLength(1);
+  });
+
+  it("does not retry when distilled query equals the raw query", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      queries.push(JSON.parse(init.body).variables.query);
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({ data: { semanticSearchMemories: [] } }),
+      };
+    };
+
+    const engine = makeEngine();
+    await engine.assemble({
+      sessionId: "s",
+      messages: [{ role: "user", content: "deep-memory migrations" }],
+    });
+
+    expect(queries).toHaveLength(1);
+  });
+});
+
+describe("assemble — keyword retry fallback (local mode)", () => {
+  it("retries via /search endpoint when raw query returns nothing", async () => {
+    const queries = [];
+    globalThis.fetch = async (_url, init) => {
+      const body = JSON.parse(init.body);
+      queries.push(body.query);
+      const isFirst = queries.length === 1;
+      return {
+        ok: true,
+        status: 200,
+        json: async () => ({
+          results: isFirst
+            ? []
+            : [{ id: "m1", content: "local hit", similarity: 0.6 }],
+        }),
+      };
+    };
+
+    let factory;
+    plugin.register({
+      pluginConfig: {}, // no tes_* creds → local mode
+      registerTool: () => {},
+      registerContextEngine: (_name, fn) => {
+        factory = fn;
+      },
+    });
+    const engine = factory();
+
+    const result = await engine.assemble({
+      sessionId: "s",
+      messages: [
+        { role: "user", content: "what were the migration changes again?" },
+      ],
+    });
+
+    expect(queries).toHaveLength(2);
+    expect(queries[1]).toMatch(/migration/);
+    expect(result.systemPromptAddition).toMatch(/local hit/);
+  });
+});
@@ -88,6 +88,33 @@ const stats = {
   setupPrompted: false,
 };
 
+// --- Query keyword extraction ---
+// Natural-language prompts ("what were those changes again?") often fall
+// below the semantic threshold even when relevant memories exist. We
+// drop stopwords and retry with the keyword-distilled form.
+const STOPWORDS = new Set([
+  "a", "am", "an", "and", "are", "as", "at", "be", "been", "but", "by",
+  "can", "did", "do", "does", "for", "from", "had", "has", "have", "he",
+  "her", "him", "his", "how", "i", "if", "in", "into", "is", "it", "its",
+  "just", "like", "made", "me", "my", "need", "needed", "of", "on", "or",
+  "our", "out", "over", "she", "so", "some", "than", "that", "the",
+  "their", "them", "then", "there", "these", "they", "this", "those",
+  "to", "up", "us", "was", "we", "went", "were", "what", "when", "where",
+  "which", "who", "why", "will", "with", "would", "you", "your",
+]);
+
+export function extractSearchKeywords(query) {
+  if (typeof query !== "string") return null;
+  const tokens = query
+    .toLowerCase()
+    .split(/[^a-z0-9-]+/)
+    .filter((t) => t.length >= 2 && !STOPWORDS.has(t));
+  if (tokens.length === 0) return null;
+  const distilled = tokens.join(" ");
+  if (distilled === query.toLowerCase().trim()) return null;
+  return distilled;
+}
+
 // --- Local mode: HTTP to memory server ---
 
 async function localSearch(baseUrl, query, limit = 5, minScore = 0.3) {
@@ -432,11 +459,23 @@ export default {
 
     stats.mode = hosted ? "hosted" : "local";
 
-    // Unified search/store that routes to local or hosted
-    const search = hosted
+    // Unified search/store that routes to local or hosted.
+    // If the raw query returns nothing, retry once with the
+    // keyword-distilled form — natural-language prompts frequently
+    // miss the semantic threshold even when matches exist.
+    const searchBackend = hosted
       ? (query, limit, score) => hostedSearch(config, query, limit, score)
       : (query, limit, score) => localSearch(baseUrl, query, limit, score);
 
+    const search = async (query, limit, score) => {
+      const first = await searchBackend(query, limit, score);
+      if (first.length > 0) return first;
+      const keywords = extractSearchKeywords(query);
+      if (!keywords) return first;
+      log(`search: retry "${query.substring(0, 40)}" → "${keywords}"`);
+      return searchBackend(keywords, limit, score);
+    };
+
    const store = hosted
       ? (content, metadata) => hostedStore(config, content, metadata)
       : (content, metadata) => localStore(baseUrl, content, metadata);
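
For reference, the extraction above behaves like this (outputs computed by hand from the STOPWORDS set and the tokeniser; the import path follows the test file's "../index.js" and may differ for package consumers):

import { extractSearchKeywords } from "./index.js";

extractSearchKeywords(
  "when I was working in the thing-event-system, what were those migration changes again?"
);
// → "working thing-event-system migration changes again"

extractSearchKeywords("deep-memory migrations"); // → null (already distilled)
extractSearchKeywords("what were they?"); // → null (nothing but stopwords)
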
@@ -2,7 +2,7 @@
   "id": "pentatonic-memory",
   "name": "Pentatonic Memory",
   "description": "Persistent, searchable memory with multi-signal retrieval and HyDE query expansion. Local (Docker + Ollama) or hosted (Pentatonic TES).",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "kind": "context-engine",
   "configSchema": {
     "type": "object",
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/openclaw-memory-plugin",
-  "version": "0.8.0",
+  "version": "0.8.1",
   "description": "Pentatonic Memory plugin for OpenClaw — persistent, searchable memory with multi-signal retrieval and HyDE query expansion",
   "type": "module",
   "main": "index.js",
@@ -168,6 +168,11 @@ describe("named exports", () => {
 // --- AI client ---
 
 describe("createAIClient", () => {
+  const realFetch = globalThis.fetch;
+  afterEach(() => {
+    globalThis.fetch = realFetch;
+  });
+
   it("returns an object with embed() and chat()", () => {
     const client = createAIClient({
       url: "http://localhost:11434/v1",
@@ -185,6 +190,79 @@ describe("createAIClient", () => {
     });
     expect(client).toBeDefined();
   });
+
+  it("hits /embeddings by default (OpenAI spec)", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ data: [{ embedding: [0.1, 0.2] }] }) };
+    };
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "test",
+    });
+    await client.embed("hello");
+    expect(hitUrl).toBe("http://localhost:11434/v1/embeddings");
+  });
+
+  it("uses embeddingPath override (e.g. Pentatonic AI Gateway)", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ data: [{ embedding: [0.1] }] }) };
+    };
+    const client = createAIClient({
+      url: "https://lambda-gateway.pentatonic.com/v1",
+      model: "NV-Embed-v2",
+      embeddingPath: "embed",
+    });
+    await client.embed("hello");
+    expect(hitUrl).toBe("https://lambda-gateway.pentatonic.com/v1/embed");
+  });
+
+  it("normalises leading slashes and trailing base-url slashes", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ data: [{ embedding: [0.1] }] }) };
+    };
+    const client = createAIClient({
+      url: "https://gateway.test/v1/",
+      model: "m",
+      embeddingPath: "/embed",
+    });
+    await client.embed("hi");
+    expect(hitUrl).toBe("https://gateway.test/v1/embed");
+  });
+
+  it("chatPath override applies to chat() too", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ choices: [{ message: { content: "hi" } }] }) };
+    };
+    const client = createAIClient({
+      url: "https://gateway.test/v1",
+      model: "m",
+      chatPath: "chat",
+    });
+    await client.chat([{ role: "user", content: "q" }]);
+    expect(hitUrl).toBe("https://gateway.test/v1/chat");
+  });
+
+  it("chat defaults to /chat/completions", async () => {
+    let hitUrl;
+    globalThis.fetch = async (url) => {
+      hitUrl = url;
+      return { ok: true, json: async () => ({ choices: [{ message: { content: "hi" } }] }) };
+    };
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    await client.chat([{ role: "user", content: "q" }]);
+    expect(hitUrl).toBe("http://localhost:11434/v1/chat/completions");
+  });
 });
 
 // --- Search options contract ---
@@ -231,4 +309,45 @@ describe("ingest options contract", () => {
     expect(result).toHaveProperty("content");
     expect(result).toHaveProperty("layerId");
   });
+
+  it("hands the distill background promise to opts.waitUntil when provided", async () => {
+    const mockDb = async (sql) => {
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      return { rows: [] };
+    };
+    const mockAi = { embed: async () => null };
+    const mockLlm = { chat: async () => "[]" };
+
+    const registered = [];
+    await ingest(mockDb, mockAi, mockLlm, "test content", {
+      clientId: "test-client",
+      waitUntil: (p) => registered.push(p),
+    });
+
+    expect(registered.length).toBe(1);
+    expect(typeof registered[0].then).toBe("function");
+    await registered[0]; // should resolve cleanly
+  });
+
+  it("does not call waitUntil when distill is skipped", async () => {
+    const mockDb = async (sql) => {
+      if (sql.includes("SELECT id FROM memory_layers")) {
+        return { rows: [{ id: "layer-1" }] };
+      }
+      return { rows: [] };
+    };
+    const mockAi = { embed: async () => null };
+    const mockLlm = { chat: async () => "[]" };
+
+    const registered = [];
+    await ingest(mockDb, mockAi, mockLlm, "test content", {
+      clientId: "test-client",
+      distill: false,
+      waitUntil: (p) => registered.push(p),
+    });
+
+    expect(registered.length).toBe(0);
+  });
 });
@@ -8,10 +8,16 @@
 /**
  * Create an AI client from config.
  *
+ * Defaults to OpenAI-standard paths (`/embeddings`, `/chat/completions`).
+ * Override with `embeddingPath` / `chatPath` for gateways that use
+ * different routes — e.g. Pentatonic AI Gateway exposes `/embed`.
+ *
  * @param {object} config
  * @param {string} config.url - Base URL (e.g. "http://ollama:11434/v1")
  * @param {string} config.model - Model name
  * @param {string} [config.apiKey] - Optional API key
+ * @param {string} [config.embeddingPath="embeddings"] - Path appended to url
+ * @param {string} [config.chatPath="chat/completions"] - Path appended to url
  * @param {number} [config.dimensions] - Expected embedding dimensions
  * @returns {object} Client with embed() and chat() methods
  */
@@ -22,6 +28,23 @@ export function createAIClient(config) {
     headers["X-API-Key"] = config.apiKey;
   }
 
+  // Strip leading slashes so callers can use "embed" or "/embed"
+  // interchangeably. Base url may or may not have a trailing slash.
+  // Plain loops (not regex) to avoid polynomial-regex scanner flags.
+  const stripLeading = (s) => {
+    let i = 0;
+    while (i < s.length && s[i] === "/") i++;
+    return i === 0 ? s : s.slice(i);
+  };
+  const stripTrailing = (s) => {
+    let i = s.length;
+    while (i > 0 && s[i - 1] === "/") i--;
+    return i === s.length ? s : s.slice(0, i);
+  };
+  const embeddingPath = stripLeading(config.embeddingPath || "embeddings");
+  const chatPath = stripLeading(config.chatPath || "chat/completions");
+  const baseUrl = stripTrailing(config.url);
+
   return {
     /**
      * Generate an embedding vector for text.
@@ -32,7 +55,7 @@ export function createAIClient(config) {
      */
     async embed(text, inputType = "passage") {
       try {
-        const res = await fetch(`${config.url}/embeddings`, {
+        const res = await fetch(`${baseUrl}/${embeddingPath}`, {
           method: "POST",
           headers,
           body: JSON.stringify({
@@ -70,7 +93,7 @@ export function createAIClient(config) {
      */
     async chat(messages, opts = {}) {
       try {
-        const res = await fetch(`${config.url}/chat/completions`, {
+        const res = await fetch(`${baseUrl}/${chatPath}`, {
           method: "POST",
           headers,
           body: JSON.stringify({
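
A short usage sketch of the new path overrides, mirroring the test fixtures above (the relative import path is an assumption):

import { createAIClient } from "./ai-client.js";

// Defaults follow the OpenAI spec: {url}/embeddings and {url}/chat/completions.
const local = createAIClient({ url: "http://localhost:11434/v1", model: "m" });
await local.embed("hello"); // POSTs to http://localhost:11434/v1/embeddings

// Gateway route override; stray slashes on either side are normalised.
const gateway = createAIClient({
  url: "https://lambda-gateway.pentatonic.com/v1/",
  model: "NV-Embed-v2",
  embeddingPath: "/embed",
});
await gateway.embed("hello"); // POSTs to https://lambda-gateway.pentatonic.com/v1/embed
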
@@ -17,6 +17,10 @@ import { distill } from "./distill.js";
  * @param {string} [opts.layerType="episodic"] - Target layer
  * @param {object} [opts.metadata] - Additional metadata
  * @param {Function} [opts.logger] - Optional logger
+ * @param {Function} [opts.waitUntil] - Platform hook to register background
+ *   tasks (e.g. Cloudflare Worker ctx.waitUntil). If provided, the distill
+ *   background task is handed to it so the host keeps it alive past return.
+ *   Without it, distill is fire-and-forget (fine for Node/browser).
  * @returns {Promise<{id: string, content: string, layerId: string}>}
  */
 export async function ingest(db, ai, llm, content, opts = {}) {
@@ -86,9 +90,11 @@ export async function ingest(db, ai, llm, content, opts = {}) {
   // Distill atomic facts in the background — only for raw ingestions
   // (skip if this call is already storing a distilled atom or user opted out).
   if (opts.distill !== false && !opts.sourceId) {
-    distill(db, ai, llm, memoryId, content, { ...opts, logger: log }).catch(
-      (err) => log(`distill failed for ${memoryId}: ${err.message}`)
-    );
+    const distillPromise = distill(db, ai, llm, memoryId, content, {
+      ...opts,
+      logger: log,
+    }).catch((err) => log(`distill failed for ${memoryId}: ${err.message}`));
+    if (typeof opts.waitUntil === "function") opts.waitUntil(distillPromise);
   }
 
   return { id: memoryId, content, layerId };
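
A hedged sketch of the waitUntil wiring the JSDoc above describes, inside a Cloudflare Worker. The fetch handler shape and ctx.waitUntil are standard Workers API; the env.DB/env.AI/env.LLM bindings and the import path are placeholders, not documented SDK surface:

import { ingest } from "./ingest.js";

export default {
  async fetch(request, env, ctx) {
    const result = await ingest(env.DB, env.AI, env.LLM, await request.text(), {
      clientId: "worker-demo",
      // Hand the background distill promise to the platform so it keeps
      // running after the response returns; without this it is fire-and-forget.
      waitUntil: (p) => ctx.waitUntil(p),
    });
    return Response.json(result);
  },
};
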
@@ -13,6 +13,9 @@
  * LLM_URL — OpenAI-compatible chat endpoint (required)
  * LLM_MODEL — Chat model name for HyDE (required)
  * API_KEY — API key for embedding/LLM endpoints (optional)
+ * EMBEDDING_PATH — Path appended to EMBEDDING_URL (default: "embeddings").
+ *   Set to "embed" for the Pentatonic AI Gateway.
+ * CHAT_PATH — Path appended to LLM_URL (default: "chat/completions")
  * CLIENT_ID — Client ID for memory scoping (default: "default")
  * PORT — HTTP port for SSE transport (default: 3333)
  */
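
For example, pointing the server at the Pentatonic AI Gateway would look roughly like this (illustrative values; the gateway host is taken from the test fixtures above):

EMBEDDING_URL=https://lambda-gateway.pentatonic.com/v1
EMBEDDING_PATH=embed
CHAT_PATH=chat/completions   # the default, shown for completeness
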
@@ -46,11 +49,13 @@ function createMemory() {
       url: process.env.EMBEDDING_URL,
       model: process.env.EMBEDDING_MODEL,
       apiKey: process.env.API_KEY,
+      embeddingPath: process.env.EMBEDDING_PATH,
     },
     llm: {
       url: process.env.LLM_URL,
       model: process.env.LLM_MODEL,
       apiKey: process.env.API_KEY,
+      chatPath: process.env.CHAT_PATH,
     },
     logger: (msg) => process.stderr.write(`[memory] ${msg}\n`),
   });
@@ -342,7 +347,7 @@ async function main() {
   const health = {
     status: "ok",
     client: CLIENT_ID,
-    version: "0.5.0",
+    version: "0.5.2",
     search: "text",
     db: false,
     ollama: false,