@pentatonic-ai/ai-agent-sdk 0.5.5 → 0.5.6

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.5.5",
+  "version": "0.5.6",
   "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
@@ -263,6 +263,62 @@ describe("createAIClient", () => {
     await client.chat([{ role: "user", content: "q" }]);
     expect(hitUrl).toBe("http://localhost:11434/v1/chat/completions");
   });
+
+  it("embedBatch sends all inputs in one HTTP call", async () => {
+    let callCount = 0;
+    let lastBody;
+    globalThis.fetch = async (_url, opts) => {
+      callCount++;
+      lastBody = JSON.parse(opts.body);
+      return {
+        ok: true,
+        json: async () => ({
+          data: lastBody.input.map((_, i) => ({
+            embedding: [0.1, 0.2, 0.3],
+            index: i,
+          })),
+        }),
+      };
+    };
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    const out = await client.embedBatch(["a", "b", "c"], "passage");
+    expect(callCount).toBe(1);
+    expect(lastBody.input).toEqual(["a", "b", "c"]);
+    expect(out.length).toBe(3);
+    expect(out.every((r) => r.embedding.length === 3)).toBe(true);
+  });
+
+  it("embedBatch returns nulls on non-2xx without throwing", async () => {
+    globalThis.fetch = async () => ({ ok: false, json: async () => ({}) });
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    const out = await client.embedBatch(["a", "b"]);
+    expect(out).toEqual([null, null]);
+  });
+
+  it("embedBatch parses Ollama/Pentatonic-style {embeddings: [[...]]} response", async () => {
+    globalThis.fetch = async () => ({
+      ok: true,
+      json: async () => ({
+        embeddings: [
+          [0.1, 0.2],
+          [0.3, 0.4],
+        ],
+      }),
+    });
+    const client = createAIClient({
+      url: "http://localhost:11434/v1",
+      model: "m",
+    });
+    const out = await client.embedBatch(["x", "y"]);
+    expect(out[0].embedding).toEqual([0.1, 0.2]);
+    expect(out[1].embedding).toEqual([0.3, 0.4]);
+  });
 });
 
 // --- Search options contract ---
@@ -45,6 +45,40 @@ export function createAIClient(config) {
   const chatPath = stripLeading(config.chatPath || "chat/completions");
   const baseUrl = stripTrailing(config.url);
 
+  /**
+   * Send an embedding request with N inputs. Shared by embed() and
+   * embedBatch(). Returns an array of { embedding, dimensions, model } or
+   * nulls (one per input, preserving order).
+   */
+  async function rawEmbed(texts, inputType) {
+    if (!texts.length) return [];
+    try {
+      const res = await fetch(`${baseUrl}/${embeddingPath}`, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({
+          input: texts.map((t) => (t ?? "").substring(0, 8192)),
+          model: config.model,
+          input_type: inputType,
+        }),
+        signal: AbortSignal.timeout(30000),
+      });
+      if (!res.ok) return texts.map(() => null);
+      const data = await res.json();
+      // OpenAI-compat: data.data = [{embedding, index}, ...]
+      // Pentatonic gateway / Ollama: data.embeddings = [[...], [...], ...]
+      const vectors =
+        data.data?.map((d) => d.embedding) || data.embeddings || [];
+      return texts.map((_, i) => {
+        const embedding = vectors[i];
+        if (!embedding) return null;
+        return { embedding, dimensions: embedding.length, model: config.model };
+      });
+    } catch {
+      return texts.map(() => null);
+    }
+  }
+
   return {
     /**
      * Generate an embedding vector for text.
@@ -54,32 +88,25 @@ export function createAIClient(config) {
      * @returns {Promise<{embedding: number[], dimensions: number, model: string} | null>}
      */
     async embed(text, inputType = "passage") {
-      try {
-        const res = await fetch(`${baseUrl}/${embeddingPath}`, {
-          method: "POST",
-          headers,
-          body: JSON.stringify({
-            input: [text.substring(0, 8192)],
-            model: config.model,
-            input_type: inputType,
-          }),
-          signal: AbortSignal.timeout(30000),
-        });
-
-        if (!res.ok) return null;
-
-        const data = await res.json();
-        const embedding = data.data?.[0]?.embedding || data.embeddings?.[0];
-        if (!embedding) return null;
+      const results = await rawEmbed([text], inputType);
+      return results[0];
+    },
 
-        return {
-          embedding,
-          dimensions: embedding.length,
-          model: config.model,
-        };
-      } catch {
-        return null;
-      }
+    /**
+     * Generate embeddings for N texts in a single HTTP round-trip. Returns
+     * an array the same length as the input; each entry is either the
+     * embedding object or null on failure.
+     *
+     * Batching matters under load — one call instead of N cuts GPU overhead
+     * and downstream queueing. Used by distill() to embed all atoms from a
+     * raw memory in one shot rather than N serial calls.
+     *
+     * @param {string[]} texts
+     * @param {string} [inputType="passage"]
+     * @returns {Promise<Array<{embedding: number[], dimensions: number, model: string} | null>>}
+     */
+    async embedBatch(texts, inputType = "passage") {
+      return rawEmbed(texts, inputType);
     },
 
     /**
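
A quick usage sketch of the new batch surface (the import specifier and the model id are assumptions for illustration, not taken from this diff):

  import { createAIClient } from "@pentatonic-ai/ai-agent-sdk";

  const ai = createAIClient({
    url: "http://localhost:11434/v1",
    model: "m", // any embedding-capable model id
  });

  // One HTTP round-trip for all inputs; failed entries come back as null
  // rather than throwing, mirroring embed()'s null-on-failure contract.
  const results = await ai.embedBatch(["alpha", "beta", "gamma"], "passage");
  for (const r of results) {
    if (r) console.log(r.dimensions, r.model);
  }
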
@@ -104,8 +104,27 @@ export async function distill(db, ai, llm, sourceId, content, opts = {}) {
   }
   const layerId = layerResult.rows[0].id;
 
+  // Batch-embed all atoms in one HTTP call. Under load this is a big
+  // win over N serial embed calls — one GPU forward pass instead of N,
+  // less downstream queueing.
+  let embeddings;
+  if (ai.embedBatch) {
+    try {
+      embeddings = await ai.embedBatch(facts, "passage");
+    } catch (err) {
+      log(`distill: batch embed failed: ${err.message}`);
+      embeddings = facts.map(() => null);
+    }
+  } else {
+    // Older AI clients without embedBatch — fall through to per-atom embed
+    // inside the loop below. Kept for backwards compat with any custom
+    // client passed into createMemorySystem.
+    embeddings = null;
+  }
+
   const stored = [];
-  for (const fact of facts) {
+  for (let i = 0; i < facts.length; i++) {
+    const fact = facts[i];
     try {
       const atomId = `mem_${crypto.randomUUID()}`;
 
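
The capability check in that hunk is duck-typed: distill() only requires an async embed(), and takes the single-round-trip path when embedBatch() is also present. A minimal sketch of a compatible custom client (myEmbedder is a hypothetical stand-in, not part of the SDK):

  const customAI = {
    // Required: single-text embed, resolving to null on failure.
    async embed(text, inputType = "passage") {
      const vec = await myEmbedder(text, inputType); // hypothetical embedder
      return vec
        ? { embedding: vec, dimensions: vec.length, model: "custom" }
        : null;
    },
    // Optional: when present, distill() uses this instead of N embed() calls.
    async embedBatch(texts, inputType = "passage") {
      return Promise.all(texts.map((t) => this.embed(t, inputType)));
    },
  };
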
@@ -124,9 +143,13 @@ export async function distill(db, ai, llm, sourceId, content, opts = {}) {
         ]
       );
 
-      // Embed the atom (non-fatal)
+      // Attach embedding — from the batch when available, else fall back
+      // to a per-atom call.
       try {
-        const embResult = await ai.embed(fact, "passage");
+        let embResult = embeddings ? embeddings[i] : null;
+        if (!embResult && !embeddings) {
+          embResult = await ai.embed(fact, "passage");
+        }
         if (embResult?.embedding) {
           await db(
             `UPDATE memory_nodes SET embedding = $1, updated_at = NOW() WHERE id = $2`,
@@ -137,7 +160,9 @@ export async function distill(db, ai, llm, sourceId, content, opts = {}) {
         log(`distill: embedding failed for ${atomId}: ${err.message}`);
       }
 
-      // HyDE (2 queries for atoms — they're already focused)
+      // HyDE (2 queries for atoms — they're already focused).
+      // Still per-atom — chat completions don't share a batch surface
+      // across providers the way embeddings do.
       try {
         const queries = await generateHypotheticalQueries(llm, fact);
         const trimmed = queries.slice(0, 2);
@@ -347,7 +347,7 @@ async function main() {
   const health = {
     status: "ok",
     client: CLIENT_ID,
-    version: "0.5.5",
+    version: "0.5.6",
     search: "text",
     db: false,
     ollama: false,