npm - @pentatonic-ai/ai-agent-sdk - Versions diffs - 0.5.7 → 0.5.8 - Mend

@pentatonic-ai/ai-agent-sdk 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.cjs +244 -8
package/dist/index.js +244 -8
package/package.json +2 -2
package/packages/memory/src/hosted.js +7 -0
package/packages/memory/src/inject.js +83 -0
package/src/client.js +20 -2
package/src/wrapper.js +129 -6

package/dist/index.cjs CHANGED Viewed

@@ -417,7 +417,156 @@ var Session = class {
   }
 };
+// packages/memory/src/inject.js
+var MAX_CHARS_PER_MEMORY = 1200;
+function injectMemories(body, memories, provider) {
+  if (!memories || memories.length === 0)
+    return body;
+  const preamble = formatPreamble(memories);
+  if (provider === "anthropic") {
+    return injectAnthropic(body, preamble);
+  }
+  return injectOpenAI(body, preamble);
+}
+function formatPreamble(memories) {
+  const lines = ["<tes:context>"];
+  memories.forEach((m, i) => {
+    const sim = typeof m.similarity === "number" ? m.similarity.toFixed(2) : "?";
+    const content = (m.content || "").slice(0, MAX_CHARS_PER_MEMORY);
+    lines.push(`[${i + 1}] (similarity ${sim}) ${content}`);
+  });
+  lines.push("</tes:context>");
+  return lines.join("\n");
+}
+function injectAnthropic(body, preamble) {
+  const next = { ...body };
+  if (typeof body.system === "string") {
+    next.system = `${preamble}
+${body.system}`;
+  } else if (Array.isArray(body.system)) {
+    next.system = [{ type: "text", text: preamble }, ...body.system];
+  } else {
+    next.system = preamble;
+  }
+  return next;
+}
+function injectOpenAI(body, preamble) {
+  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
+  if (messages.length > 0 && messages[0].role === "system") {
+    const existing = messages[0];
+    const existingContent = typeof existing.content === "string" ? existing.content : JSON.stringify(existing.content);
+    messages[0] = {
+      ...existing,
+      content: `${preamble}
+${existingContent}`
+    };
+  } else {
+    messages.unshift({ role: "system", content: preamble });
+  }
+  return { ...body, messages };
+}
+// packages/memory/src/hosted.js
+var SEMANTIC_SEARCH_QUERY = `
+  query SemanticSearchMemories($clientId: String!, $query: String!, $limit: Int, $minScore: Float) {
+    semanticSearchMemories(clientId: $clientId, query: $query, limit: $limit, minScore: $minScore) {
+      id
+      content
+      similarity
+    }
+  }
+`;
+var DEFAULT_SEARCH_TIMEOUT_MS = 5e3;
+var DEFAULT_SEARCH_LIMIT = 6;
+var DEFAULT_SEARCH_MIN_SCORE = 0.55;
+function normalizeConfig(config) {
+  if (!config)
+    throw new Error("hosted: config is required");
+  const endpoint = config.endpoint || config.tes_endpoint;
+  const clientId = config.clientId || config.tes_client_id;
+  const apiKey = config.apiKey || config.tes_api_key;
+  if (!endpoint || !clientId || !apiKey) {
+    throw new Error(
+      "hosted: config requires { endpoint, clientId, apiKey } (or legacy tes_* equivalents)"
+    );
+  }
+  return { endpoint, clientId, apiKey };
+}
+function buildHostedHeaders(config) {
+  const { clientId, apiKey } = normalizeConfig(config);
+  const headers = {
+    "Content-Type": "application/json",
+    "x-client-id": clientId
+  };
+  if (apiKey.startsWith("tes_")) {
+    headers["Authorization"] = `Bearer ${apiKey}`;
+  } else {
+    headers["x-service-key"] = apiKey;
+  }
+  return headers;
+}
+async function hostedSearch(config, query, opts = {}) {
+  if (!query)
+    return { memories: [], skipped: "no_query" };
+  let cfg;
+  try {
+    cfg = normalizeConfig(config);
+  } catch (err) {
+    return { memories: [], skipped: `config_error:${err.message}` };
+  }
+  const limit = opts.limit ?? DEFAULT_SEARCH_LIMIT;
+  const minScore = opts.minScore ?? DEFAULT_SEARCH_MIN_SCORE;
+  const timeoutMs = opts.timeoutMs ?? DEFAULT_SEARCH_TIMEOUT_MS;
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), timeoutMs);
+  let response;
+  try {
+    response = await fetch(`${cfg.endpoint}/api/graphql`, {
+      method: "POST",
+      headers: buildHostedHeaders(cfg),
+      body: JSON.stringify({
+        query: SEMANTIC_SEARCH_QUERY,
+        variables: { clientId: cfg.clientId, query, limit, minScore }
+      }),
+      signal: controller.signal
+    });
+  } catch (err) {
+    clearTimeout(timer);
+    return {
+      memories: [],
+      skipped: err.name === "AbortError" ? "tes_timeout" : "tes_unreachable"
+    };
+  }
+  clearTimeout(timer);
+  if (!response.ok) {
+    return { memories: [], skipped: `tes_http_${response.status}` };
+  }
+  let payload;
+  try {
+    payload = await response.json();
+  } catch {
+    return { memories: [], skipped: "tes_invalid_json" };
+  }
+  if (payload.errors?.length) {
+    const reason = payload.errors[0].message || "tes_graphql_error";
+    return { memories: [], skipped: `tes_graphql:${shortenReason(reason)}` };
+  }
+  return { memories: payload.data?.semanticSearchMemories || [] };
+}
+function shortenReason(msg) {
+  if (typeof msg !== "string")
+    return "unknown";
+  return msg.toLowerCase().replace(/[^a-z0-9]+/g, "_").slice(0, 60);
+}
 // src/wrapper.js
+var MEMORY_DEFAULTS = {
+  limit: 6,
+  minScore: 0.55,
+  timeoutMs: 800
+};
 function detectClientType(client) {
   if (client?.chat?.completions?.create)
     return "openai";
@@ -427,6 +576,57 @@ function detectClientType(client) {
     return "workers-ai";
   return "unknown";
 }
+function extractLastUserMessage(params, provider) {
+  const msgs = Array.isArray(params?.messages) ? params.messages : null;
+  if (!msgs)
+    return null;
+  for (let i = msgs.length - 1; i >= 0; i--) {
+    if (msgs[i].role === "user") {
+      const c = msgs[i].content;
+      if (typeof c === "string")
+        return c;
+      if (Array.isArray(c)) {
+        return c.filter((p) => p.type === "text" && typeof p.text === "string").map((p) => p.text).join("\n");
+      }
+    }
+  }
+  return null;
+}
+async function maybeInjectMemories(clientConfig, sessionOpts, params, provider) {
+  if (sessionOpts.memory === false) {
+    return { params, injected: 0, skipped: "memory_disabled" };
+  }
+  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
+    return { params, injected: 0, skipped: "no_tes_config" };
+  }
+  const userMessage = extractLastUserMessage(params, provider);
+  if (!userMessage) {
+    return { params, injected: 0, skipped: "no_user_message" };
+  }
+  const opts = { ...MEMORY_DEFAULTS, ...sessionOpts.memoryOpts || {} };
+  const { memories, skipped } = await hostedSearch(
+    {
+      endpoint: clientConfig.endpoint,
+      clientId: clientConfig.clientId,
+      apiKey: clientConfig.apiKey
+    },
+    userMessage,
+    opts
+  );
+  if (!memories?.length) {
+    return { params, injected: 0, skipped: skipped || "no_memories" };
+  }
+  return {
+    params: injectMemories(params, memories, provider),
+    injected: memories.length,
+    skipped: null
+  };
+}
+function recordMemoryStats(sessionOpts, stats) {
+  if (sessionOpts._session) {
+    sessionOpts._session._lastMemoryStats = stats;
+  }
+}
 function wrapClient(clientConfig, client, sessionOpts = {}) {
   sessionOpts._resolvedSessionId = sessionOpts.sessionId || crypto.randomUUID();
   sessionOpts._session = new Session(clientConfig, {
@@ -478,7 +678,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
     get(target, prop) {
       if (prop === "create") {
         return async (params) => {
-          const result = await target.create(params);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "openai"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.create(memStats.params);
           const content = result.choices?.[0]?.message?.content;
           if (content) {
             result.choices[0].message.content = await rewriteUrls(
@@ -491,7 +698,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params.messages,
+            memStats.params.messages,
             result
           );
           return result;
@@ -537,7 +744,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
     get(target, prop) {
       if (prop === "create") {
         return async (params) => {
-          const result = await target.create(params);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "anthropic"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.create(memStats.params);
           if (Array.isArray(result.content)) {
             for (const block of result.content) {
               if (block.type === "text" && block.text) {
@@ -553,7 +767,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params.messages,
+            memStats.params.messages,
             result
           );
           return result;
@@ -579,7 +793,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
     get(target, prop) {
       if (prop === "run") {
         return async (model, params, ...rest) => {
-          const result = await target.run(model, params, ...rest);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "workers-ai"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.run(model, memStats.params, ...rest);
           if (result.response) {
             result.response = await rewriteUrls(
               result.response,
@@ -591,7 +812,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params?.messages,
+            memStats.params?.messages,
             result,
             model
           );
@@ -764,8 +985,23 @@ var TESClient = class {
   session(opts) {
     return new Session(this._config, opts);
   }
-  wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
+  wrap(client, {
+    sessionId,
+    userId,
+    metadata,
+    autoEmit = true,
+    waitUntil,
+    memory,
+    memoryOpts
+  } = {}) {
     const config = userId ? { ...this._config, userId } : this._config;
-    return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
+    return wrapClient(config, client, {
+      sessionId,
+      metadata,
+      autoEmit,
+      waitUntil,
+      memory,
+      memoryOpts
+    });
   }
 };

package/dist/index.js CHANGED Viewed

@@ -386,7 +386,156 @@ var Session = class {
   }
 };
+// packages/memory/src/inject.js
+var MAX_CHARS_PER_MEMORY = 1200;
+function injectMemories(body, memories, provider) {
+  if (!memories || memories.length === 0)
+    return body;
+  const preamble = formatPreamble(memories);
+  if (provider === "anthropic") {
+    return injectAnthropic(body, preamble);
+  }
+  return injectOpenAI(body, preamble);
+}
+function formatPreamble(memories) {
+  const lines = ["<tes:context>"];
+  memories.forEach((m, i) => {
+    const sim = typeof m.similarity === "number" ? m.similarity.toFixed(2) : "?";
+    const content = (m.content || "").slice(0, MAX_CHARS_PER_MEMORY);
+    lines.push(`[${i + 1}] (similarity ${sim}) ${content}`);
+  });
+  lines.push("</tes:context>");
+  return lines.join("\n");
+}
+function injectAnthropic(body, preamble) {
+  const next = { ...body };
+  if (typeof body.system === "string") {
+    next.system = `${preamble}
+${body.system}`;
+  } else if (Array.isArray(body.system)) {
+    next.system = [{ type: "text", text: preamble }, ...body.system];
+  } else {
+    next.system = preamble;
+  }
+  return next;
+}
+function injectOpenAI(body, preamble) {
+  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
+  if (messages.length > 0 && messages[0].role === "system") {
+    const existing = messages[0];
+    const existingContent = typeof existing.content === "string" ? existing.content : JSON.stringify(existing.content);
+    messages[0] = {
+      ...existing,
+      content: `${preamble}
+${existingContent}`
+    };
+  } else {
+    messages.unshift({ role: "system", content: preamble });
+  }
+  return { ...body, messages };
+}
+// packages/memory/src/hosted.js
+var SEMANTIC_SEARCH_QUERY = `
+  query SemanticSearchMemories($clientId: String!, $query: String!, $limit: Int, $minScore: Float) {
+    semanticSearchMemories(clientId: $clientId, query: $query, limit: $limit, minScore: $minScore) {
+      id
+      content
+      similarity
+    }
+  }
+`;
+var DEFAULT_SEARCH_TIMEOUT_MS = 5e3;
+var DEFAULT_SEARCH_LIMIT = 6;
+var DEFAULT_SEARCH_MIN_SCORE = 0.55;
+function normalizeConfig(config) {
+  if (!config)
+    throw new Error("hosted: config is required");
+  const endpoint = config.endpoint || config.tes_endpoint;
+  const clientId = config.clientId || config.tes_client_id;
+  const apiKey = config.apiKey || config.tes_api_key;
+  if (!endpoint || !clientId || !apiKey) {
+    throw new Error(
+      "hosted: config requires { endpoint, clientId, apiKey } (or legacy tes_* equivalents)"
+    );
+  }
+  return { endpoint, clientId, apiKey };
+}
+function buildHostedHeaders(config) {
+  const { clientId, apiKey } = normalizeConfig(config);
+  const headers = {
+    "Content-Type": "application/json",
+    "x-client-id": clientId
+  };
+  if (apiKey.startsWith("tes_")) {
+    headers["Authorization"] = `Bearer ${apiKey}`;
+  } else {
+    headers["x-service-key"] = apiKey;
+  }
+  return headers;
+}
+async function hostedSearch(config, query, opts = {}) {
+  if (!query)
+    return { memories: [], skipped: "no_query" };
+  let cfg;
+  try {
+    cfg = normalizeConfig(config);
+  } catch (err) {
+    return { memories: [], skipped: `config_error:${err.message}` };
+  }
+  const limit = opts.limit ?? DEFAULT_SEARCH_LIMIT;
+  const minScore = opts.minScore ?? DEFAULT_SEARCH_MIN_SCORE;
+  const timeoutMs = opts.timeoutMs ?? DEFAULT_SEARCH_TIMEOUT_MS;
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), timeoutMs);
+  let response;
+  try {
+    response = await fetch(`${cfg.endpoint}/api/graphql`, {
+      method: "POST",
+      headers: buildHostedHeaders(cfg),
+      body: JSON.stringify({
+        query: SEMANTIC_SEARCH_QUERY,
+        variables: { clientId: cfg.clientId, query, limit, minScore }
+      }),
+      signal: controller.signal
+    });
+  } catch (err) {
+    clearTimeout(timer);
+    return {
+      memories: [],
+      skipped: err.name === "AbortError" ? "tes_timeout" : "tes_unreachable"
+    };
+  }
+  clearTimeout(timer);
+  if (!response.ok) {
+    return { memories: [], skipped: `tes_http_${response.status}` };
+  }
+  let payload;
+  try {
+    payload = await response.json();
+  } catch {
+    return { memories: [], skipped: "tes_invalid_json" };
+  }
+  if (payload.errors?.length) {
+    const reason = payload.errors[0].message || "tes_graphql_error";
+    return { memories: [], skipped: `tes_graphql:${shortenReason(reason)}` };
+  }
+  return { memories: payload.data?.semanticSearchMemories || [] };
+}
+function shortenReason(msg) {
+  if (typeof msg !== "string")
+    return "unknown";
+  return msg.toLowerCase().replace(/[^a-z0-9]+/g, "_").slice(0, 60);
+}
 // src/wrapper.js
+var MEMORY_DEFAULTS = {
+  limit: 6,
+  minScore: 0.55,
+  timeoutMs: 800
+};
 function detectClientType(client) {
   if (client?.chat?.completions?.create)
     return "openai";
@@ -396,6 +545,57 @@ function detectClientType(client) {
     return "workers-ai";
   return "unknown";
 }
+function extractLastUserMessage(params, provider) {
+  const msgs = Array.isArray(params?.messages) ? params.messages : null;
+  if (!msgs)
+    return null;
+  for (let i = msgs.length - 1; i >= 0; i--) {
+    if (msgs[i].role === "user") {
+      const c = msgs[i].content;
+      if (typeof c === "string")
+        return c;
+      if (Array.isArray(c)) {
+        return c.filter((p) => p.type === "text" && typeof p.text === "string").map((p) => p.text).join("\n");
+      }
+    }
+  }
+  return null;
+}
+async function maybeInjectMemories(clientConfig, sessionOpts, params, provider) {
+  if (sessionOpts.memory === false) {
+    return { params, injected: 0, skipped: "memory_disabled" };
+  }
+  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
+    return { params, injected: 0, skipped: "no_tes_config" };
+  }
+  const userMessage = extractLastUserMessage(params, provider);
+  if (!userMessage) {
+    return { params, injected: 0, skipped: "no_user_message" };
+  }
+  const opts = { ...MEMORY_DEFAULTS, ...sessionOpts.memoryOpts || {} };
+  const { memories, skipped } = await hostedSearch(
+    {
+      endpoint: clientConfig.endpoint,
+      clientId: clientConfig.clientId,
+      apiKey: clientConfig.apiKey
+    },
+    userMessage,
+    opts
+  );
+  if (!memories?.length) {
+    return { params, injected: 0, skipped: skipped || "no_memories" };
+  }
+  return {
+    params: injectMemories(params, memories, provider),
+    injected: memories.length,
+    skipped: null
+  };
+}
+function recordMemoryStats(sessionOpts, stats) {
+  if (sessionOpts._session) {
+    sessionOpts._session._lastMemoryStats = stats;
+  }
+}
 function wrapClient(clientConfig, client, sessionOpts = {}) {
   sessionOpts._resolvedSessionId = sessionOpts.sessionId || crypto.randomUUID();
   sessionOpts._session = new Session(clientConfig, {
@@ -447,7 +647,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
     get(target, prop) {
       if (prop === "create") {
         return async (params) => {
-          const result = await target.create(params);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "openai"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.create(memStats.params);
           const content = result.choices?.[0]?.message?.content;
           if (content) {
             result.choices[0].message.content = await rewriteUrls(
@@ -460,7 +667,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params.messages,
+            memStats.params.messages,
             result
           );
           return result;
@@ -506,7 +713,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
     get(target, prop) {
       if (prop === "create") {
         return async (params) => {
-          const result = await target.create(params);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "anthropic"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.create(memStats.params);
           if (Array.isArray(result.content)) {
             for (const block of result.content) {
               if (block.type === "text" && block.text) {
@@ -522,7 +736,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params.messages,
+            memStats.params.messages,
             result
           );
           return result;
@@ -548,7 +762,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
     get(target, prop) {
       if (prop === "run") {
         return async (model, params, ...rest) => {
-          const result = await target.run(model, params, ...rest);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "workers-ai"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.run(model, memStats.params, ...rest);
           if (result.response) {
             result.response = await rewriteUrls(
               result.response,
@@ -560,7 +781,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params?.messages,
+            memStats.params?.messages,
             result,
             model
           );
@@ -733,9 +954,24 @@ var TESClient = class {
   session(opts) {
     return new Session(this._config, opts);
   }
-  wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
+  wrap(client, {
+    sessionId,
+    userId,
+    metadata,
+    autoEmit = true,
+    waitUntil,
+    memory,
+    memoryOpts
+  } = {}) {
     const config = userId ? { ...this._config, userId } : this._config;
-    return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
+    return wrapClient(config, client, {
+      sessionId,
+      metadata,
+      autoEmit,
+      waitUntil,
+      memory,
+      memoryOpts
+    });
   }
 };
 export {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@pentatonic-ai/ai-agent-sdk",
-  "version": "0.5.7",
-  "description": "TES SDK \u2014 LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
+  "version": "0.5.8",
+  "description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
   "type": "module",
   "main": "./dist/index.cjs",
   "module": "./dist/index.js",

package/packages/memory/src/hosted.js CHANGED Viewed

@@ -370,3 +370,10 @@ function shortenReason(msg) {
     .replace(/[^a-z0-9]+/g, "_")
     .slice(0, 60);
 }
+// Re-export the system-message injector so callers that import the
+// hosted module get the full memory-augmentation surface in one place.
+// Keeping the implementation in `./inject.js` lets non-hosted consumers
+// (e.g. a future "augment a request body" helper that doesn't talk to
+// TES) reuse it without pulling in the GraphQL surface.
+export { injectMemories } from "./inject.js";

package/packages/memory/src/inject.js ADDED Viewed

@@ -0,0 +1,83 @@
+/**
+ * Memory injection — formats retrieved memories as a system-message preamble
+ * and merges them into the upstream request body.
+ *
+ * Why a preamble (not a separate user-turn or tool-result):
+ *   - Customer's existing system prompt is preserved verbatim; the preamble is prepended to it.
+ *   - Anthropic and OpenAI both treat system content as cache-friendly.
+ *   - No conversation-history mutation — replays remain reproducible.
+ *
+ * Format:
+ *   <tes:context>
+ *     [1] (similarity 0.82) memory text...
+ *     [2] (similarity 0.71) memory text...
+ *   </tes:context>
+ *
+ * The XML-ish wrapper makes it trivial for the model to ignore on demand
+ * and trivial for an evaluator to strip when measuring quality deltas.
+ */
+const MAX_CHARS_PER_MEMORY = 1200;
+/**
+ * @param {object} body                 — upstream request body; left unmutated, a modified shallow copy is returned
+ * @param {Array<{id, content, similarity}>} memories
+ * @param {"anthropic"|"openai"|"workers-ai"} provider — anything other than "anthropic" uses the OpenAI message shape
+ * @returns {object} new body
+ */
+export function injectMemories(body, memories, provider) {
+  if (!memories || memories.length === 0) return body;
+  const preamble = formatPreamble(memories);
+  if (provider === "anthropic") {
+    return injectAnthropic(body, preamble);
+  }
+  return injectOpenAI(body, preamble);
+}
+function formatPreamble(memories) {
+  const lines = ["<tes:context>"];
+  memories.forEach((m, i) => {
+    const sim =
+      typeof m.similarity === "number" ? m.similarity.toFixed(2) : "?";
+    const content = (m.content || "").slice(0, MAX_CHARS_PER_MEMORY);
+    lines.push(`[${i + 1}] (similarity ${sim}) ${content}`);
+  });
+  lines.push("</tes:context>");
+  return lines.join("\n");
+}
+function injectAnthropic(body, preamble) {
+  // Anthropic accepts `system` as either a string OR an array of content
+  // blocks. Preserve whichever shape the customer sent.
+  const next = { ...body };
+  if (typeof body.system === "string") {
+    next.system = `${preamble}\n\n${body.system}`;
+  } else if (Array.isArray(body.system)) {
+    next.system = [{ type: "text", text: preamble }, ...body.system];
+  } else {
+    next.system = preamble;
+  }
+  return next;
+}
+function injectOpenAI(body, preamble) {
+  // OpenAI carries the system prompt as the first message with role:'system'.
+  // If one exists we prepend; otherwise we insert a fresh one at index 0.
+  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
+  if (messages.length > 0 && messages[0].role === "system") {
+    const existing = messages[0];
+    const existingContent =
+      typeof existing.content === "string"
+        ? existing.content
+        : JSON.stringify(existing.content);
+    messages[0] = {
+      ...existing,
+      content: `${preamble}\n\n${existingContent}`,
+    };
+  } else {
+    messages.unshift({ role: "system", content: preamble });
+  }
+  return { ...body, messages };
+}

package/src/client.js CHANGED Viewed

@@ -56,8 +56,26 @@ export class TESClient {
     return new Session(this._config, opts);
   }
-  wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
+  wrap(
+    client,
+    {
+      sessionId,
+      userId,
+      metadata,
+      autoEmit = true,
+      waitUntil,
+      memory,
+      memoryOpts,
+    } = {}
+  ) {
     const config = userId ? { ...this._config, userId } : this._config;
-    return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
+    return wrapClient(config, client, {
+      sessionId,
+      metadata,
+      autoEmit,
+      waitUntil,
+      memory,
+      memoryOpts,
+    });
   }
 }

package/src/wrapper.js CHANGED Viewed

@@ -1,6 +1,18 @@
 import { Session } from "./session.js";
 import { normalizeResponse } from "./normalizer.js";
 import { rewriteUrls } from "./tracking.js";
+import {
+  hostedSearch,
+  injectMemories,
+} from "../packages/memory/src/hosted.js";
+// Default memory-injection knobs. Match the proxy's defaults so SDK and
+// proxy customers see identical retrieval behaviour.
+const MEMORY_DEFAULTS = {
+  limit: 6,
+  minScore: 0.55,
+  timeoutMs: 800,
+};
 /**
  * Detect the client type by duck-typing its shape.
@@ -12,6 +24,96 @@ function detectClientType(client) {
   return "unknown";
 }
+/**
+ * Pull the last user message from a request body. Anthropic + OpenAI both
+ * carry messages on `params.messages`; Workers AI may also use
+ * `params.prompt` or `params.input_text`, but only `params.messages` is
+ * read here. Returns null when no user message is found (e.g. embedding
+ * call, prompt-style call) so memory retrieval is skipped cleanly.
+ */
+function extractLastUserMessage(params, provider) {
+  // Only messages-shaped requests are eligible for system-prompt injection.
+  // Workers AI prompt-style calls (`{ prompt: "..." }`) are passed through
+  // unchanged — there's no clean place to insert memory context without
+  // changing the request shape, and we never want to surprise the caller
+  // by mutating their prompt string.
+  void provider;
+  const msgs = Array.isArray(params?.messages) ? params.messages : null;
+  if (!msgs) return null;
+  for (let i = msgs.length - 1; i >= 0; i--) {
+    if (msgs[i].role === "user") {
+      const c = msgs[i].content;
+      if (typeof c === "string") return c;
+      if (Array.isArray(c)) {
+        return c
+          .filter((p) => p.type === "text" && typeof p.text === "string")
+          .map((p) => p.text)
+          .join("\n");
+      }
+    }
+  }
+  return null;
+}
+/**
+ * Inject memories from TES into request params before the LLM call.
+ *
+ * Default-on. Disable per-wrapClient via `sessionOpts.memory: false`
+ * (a per-call `memoryOpts.disable` flag is not checked by `maybeInjectMemories`).
+ * Knobs come from `sessionOpts.memoryOpts` (`limit`, `minScore`, `timeoutMs`).
+ *
+ * Failure modes (TES timeout, module disabled, network error) are
+ * non-fatal — the call proceeds with the customer's original params and
+ * the skip reason is recorded on the session under `_lastMemoryStats`
+ * for observability.
+ */
+async function maybeInjectMemories(
+  clientConfig,
+  sessionOpts,
+  params,
+  provider
+) {
+  if (sessionOpts.memory === false) {
+    return { params, injected: 0, skipped: "memory_disabled" };
+  }
+  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
+    return { params, injected: 0, skipped: "no_tes_config" };
+  }
+  const userMessage = extractLastUserMessage(params, provider);
+  if (!userMessage) {
+    return { params, injected: 0, skipped: "no_user_message" };
+  }
+  const opts = { ...MEMORY_DEFAULTS, ...(sessionOpts.memoryOpts || {}) };
+  const { memories, skipped } = await hostedSearch(
+    {
+      endpoint: clientConfig.endpoint,
+      clientId: clientConfig.clientId,
+      apiKey: clientConfig.apiKey,
+    },
+    userMessage,
+    opts
+  );
+  if (!memories?.length) {
+    return { params, injected: 0, skipped: skipped || "no_memories" };
+  }
+  return {
+    params: injectMemories(params, memories, provider),
+    injected: memories.length,
+    skipped: null,
+  };
+}
+function recordMemoryStats(sessionOpts, stats) {
+  if (sessionOpts._session) {
+    sessionOpts._session._lastMemoryStats = stats;
+  }
+}
 /**
  * Wrap any supported LLM client with automatic usage tracking.
  * Auto-detects OpenAI, Anthropic, and Workers AI clients.
@@ -77,7 +179,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
     get(target, prop) {
       if (prop === "create") {
         return async (params) => {
-          const result = await target.create(params);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "openai"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.create(memStats.params);
           const content = result.choices?.[0]?.message?.content;
           if (content) {
             result.choices[0].message.content = await rewriteUrls(
@@ -90,7 +199,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params.messages,
+            memStats.params.messages,
             result
           );
           return result;
@@ -140,7 +249,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
     get(target, prop) {
       if (prop === "create") {
         return async (params) => {
-          const result = await target.create(params);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "anthropic"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.create(memStats.params);
           if (Array.isArray(result.content)) {
             for (const block of result.content) {
               if (block.type === "text" && block.text) {
@@ -156,7 +272,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params.messages,
+            memStats.params.messages,
             result
           );
           return result;
@@ -187,7 +303,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
     get(target, prop) {
       if (prop === "run") {
         return async (model, params, ...rest) => {
-          const result = await target.run(model, params, ...rest);
+          const memStats = await maybeInjectMemories(
+            clientConfig,
+            sessionOpts,
+            params,
+            "workers-ai"
+          );
+          recordMemoryStats(sessionOpts, memStats);
+          const result = await target.run(model, memStats.params, ...rest);
           if (result.response) {
             result.response = await rewriteUrls(
               result.response,
@@ -199,7 +322,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
           fireAndForgetEmit(
             clientConfig,
             sessionOpts,
-            params?.messages,
+            memStats.params?.messages,
             result,
             model
           );