@pentatonic-ai/ai-agent-sdk 0.5.7 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/wrapper.js CHANGED
@@ -1,6 +1,18 @@
1
1
  import { Session } from "./session.js";
2
2
  import { normalizeResponse } from "./normalizer.js";
3
3
  import { rewriteUrls } from "./tracking.js";
4
+ import {
5
+ hostedSearch,
6
+ injectMemories,
7
+ } from "../packages/memory/src/hosted.js";
8
+
// Default memory-injection knobs. Match the proxy's defaults so SDK and
// proxy customers see identical retrieval behaviour.
//
// Frozen because this object is shared by every wrapped client: effective
// options are built by spreading it into a fresh object, so nothing should
// ever need to write to it, and an accidental write would otherwise change
// the defaults for all callers.
const MEMORY_DEFAULTS = Object.freeze({
  limit: 6,
  minScore: 0.55,
  timeoutMs: 800,
});
4
16
 
5
17
  /**
6
18
  * Detect the client type by duck-typing its shape.
@@ -12,6 +24,96 @@ function detectClientType(client) {
12
24
  return "unknown";
13
25
  }
14
26
 
/**
 * Pull the text of the last user message from a request body.
 *
 * Only messages-shaped requests (`params.messages` is an array) are
 * considered. Prompt-style calls (`{ prompt: "..." }`) yield null: there is
 * no clean place to insert memory context without changing the request
 * shape, and we never want to surprise the caller by mutating their prompt
 * string.
 *
 * The messages are scanned from the end. The first entry with
 * `role === "user"` whose content is a string or an array decides the
 * result; user entries with any other content type are skipped and the scan
 * continues with earlier messages.
 *
 * @param {object} [params] - Request body passed to the wrapped client.
 * @param {string} [provider] - Currently unused; kept so the signature
 *   stays stable for callers.
 * @returns {string|null} The string content, or the `text` of every
 *   `type: "text"` part joined with "\n" (an empty string when the array has
 *   no text parts). Null when there is no usable user message.
 */
function extractLastUserMessage(params, provider) {
  void provider;
  const msgs = Array.isArray(params?.messages) ? params.messages : null;
  if (!msgs) return null;

  for (let i = msgs.length - 1; i >= 0; i--) {
    const msg = msgs[i];
    // Tolerate null / non-object entries instead of throwing on `.role`.
    if (msg?.role !== "user") continue;

    const content = msg.content;
    if (typeof content === "string") return content;
    if (Array.isArray(content)) {
      return content
        // `p?.` guards against null parts inside a multi-part content array.
        .filter((p) => p?.type === "text" && typeof p.text === "string")
        .map((p) => p.text)
        .join("\n");
    }
  }
  return null;
}
57
+
/**
 * Inject memories from TES into request params before the LLM call.
 *
 * Default-on. Disabled when `sessionOpts.memory === false` or when
 * `sessionOpts.memoryOpts.disable === true`. Retrieval knobs come from
 * `sessionOpts.memoryOpts` (`limit`, `minScore`, `timeoutMs`) layered over
 * `MEMORY_DEFAULTS`.
 *
 * Every failure mode is non-fatal: the function resolves with the caller's
 * original `params` and a `skipped` reason instead of rejecting, so the
 * wrapped LLM call always proceeds.
 *
 * @param {object} [clientConfig] - Needs `endpoint` and `apiKey`; `clientId`
 *   is forwarded to the search call as-is.
 * @param {object} [sessionOpts] - Session options (`memory`, `memoryOpts`).
 * @param {object} params - Request body about to be sent to the provider.
 * @param {string} provider - Provider tag forwarded to the helpers.
 * @returns {Promise<{params: object, injected: number, skipped: (string|null)}>}
 *   `params` is the original object when nothing was injected, otherwise
 *   whatever `injectMemories` returned. `skipped` is null on success, else
 *   one of "memory_disabled", "no_tes_config", "no_user_message",
 *   "search_error", "no_memories", or the reason reported by the search.
 */
async function maybeInjectMemories(
  clientConfig,
  sessionOpts,
  params,
  provider
) {
  const memoryOpts = sessionOpts?.memoryOpts || {};

  // Honour both documented off-switches before doing any work.
  if (sessionOpts?.memory === false || memoryOpts.disable === true) {
    return { params, injected: 0, skipped: "memory_disabled" };
  }

  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
    return { params, injected: 0, skipped: "no_tes_config" };
  }

  const userMessage = extractLastUserMessage(params, provider);
  if (!userMessage) {
    return { params, injected: 0, skipped: "no_user_message" };
  }

  const opts = { ...MEMORY_DEFAULTS, ...memoryOpts };

  let searchResult;
  try {
    searchResult = await hostedSearch(
      {
        endpoint: clientConfig.endpoint,
        clientId: clientConfig.clientId,
        apiKey: clientConfig.apiKey,
      },
      userMessage,
      opts
    );
  } catch {
    // The search helper is expected to report failures through `skipped`,
    // but a rejection must still never fail the customer's LLM call, so it
    // is converted into a skip reason here.
    return { params, injected: 0, skipped: "search_error" };
  }

  const { memories, skipped } = searchResult ?? {};
  if (!memories?.length) {
    return { params, injected: 0, skipped: skipped || "no_memories" };
  }

  return {
    params: injectMemories(params, memories, provider),
    injected: memories.length,
    skipped: null,
  };
}
110
+
/**
 * Store the outcome of the latest memory-injection attempt on the session.
 *
 * Writes `stats` to `sessionOpts._session._lastMemoryStats`. Does nothing
 * when `sessionOpts` is missing or carries no `_session`, so callers can
 * invoke it unconditionally.
 *
 * @param {object} [sessionOpts] - Session options; may hold `_session`.
 * @param {object} stats - Value to record (stored as-is, not copied).
 * @returns {void}
 */
function recordMemoryStats(sessionOpts, stats) {
  const session = sessionOpts?._session;
  if (session) {
    session._lastMemoryStats = stats;
  }
}
116
+
15
117
  /**
16
118
  * Wrap any supported LLM client with automatic usage tracking.
17
119
  * Auto-detects OpenAI, Anthropic, and Workers AI clients.
@@ -77,7 +179,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
77
179
  get(target, prop) {
78
180
  if (prop === "create") {
79
181
  return async (params) => {
80
- const result = await target.create(params);
182
+ const memStats = await maybeInjectMemories(
183
+ clientConfig,
184
+ sessionOpts,
185
+ params,
186
+ "openai"
187
+ );
188
+ recordMemoryStats(sessionOpts, memStats);
189
+ const result = await target.create(memStats.params);
81
190
  const content = result.choices?.[0]?.message?.content;
82
191
  if (content) {
83
192
  result.choices[0].message.content = await rewriteUrls(
@@ -90,7 +199,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
90
199
  fireAndForgetEmit(
91
200
  clientConfig,
92
201
  sessionOpts,
93
- params.messages,
202
+ memStats.params.messages,
94
203
  result
95
204
  );
96
205
  return result;
@@ -140,7 +249,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
140
249
  get(target, prop) {
141
250
  if (prop === "create") {
142
251
  return async (params) => {
143
- const result = await target.create(params);
252
+ const memStats = await maybeInjectMemories(
253
+ clientConfig,
254
+ sessionOpts,
255
+ params,
256
+ "anthropic"
257
+ );
258
+ recordMemoryStats(sessionOpts, memStats);
259
+ const result = await target.create(memStats.params);
144
260
  if (Array.isArray(result.content)) {
145
261
  for (const block of result.content) {
146
262
  if (block.type === "text" && block.text) {
@@ -156,7 +272,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
156
272
  fireAndForgetEmit(
157
273
  clientConfig,
158
274
  sessionOpts,
159
- params.messages,
275
+ memStats.params.messages,
160
276
  result
161
277
  );
162
278
  return result;
@@ -187,7 +303,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
187
303
  get(target, prop) {
188
304
  if (prop === "run") {
189
305
  return async (model, params, ...rest) => {
190
- const result = await target.run(model, params, ...rest);
306
+ const memStats = await maybeInjectMemories(
307
+ clientConfig,
308
+ sessionOpts,
309
+ params,
310
+ "workers-ai"
311
+ );
312
+ recordMemoryStats(sessionOpts, memStats);
313
+ const result = await target.run(model, memStats.params, ...rest);
191
314
  if (result.response) {
192
315
  result.response = await rewriteUrls(
193
316
  result.response,
@@ -199,7 +322,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
199
322
  fireAndForgetEmit(
200
323
  clientConfig,
201
324
  sessionOpts,
202
- params?.messages,
325
+ memStats.params?.messages,
203
326
  result,
204
327
  model
205
328
  );