@pentatonic-ai/ai-agent-sdk 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +39 -72
- package/dist/index.js +36 -69
- package/package.json +9 -2
- package/packages/memory/package-lock.json +49 -33
- package/packages/memory/package.json +4 -1
- package/packages/memory/src/__tests__/engine.test.js +40 -5
- package/packages/memory/src/engine.js +38 -3
- package/packages/memory-engine/docker-compose.yml +16 -1
- package/packages/memory-engine/engine/services/_shared/embed_provider.py +125 -31
- package/packages/memory-engine/tests/test_embed_provider.py +201 -0
package/dist/index.cjs
CHANGED
|
@@ -17,8 +17,8 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
17
17
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
18
|
|
|
19
19
|
// src/index.js
|
|
20
|
-
var src_exports = {};
|
|
21
|
-
__export(src_exports, {
|
|
20
|
+
var index_exports = {};
|
|
21
|
+
__export(index_exports, {
|
|
22
22
|
Session: () => Session,
|
|
23
23
|
TESClient: () => TESClient,
|
|
24
24
|
buildTrackUrl: () => buildTrackUrl,
|
|
@@ -27,7 +27,7 @@ __export(src_exports, {
|
|
|
27
27
|
signPayload: () => signPayload,
|
|
28
28
|
verifyPayload: () => verifyPayload
|
|
29
29
|
});
|
|
30
|
-
module.exports = __toCommonJS(src_exports);
|
|
30
|
+
module.exports = __toCommonJS(index_exports);
|
|
31
31
|
|
|
32
32
|
// src/normalizer.js
|
|
33
33
|
function normalizeResponse(raw) {
|
|
@@ -189,8 +189,7 @@ var encoder = new TextEncoder();
|
|
|
189
189
|
function toBase64Url(buffer) {
|
|
190
190
|
const bytes = new Uint8Array(buffer);
|
|
191
191
|
let binary = "";
|
|
192
|
-
for (let i = 0; i < bytes.length; i++)
|
|
193
|
-
binary += String.fromCharCode(bytes[i]);
|
|
192
|
+
for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
|
|
194
193
|
return btoa(binary).replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
|
|
195
194
|
}
|
|
196
195
|
async function signPayload(secret, payload) {
|
|
@@ -211,27 +210,22 @@ async function verifyPayload(secret, payload, signature) {
|
|
|
211
210
|
}
|
|
212
211
|
async function buildTrackUrl(endpoint, apiKey, payload) {
|
|
213
212
|
const p = { ...payload };
|
|
214
|
-
if (!p.e)
|
|
215
|
-
p.e = "LINK_CLICK";
|
|
213
|
+
if (!p.e) p.e = "LINK_CLICK";
|
|
216
214
|
const encoded = toBase64Url(encoder.encode(JSON.stringify(p)));
|
|
217
215
|
const sig = await signPayload(apiKey, p);
|
|
218
216
|
return `${endpoint}/r/${encoded}?sig=${sig}`;
|
|
219
217
|
}
|
|
220
218
|
var URL_RE = /https?:\/\/[^\s"'<>)\]]+/g;
|
|
221
219
|
async function rewriteUrls(text, config, sessionId, metadata) {
|
|
222
|
-
if (!text)
|
|
223
|
-
return text;
|
|
220
|
+
if (!text) return text;
|
|
224
221
|
const redirectPrefix = `${config.endpoint}/r/`;
|
|
225
222
|
const matches = [...text.matchAll(URL_RE)];
|
|
226
|
-
if (matches.length === 0)
|
|
227
|
-
return text;
|
|
223
|
+
if (matches.length === 0) return text;
|
|
228
224
|
const replacements = /* @__PURE__ */ new Map();
|
|
229
225
|
for (const m of matches) {
|
|
230
226
|
const originalUrl = m[0];
|
|
231
|
-
if (originalUrl.startsWith(redirectPrefix))
|
|
232
|
-
|
|
233
|
-
if (replacements.has(originalUrl))
|
|
234
|
-
continue;
|
|
227
|
+
if (originalUrl.startsWith(redirectPrefix)) continue;
|
|
228
|
+
if (replacements.has(originalUrl)) continue;
|
|
235
229
|
const payload = {
|
|
236
230
|
u: originalUrl,
|
|
237
231
|
s: sessionId,
|
|
@@ -254,10 +248,8 @@ async function rewriteUrls(text, config, sessionId, metadata) {
|
|
|
254
248
|
|
|
255
249
|
// src/session.js
|
|
256
250
|
function truncate(value, maxLen) {
|
|
257
|
-
if (!value || !maxLen || typeof value !== "string")
|
|
258
|
-
|
|
259
|
-
if (value.length <= maxLen)
|
|
260
|
-
return value;
|
|
251
|
+
if (!value || !maxLen || typeof value !== "string") return value;
|
|
252
|
+
if (value.length <= maxLen) return value;
|
|
261
253
|
return value.slice(0, maxLen) + "...[truncated]";
|
|
262
254
|
}
|
|
263
255
|
var Session = class {
|
|
@@ -420,8 +412,7 @@ var Session = class {
|
|
|
420
412
|
// packages/memory/src/inject.js
|
|
421
413
|
var MAX_CHARS_PER_MEMORY = 1200;
|
|
422
414
|
function injectMemories(body, memories, provider) {
|
|
423
|
-
if (!memories || memories.length === 0)
|
|
424
|
-
return body;
|
|
415
|
+
if (!memories || memories.length === 0) return body;
|
|
425
416
|
const preamble = formatPreamble(memories);
|
|
426
417
|
if (provider === "anthropic") {
|
|
427
418
|
return injectAnthropic(body, preamble);
|
|
@@ -482,8 +473,7 @@ var DEFAULT_SEARCH_TIMEOUT_MS = 5e3;
|
|
|
482
473
|
var DEFAULT_SEARCH_LIMIT = 6;
|
|
483
474
|
var DEFAULT_SEARCH_MIN_SCORE = 0.55;
|
|
484
475
|
function normalizeConfig(config) {
|
|
485
|
-
if (!config)
|
|
486
|
-
throw new Error("hosted: config is required");
|
|
476
|
+
if (!config) throw new Error("hosted: config is required");
|
|
487
477
|
const endpoint = config.endpoint || config.tes_endpoint;
|
|
488
478
|
const clientId = config.clientId || config.tes_client_id;
|
|
489
479
|
const apiKey = config.apiKey || config.tes_api_key;
|
|
@@ -508,8 +498,7 @@ function buildHostedHeaders(config) {
|
|
|
508
498
|
return headers;
|
|
509
499
|
}
|
|
510
500
|
async function hostedSearch(config, query, opts = {}) {
|
|
511
|
-
if (!query)
|
|
512
|
-
return { memories: [], skipped: "no_query" };
|
|
501
|
+
if (!query) return { memories: [], skipped: "no_query" };
|
|
513
502
|
let cfg;
|
|
514
503
|
try {
|
|
515
504
|
cfg = normalizeConfig(config);
|
|
@@ -556,8 +545,7 @@ async function hostedSearch(config, query, opts = {}) {
|
|
|
556
545
|
return { memories: payload.data?.semanticSearchMemories || [] };
|
|
557
546
|
}
|
|
558
547
|
function shortenReason(msg) {
|
|
559
|
-
if (typeof msg !== "string")
|
|
560
|
-
return "unknown";
|
|
548
|
+
if (typeof msg !== "string") return "unknown";
|
|
561
549
|
return msg.toLowerCase().replace(/[^a-z0-9]+/g, "_").slice(0, 60);
|
|
562
550
|
}
|
|
563
551
|
|
|
@@ -568,23 +556,19 @@ var MEMORY_DEFAULTS = {
|
|
|
568
556
|
timeoutMs: 800
|
|
569
557
|
};
|
|
570
558
|
function detectClientType(client) {
|
|
571
|
-
if (client?.chat?.completions?.create)
|
|
572
|
-
|
|
573
|
-
if (client?.messages?.create)
|
|
574
|
-
return "anthropic";
|
|
575
|
-
if (typeof client?.run === "function")
|
|
576
|
-
return "workers-ai";
|
|
559
|
+
if (client?.chat?.completions?.create) return "openai";
|
|
560
|
+
if (client?.messages?.create) return "anthropic";
|
|
561
|
+
if (typeof client?.run === "function") return "workers-ai";
|
|
577
562
|
return "unknown";
|
|
578
563
|
}
|
|
579
564
|
function extractLastUserMessage(params, provider) {
|
|
565
|
+
void provider;
|
|
580
566
|
const msgs = Array.isArray(params?.messages) ? params.messages : null;
|
|
581
|
-
if (!msgs)
|
|
582
|
-
return null;
|
|
567
|
+
if (!msgs) return null;
|
|
583
568
|
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
584
569
|
if (msgs[i].role === "user") {
|
|
585
570
|
const c = msgs[i].content;
|
|
586
|
-
if (typeof c === "string")
|
|
587
|
-
return c;
|
|
571
|
+
if (typeof c === "string") return c;
|
|
588
572
|
if (Array.isArray(c)) {
|
|
589
573
|
return c.filter((p) => p.type === "text" && typeof p.text === "string").map((p) => p.text).join("\n");
|
|
590
574
|
}
|
|
@@ -634,8 +618,7 @@ function wrapClient(clientConfig, client, sessionOpts = {}) {
|
|
|
634
618
|
metadata: sessionOpts.metadata
|
|
635
619
|
});
|
|
636
620
|
const type = detectClientType(client);
|
|
637
|
-
if (type === "openai")
|
|
638
|
-
return wrapOpenAI(clientConfig, client, sessionOpts);
|
|
621
|
+
if (type === "openai") return wrapOpenAI(clientConfig, client, sessionOpts);
|
|
639
622
|
if (type === "anthropic")
|
|
640
623
|
return wrapAnthropic(clientConfig, client, sessionOpts);
|
|
641
624
|
if (type === "workers-ai")
|
|
@@ -649,10 +632,8 @@ function wrapOpenAI(clientConfig, client, sessionOpts) {
|
|
|
649
632
|
get(target, prop) {
|
|
650
633
|
if (prop === "chat")
|
|
651
634
|
return wrapOpenAIChat(clientConfig, target.chat, target, sessionOpts);
|
|
652
|
-
if (prop === "sessionId")
|
|
653
|
-
|
|
654
|
-
if (prop === "tesSession")
|
|
655
|
-
return sessionOpts._session;
|
|
635
|
+
if (prop === "sessionId") return sessionOpts._resolvedSessionId;
|
|
636
|
+
if (prop === "tesSession") return sessionOpts._session;
|
|
656
637
|
if (prop === "session")
|
|
657
638
|
return (opts) => new OpenAISession(clientConfig, target, opts);
|
|
658
639
|
return target[prop];
|
|
@@ -729,10 +710,8 @@ function wrapAnthropic(clientConfig, client, sessionOpts) {
|
|
|
729
710
|
target,
|
|
730
711
|
sessionOpts
|
|
731
712
|
);
|
|
732
|
-
if (prop === "sessionId")
|
|
733
|
-
|
|
734
|
-
if (prop === "tesSession")
|
|
735
|
-
return sessionOpts._session;
|
|
713
|
+
if (prop === "sessionId") return sessionOpts._resolvedSessionId;
|
|
714
|
+
if (prop === "tesSession") return sessionOpts._session;
|
|
736
715
|
if (prop === "session")
|
|
737
716
|
return (opts) => new AnthropicSession(clientConfig, target, opts);
|
|
738
717
|
return target[prop];
|
|
@@ -819,10 +798,8 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
|
|
|
819
798
|
return result;
|
|
820
799
|
};
|
|
821
800
|
}
|
|
822
|
-
if (prop === "sessionId")
|
|
823
|
-
|
|
824
|
-
if (prop === "tesSession")
|
|
825
|
-
return sessionOpts._session;
|
|
801
|
+
if (prop === "sessionId") return sessionOpts._resolvedSessionId;
|
|
802
|
+
if (prop === "tesSession") return sessionOpts._session;
|
|
826
803
|
if (prop === "session")
|
|
827
804
|
return (opts) => new WorkersAISession(clientConfig, target, opts);
|
|
828
805
|
return target[prop];
|
|
@@ -841,29 +818,24 @@ var WorkersAISession = class extends Session {
|
|
|
841
818
|
}
|
|
842
819
|
};
|
|
843
820
|
function extractToolResults(session, messages) {
|
|
844
|
-
if (!messages?.length || !session._toolCalls.length)
|
|
845
|
-
return;
|
|
821
|
+
if (!messages?.length || !session._toolCalls.length) return;
|
|
846
822
|
const idToName = /* @__PURE__ */ new Map();
|
|
847
823
|
for (const msg of messages) {
|
|
848
824
|
if (msg.role === "assistant" && msg.tool_calls) {
|
|
849
825
|
for (const tc of msg.tool_calls) {
|
|
850
826
|
const id = tc.id || tc.tool_call_id;
|
|
851
827
|
const name = tc.function?.name || tc.name;
|
|
852
|
-
if (id && name)
|
|
853
|
-
idToName.set(id, name);
|
|
828
|
+
if (id && name) idToName.set(id, name);
|
|
854
829
|
}
|
|
855
830
|
}
|
|
856
831
|
}
|
|
857
832
|
for (const msg of messages) {
|
|
858
|
-
if (msg.role !== "tool" || !msg.content)
|
|
859
|
-
continue;
|
|
833
|
+
if (msg.role !== "tool" || !msg.content) continue;
|
|
860
834
|
const callId = msg.tool_call_id;
|
|
861
835
|
const toolName = callId ? idToName.get(callId) : null;
|
|
862
836
|
for (const tc of session._toolCalls) {
|
|
863
|
-
if (tc.result)
|
|
864
|
-
|
|
865
|
-
if (toolName && tc.tool !== toolName)
|
|
866
|
-
continue;
|
|
837
|
+
if (tc.result) continue;
|
|
838
|
+
if (toolName && tc.tool !== toolName) continue;
|
|
867
839
|
try {
|
|
868
840
|
const parsed = JSON.parse(msg.content);
|
|
869
841
|
if (Array.isArray(parsed)) {
|
|
@@ -906,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
906
878
|
}
|
|
907
879
|
|
|
908
880
|
// src/telemetry.js
|
|
909
|
-
var VERSION = "0.9.5";
|
|
881
|
+
var VERSION = "0.9.6";
|
|
910
882
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
911
883
|
function machineId() {
|
|
912
884
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
|
@@ -917,11 +889,9 @@ function machineId() {
|
|
|
917
889
|
return (hash >>> 0).toString(16).padStart(8, "0");
|
|
918
890
|
}
|
|
919
891
|
function emitTelemetry(mode) {
|
|
920
|
-
if (typeof process !== "undefined" && process.env?.PENTATONIC_TELEMETRY === "0")
|
|
921
|
-
return;
|
|
892
|
+
if (typeof process !== "undefined" && process.env?.PENTATONIC_TELEMETRY === "0") return;
|
|
922
893
|
const f = globalThis.fetch;
|
|
923
|
-
if (!f)
|
|
924
|
-
return;
|
|
894
|
+
if (!f) return;
|
|
925
895
|
f(TELEMETRY_URL, {
|
|
926
896
|
method: "POST",
|
|
927
897
|
headers: { "Content-Type": "application/json" },
|
|
@@ -941,12 +911,9 @@ function emitTelemetry(mode) {
|
|
|
941
911
|
// src/client.js
|
|
942
912
|
var TESClient = class {
|
|
943
913
|
constructor({ clientId, apiKey, endpoint, headers, userId, captureContent = true, maxContentLength = 4096 }) {
|
|
944
|
-
if (!clientId)
|
|
945
|
-
|
|
946
|
-
if (!apiKey)
|
|
947
|
-
throw new Error("apiKey is required");
|
|
948
|
-
if (!endpoint)
|
|
949
|
-
throw new Error("endpoint is required");
|
|
914
|
+
if (!clientId) throw new Error("clientId is required");
|
|
915
|
+
if (!apiKey) throw new Error("apiKey is required");
|
|
916
|
+
if (!endpoint) throw new Error("endpoint is required");
|
|
950
917
|
const cleanEndpoint = endpoint.replace(/\/$/, "");
|
|
951
918
|
const isLocalDev = /^http:\/\/localhost(:\d+)?(\/|$)/.test(cleanEndpoint) || /^http:\/\/127\.0\.0\.1(:\d+)?(\/|$)/.test(cleanEndpoint);
|
|
952
919
|
if (!cleanEndpoint.startsWith("https://") && !isLocalDev) {
|
package/dist/index.js
CHANGED
|
@@ -158,8 +158,7 @@ var encoder = new TextEncoder();
|
|
|
158
158
|
function toBase64Url(buffer) {
|
|
159
159
|
const bytes = new Uint8Array(buffer);
|
|
160
160
|
let binary = "";
|
|
161
|
-
for (let i = 0; i < bytes.length; i++)
|
|
162
|
-
binary += String.fromCharCode(bytes[i]);
|
|
161
|
+
for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
|
|
163
162
|
return btoa(binary).replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
|
|
164
163
|
}
|
|
165
164
|
async function signPayload(secret, payload) {
|
|
@@ -180,27 +179,22 @@ async function verifyPayload(secret, payload, signature) {
|
|
|
180
179
|
}
|
|
181
180
|
async function buildTrackUrl(endpoint, apiKey, payload) {
|
|
182
181
|
const p = { ...payload };
|
|
183
|
-
if (!p.e)
|
|
184
|
-
p.e = "LINK_CLICK";
|
|
182
|
+
if (!p.e) p.e = "LINK_CLICK";
|
|
185
183
|
const encoded = toBase64Url(encoder.encode(JSON.stringify(p)));
|
|
186
184
|
const sig = await signPayload(apiKey, p);
|
|
187
185
|
return `${endpoint}/r/${encoded}?sig=${sig}`;
|
|
188
186
|
}
|
|
189
187
|
var URL_RE = /https?:\/\/[^\s"'<>)\]]+/g;
|
|
190
188
|
async function rewriteUrls(text, config, sessionId, metadata) {
|
|
191
|
-
if (!text)
|
|
192
|
-
return text;
|
|
189
|
+
if (!text) return text;
|
|
193
190
|
const redirectPrefix = `${config.endpoint}/r/`;
|
|
194
191
|
const matches = [...text.matchAll(URL_RE)];
|
|
195
|
-
if (matches.length === 0)
|
|
196
|
-
return text;
|
|
192
|
+
if (matches.length === 0) return text;
|
|
197
193
|
const replacements = /* @__PURE__ */ new Map();
|
|
198
194
|
for (const m of matches) {
|
|
199
195
|
const originalUrl = m[0];
|
|
200
|
-
if (originalUrl.startsWith(redirectPrefix))
|
|
201
|
-
|
|
202
|
-
if (replacements.has(originalUrl))
|
|
203
|
-
continue;
|
|
196
|
+
if (originalUrl.startsWith(redirectPrefix)) continue;
|
|
197
|
+
if (replacements.has(originalUrl)) continue;
|
|
204
198
|
const payload = {
|
|
205
199
|
u: originalUrl,
|
|
206
200
|
s: sessionId,
|
|
@@ -223,10 +217,8 @@ async function rewriteUrls(text, config, sessionId, metadata) {
|
|
|
223
217
|
|
|
224
218
|
// src/session.js
|
|
225
219
|
function truncate(value, maxLen) {
|
|
226
|
-
if (!value || !maxLen || typeof value !== "string")
|
|
227
|
-
|
|
228
|
-
if (value.length <= maxLen)
|
|
229
|
-
return value;
|
|
220
|
+
if (!value || !maxLen || typeof value !== "string") return value;
|
|
221
|
+
if (value.length <= maxLen) return value;
|
|
230
222
|
return value.slice(0, maxLen) + "...[truncated]";
|
|
231
223
|
}
|
|
232
224
|
var Session = class {
|
|
@@ -389,8 +381,7 @@ var Session = class {
|
|
|
389
381
|
// packages/memory/src/inject.js
|
|
390
382
|
var MAX_CHARS_PER_MEMORY = 1200;
|
|
391
383
|
function injectMemories(body, memories, provider) {
|
|
392
|
-
if (!memories || memories.length === 0)
|
|
393
|
-
return body;
|
|
384
|
+
if (!memories || memories.length === 0) return body;
|
|
394
385
|
const preamble = formatPreamble(memories);
|
|
395
386
|
if (provider === "anthropic") {
|
|
396
387
|
return injectAnthropic(body, preamble);
|
|
@@ -451,8 +442,7 @@ var DEFAULT_SEARCH_TIMEOUT_MS = 5e3;
|
|
|
451
442
|
var DEFAULT_SEARCH_LIMIT = 6;
|
|
452
443
|
var DEFAULT_SEARCH_MIN_SCORE = 0.55;
|
|
453
444
|
function normalizeConfig(config) {
|
|
454
|
-
if (!config)
|
|
455
|
-
throw new Error("hosted: config is required");
|
|
445
|
+
if (!config) throw new Error("hosted: config is required");
|
|
456
446
|
const endpoint = config.endpoint || config.tes_endpoint;
|
|
457
447
|
const clientId = config.clientId || config.tes_client_id;
|
|
458
448
|
const apiKey = config.apiKey || config.tes_api_key;
|
|
@@ -477,8 +467,7 @@ function buildHostedHeaders(config) {
|
|
|
477
467
|
return headers;
|
|
478
468
|
}
|
|
479
469
|
async function hostedSearch(config, query, opts = {}) {
|
|
480
|
-
if (!query)
|
|
481
|
-
return { memories: [], skipped: "no_query" };
|
|
470
|
+
if (!query) return { memories: [], skipped: "no_query" };
|
|
482
471
|
let cfg;
|
|
483
472
|
try {
|
|
484
473
|
cfg = normalizeConfig(config);
|
|
@@ -525,8 +514,7 @@ async function hostedSearch(config, query, opts = {}) {
|
|
|
525
514
|
return { memories: payload.data?.semanticSearchMemories || [] };
|
|
526
515
|
}
|
|
527
516
|
function shortenReason(msg) {
|
|
528
|
-
if (typeof msg !== "string")
|
|
529
|
-
return "unknown";
|
|
517
|
+
if (typeof msg !== "string") return "unknown";
|
|
530
518
|
return msg.toLowerCase().replace(/[^a-z0-9]+/g, "_").slice(0, 60);
|
|
531
519
|
}
|
|
532
520
|
|
|
@@ -537,23 +525,19 @@ var MEMORY_DEFAULTS = {
|
|
|
537
525
|
timeoutMs: 800
|
|
538
526
|
};
|
|
539
527
|
function detectClientType(client) {
|
|
540
|
-
if (client?.chat?.completions?.create)
|
|
541
|
-
|
|
542
|
-
if (client?.messages?.create)
|
|
543
|
-
return "anthropic";
|
|
544
|
-
if (typeof client?.run === "function")
|
|
545
|
-
return "workers-ai";
|
|
528
|
+
if (client?.chat?.completions?.create) return "openai";
|
|
529
|
+
if (client?.messages?.create) return "anthropic";
|
|
530
|
+
if (typeof client?.run === "function") return "workers-ai";
|
|
546
531
|
return "unknown";
|
|
547
532
|
}
|
|
548
533
|
function extractLastUserMessage(params, provider) {
|
|
534
|
+
void provider;
|
|
549
535
|
const msgs = Array.isArray(params?.messages) ? params.messages : null;
|
|
550
|
-
if (!msgs)
|
|
551
|
-
return null;
|
|
536
|
+
if (!msgs) return null;
|
|
552
537
|
for (let i = msgs.length - 1; i >= 0; i--) {
|
|
553
538
|
if (msgs[i].role === "user") {
|
|
554
539
|
const c = msgs[i].content;
|
|
555
|
-
if (typeof c === "string")
|
|
556
|
-
return c;
|
|
540
|
+
if (typeof c === "string") return c;
|
|
557
541
|
if (Array.isArray(c)) {
|
|
558
542
|
return c.filter((p) => p.type === "text" && typeof p.text === "string").map((p) => p.text).join("\n");
|
|
559
543
|
}
|
|
@@ -603,8 +587,7 @@ function wrapClient(clientConfig, client, sessionOpts = {}) {
|
|
|
603
587
|
metadata: sessionOpts.metadata
|
|
604
588
|
});
|
|
605
589
|
const type = detectClientType(client);
|
|
606
|
-
if (type === "openai")
|
|
607
|
-
return wrapOpenAI(clientConfig, client, sessionOpts);
|
|
590
|
+
if (type === "openai") return wrapOpenAI(clientConfig, client, sessionOpts);
|
|
608
591
|
if (type === "anthropic")
|
|
609
592
|
return wrapAnthropic(clientConfig, client, sessionOpts);
|
|
610
593
|
if (type === "workers-ai")
|
|
@@ -618,10 +601,8 @@ function wrapOpenAI(clientConfig, client, sessionOpts) {
|
|
|
618
601
|
get(target, prop) {
|
|
619
602
|
if (prop === "chat")
|
|
620
603
|
return wrapOpenAIChat(clientConfig, target.chat, target, sessionOpts);
|
|
621
|
-
if (prop === "sessionId")
|
|
622
|
-
|
|
623
|
-
if (prop === "tesSession")
|
|
624
|
-
return sessionOpts._session;
|
|
604
|
+
if (prop === "sessionId") return sessionOpts._resolvedSessionId;
|
|
605
|
+
if (prop === "tesSession") return sessionOpts._session;
|
|
625
606
|
if (prop === "session")
|
|
626
607
|
return (opts) => new OpenAISession(clientConfig, target, opts);
|
|
627
608
|
return target[prop];
|
|
@@ -698,10 +679,8 @@ function wrapAnthropic(clientConfig, client, sessionOpts) {
|
|
|
698
679
|
target,
|
|
699
680
|
sessionOpts
|
|
700
681
|
);
|
|
701
|
-
if (prop === "sessionId")
|
|
702
|
-
|
|
703
|
-
if (prop === "tesSession")
|
|
704
|
-
return sessionOpts._session;
|
|
682
|
+
if (prop === "sessionId") return sessionOpts._resolvedSessionId;
|
|
683
|
+
if (prop === "tesSession") return sessionOpts._session;
|
|
705
684
|
if (prop === "session")
|
|
706
685
|
return (opts) => new AnthropicSession(clientConfig, target, opts);
|
|
707
686
|
return target[prop];
|
|
@@ -788,10 +767,8 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
|
|
|
788
767
|
return result;
|
|
789
768
|
};
|
|
790
769
|
}
|
|
791
|
-
if (prop === "sessionId")
|
|
792
|
-
|
|
793
|
-
if (prop === "tesSession")
|
|
794
|
-
return sessionOpts._session;
|
|
770
|
+
if (prop === "sessionId") return sessionOpts._resolvedSessionId;
|
|
771
|
+
if (prop === "tesSession") return sessionOpts._session;
|
|
795
772
|
if (prop === "session")
|
|
796
773
|
return (opts) => new WorkersAISession(clientConfig, target, opts);
|
|
797
774
|
return target[prop];
|
|
@@ -810,29 +787,24 @@ var WorkersAISession = class extends Session {
|
|
|
810
787
|
}
|
|
811
788
|
};
|
|
812
789
|
function extractToolResults(session, messages) {
|
|
813
|
-
if (!messages?.length || !session._toolCalls.length)
|
|
814
|
-
return;
|
|
790
|
+
if (!messages?.length || !session._toolCalls.length) return;
|
|
815
791
|
const idToName = /* @__PURE__ */ new Map();
|
|
816
792
|
for (const msg of messages) {
|
|
817
793
|
if (msg.role === "assistant" && msg.tool_calls) {
|
|
818
794
|
for (const tc of msg.tool_calls) {
|
|
819
795
|
const id = tc.id || tc.tool_call_id;
|
|
820
796
|
const name = tc.function?.name || tc.name;
|
|
821
|
-
if (id && name)
|
|
822
|
-
idToName.set(id, name);
|
|
797
|
+
if (id && name) idToName.set(id, name);
|
|
823
798
|
}
|
|
824
799
|
}
|
|
825
800
|
}
|
|
826
801
|
for (const msg of messages) {
|
|
827
|
-
if (msg.role !== "tool" || !msg.content)
|
|
828
|
-
continue;
|
|
802
|
+
if (msg.role !== "tool" || !msg.content) continue;
|
|
829
803
|
const callId = msg.tool_call_id;
|
|
830
804
|
const toolName = callId ? idToName.get(callId) : null;
|
|
831
805
|
for (const tc of session._toolCalls) {
|
|
832
|
-
if (tc.result)
|
|
833
|
-
|
|
834
|
-
if (toolName && tc.tool !== toolName)
|
|
835
|
-
continue;
|
|
806
|
+
if (tc.result) continue;
|
|
807
|
+
if (toolName && tc.tool !== toolName) continue;
|
|
836
808
|
try {
|
|
837
809
|
const parsed = JSON.parse(msg.content);
|
|
838
810
|
if (Array.isArray(parsed)) {
|
|
@@ -875,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
875
847
|
}
|
|
876
848
|
|
|
877
849
|
// src/telemetry.js
|
|
878
|
-
var VERSION = "0.9.5";
|
|
850
|
+
var VERSION = "0.9.6";
|
|
879
851
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
880
852
|
function machineId() {
|
|
881
853
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
|
@@ -886,11 +858,9 @@ function machineId() {
|
|
|
886
858
|
return (hash >>> 0).toString(16).padStart(8, "0");
|
|
887
859
|
}
|
|
888
860
|
function emitTelemetry(mode) {
|
|
889
|
-
if (typeof process !== "undefined" && process.env?.PENTATONIC_TELEMETRY === "0")
|
|
890
|
-
return;
|
|
861
|
+
if (typeof process !== "undefined" && process.env?.PENTATONIC_TELEMETRY === "0") return;
|
|
891
862
|
const f = globalThis.fetch;
|
|
892
|
-
if (!f)
|
|
893
|
-
return;
|
|
863
|
+
if (!f) return;
|
|
894
864
|
f(TELEMETRY_URL, {
|
|
895
865
|
method: "POST",
|
|
896
866
|
headers: { "Content-Type": "application/json" },
|
|
@@ -910,12 +880,9 @@ function emitTelemetry(mode) {
|
|
|
910
880
|
// src/client.js
|
|
911
881
|
var TESClient = class {
|
|
912
882
|
constructor({ clientId, apiKey, endpoint, headers, userId, captureContent = true, maxContentLength = 4096 }) {
|
|
913
|
-
if (!clientId)
|
|
914
|
-
|
|
915
|
-
if (!apiKey)
|
|
916
|
-
throw new Error("apiKey is required");
|
|
917
|
-
if (!endpoint)
|
|
918
|
-
throw new Error("endpoint is required");
|
|
883
|
+
if (!clientId) throw new Error("clientId is required");
|
|
884
|
+
if (!apiKey) throw new Error("apiKey is required");
|
|
885
|
+
if (!endpoint) throw new Error("endpoint is required");
|
|
919
886
|
const cleanEndpoint = endpoint.replace(/\/$/, "");
|
|
920
887
|
const isLocalDev = /^http:\/\/localhost(:\d+)?(\/|$)/.test(cleanEndpoint) || /^http:\/\/127\.0\.0\.1(:\d+)?(\/|$)/.test(cleanEndpoint);
|
|
921
888
|
if (!cleanEndpoint.startsWith("https://") && !isLocalDev) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.9.5",
|
|
3
|
+
"version": "0.9.6",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -73,11 +73,18 @@
|
|
|
73
73
|
"dependencies": {
|
|
74
74
|
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
75
75
|
"@pentatonic-ai/ai-agent-sdk": "^0.4.0",
|
|
76
|
-
"esbuild": "^0.
|
|
76
|
+
"esbuild": "^0.25.0"
|
|
77
77
|
},
|
|
78
78
|
"devDependencies": {
|
|
79
79
|
"@jest/globals": "^29.7.0",
|
|
80
80
|
"jest": "^29.7.0",
|
|
81
81
|
"pg": "^8.20.0"
|
|
82
|
+
},
|
|
83
|
+
"overrides": {
|
|
84
|
+
"path-to-regexp": "^8.4.0",
|
|
85
|
+
"ip-address": "^10.1.1",
|
|
86
|
+
"@hono/node-server": "^1.19.13",
|
|
87
|
+
"picomatch": "^4.0.4",
|
|
88
|
+
"esbuild": "^0.25.0"
|
|
82
89
|
}
|
|
83
90
|
}
|
|
package/packages/memory/package-lock.json
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
{
|
|
2
|
-
"name": "
|
|
3
|
-
"version": "0.1.0",
|
|
2
|
+
"name": "memory",
|
|
4
3
|
"lockfileVersion": 3,
|
|
5
4
|
"requires": true,
|
|
6
5
|
"packages": {
|
|
7
6
|
"": {
|
|
8
|
-
"name": "
|
|
7
|
+
"name": "memory",
|
|
9
8
|
"dependencies": {
|
|
10
9
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
11
10
|
"pg": "^8.13.0"
|
|
12
11
|
}
|
|
13
12
|
},
|
|
14
13
|
"node_modules/@hono/node-server": {
|
|
15
|
-
"version": "1.19.
|
|
16
|
-
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.
|
|
17
|
-
"integrity": "sha512-
|
|
14
|
+
"version": "1.19.14",
|
|
15
|
+
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.14.tgz",
|
|
16
|
+
"integrity": "sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==",
|
|
18
17
|
"license": "MIT",
|
|
19
18
|
"engines": {
|
|
20
19
|
"node": ">=18.14.1"
|
|
@@ -77,9 +76,9 @@
|
|
|
77
76
|
}
|
|
78
77
|
},
|
|
79
78
|
"node_modules/ajv": {
|
|
80
|
-
"version": "8.
|
|
81
|
-
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.
|
|
82
|
-
"integrity": "sha512-
|
|
79
|
+
"version": "8.20.0",
|
|
80
|
+
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.20.0.tgz",
|
|
81
|
+
"integrity": "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==",
|
|
83
82
|
"license": "MIT",
|
|
84
83
|
"dependencies": {
|
|
85
84
|
"fast-deep-equal": "^3.1.3",
|
|
@@ -355,9 +354,9 @@
|
|
|
355
354
|
}
|
|
356
355
|
},
|
|
357
356
|
"node_modules/eventsource-parser": {
|
|
358
|
-
"version": "3.0.
|
|
359
|
-
"resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.
|
|
360
|
-
"integrity": "sha512-
|
|
357
|
+
"version": "3.0.8",
|
|
358
|
+
"resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.8.tgz",
|
|
359
|
+
"integrity": "sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ==",
|
|
361
360
|
"license": "MIT",
|
|
362
361
|
"engines": {
|
|
363
362
|
"node": ">=18.0.0"
|
|
@@ -407,12 +406,12 @@
|
|
|
407
406
|
}
|
|
408
407
|
},
|
|
409
408
|
"node_modules/express-rate-limit": {
|
|
410
|
-
"version": "8.
|
|
411
|
-
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.
|
|
412
|
-
"integrity": "sha512-
|
|
409
|
+
"version": "8.5.1",
|
|
410
|
+
"resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.5.1.tgz",
|
|
411
|
+
"integrity": "sha512-5O6KYmyJEpuPJV5hNTXKbAHWRqrzyu+OI3vUnSd2kXFubIVpG7ezpgxQy76Zo5GQZtrQBg86hF+CM/NX+cioiQ==",
|
|
413
412
|
"license": "MIT",
|
|
414
413
|
"dependencies": {
|
|
415
|
-
"ip-address": "10.
|
|
414
|
+
"ip-address": "^10.2.0"
|
|
416
415
|
},
|
|
417
416
|
"engines": {
|
|
418
417
|
"node": ">= 16"
|
|
@@ -556,9 +555,9 @@
|
|
|
556
555
|
}
|
|
557
556
|
},
|
|
558
557
|
"node_modules/hasown": {
|
|
559
|
-
"version": "2.0.
|
|
560
|
-
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.
|
|
561
|
-
"integrity": "sha512-
|
|
558
|
+
"version": "2.0.3",
|
|
559
|
+
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
|
|
560
|
+
"integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
|
|
562
561
|
"license": "MIT",
|
|
563
562
|
"dependencies": {
|
|
564
563
|
"function-bind": "^1.1.2"
|
|
@@ -619,9 +618,9 @@
|
|
|
619
618
|
"license": "ISC"
|
|
620
619
|
},
|
|
621
620
|
"node_modules/ip-address": {
|
|
622
|
-
"version": "10.
|
|
623
|
-
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.
|
|
624
|
-
"integrity": "sha512
|
|
621
|
+
"version": "10.2.0",
|
|
622
|
+
"resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.2.0.tgz",
|
|
623
|
+
"integrity": "sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==",
|
|
625
624
|
"license": "MIT",
|
|
626
625
|
"engines": {
|
|
627
626
|
"node": ">= 12"
|
|
@@ -649,9 +648,9 @@
|
|
|
649
648
|
"license": "ISC"
|
|
650
649
|
},
|
|
651
650
|
"node_modules/jose": {
|
|
652
|
-
"version": "6.2.
|
|
653
|
-
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.
|
|
654
|
-
"integrity": "sha512-
|
|
651
|
+
"version": "6.2.3",
|
|
652
|
+
"resolved": "https://registry.npmjs.org/jose/-/jose-6.2.3.tgz",
|
|
653
|
+
"integrity": "sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==",
|
|
655
654
|
"license": "MIT",
|
|
656
655
|
"funding": {
|
|
657
656
|
"url": "https://github.com/sponsors/panva"
|
|
@@ -1201,17 +1200,34 @@
|
|
|
1201
1200
|
}
|
|
1202
1201
|
},
|
|
1203
1202
|
"node_modules/type-is": {
|
|
1204
|
-
"version": "2.0
|
|
1205
|
-
"resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.
|
|
1206
|
-
"integrity": "sha512-
|
|
1203
|
+
"version": "2.1.0",
|
|
1204
|
+
"resolved": "https://registry.npmjs.org/type-is/-/type-is-2.1.0.tgz",
|
|
1205
|
+
"integrity": "sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA==",
|
|
1207
1206
|
"license": "MIT",
|
|
1208
1207
|
"dependencies": {
|
|
1209
|
-
"content-type": "^
|
|
1208
|
+
"content-type": "^2.0.0",
|
|
1210
1209
|
"media-typer": "^1.1.0",
|
|
1211
1210
|
"mime-types": "^3.0.0"
|
|
1212
1211
|
},
|
|
1213
1212
|
"engines": {
|
|
1214
|
-
"node": ">=
|
|
1213
|
+
"node": ">= 18"
|
|
1214
|
+
},
|
|
1215
|
+
"funding": {
|
|
1216
|
+
"type": "opencollective",
|
|
1217
|
+
"url": "https://opencollective.com/express"
|
|
1218
|
+
}
|
|
1219
|
+
},
|
|
1220
|
+
"node_modules/type-is/node_modules/content-type": {
|
|
1221
|
+
"version": "2.0.0",
|
|
1222
|
+
"resolved": "https://registry.npmjs.org/content-type/-/content-type-2.0.0.tgz",
|
|
1223
|
+
"integrity": "sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ==",
|
|
1224
|
+
"license": "MIT",
|
|
1225
|
+
"engines": {
|
|
1226
|
+
"node": ">=18"
|
|
1227
|
+
},
|
|
1228
|
+
"funding": {
|
|
1229
|
+
"type": "opencollective",
|
|
1230
|
+
"url": "https://opencollective.com/express"
|
|
1215
1231
|
}
|
|
1216
1232
|
},
|
|
1217
1233
|
"node_modules/unpipe": {
|
|
@@ -1263,9 +1279,9 @@
|
|
|
1263
1279
|
}
|
|
1264
1280
|
},
|
|
1265
1281
|
"node_modules/zod": {
|
|
1266
|
-
"version": "4.3
|
|
1267
|
-
"resolved": "https://registry.npmjs.org/zod/-/zod-4.3.
|
|
1268
|
-
"integrity": "sha512-
|
|
1282
|
+
"version": "4.4.3",
|
|
1283
|
+
"resolved": "https://registry.npmjs.org/zod/-/zod-4.4.3.tgz",
|
|
1284
|
+
"integrity": "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==",
|
|
1269
1285
|
"license": "MIT",
|
|
1270
1286
|
"funding": {
|
|
1271
1287
|
"url": "https://github.com/sponsors/colinhacks"
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"private": true,
|
|
3
3
|
"name": "memory",
|
|
4
|
-
"description": "Memory subsystem
|
|
4
|
+
"description": "Memory subsystem \u2014 imported via @pentatonic-ai/ai-agent-sdk/memory",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
8
8
|
"pg": "^8.13.0"
|
|
9
|
+
},
|
|
10
|
+
"overrides": {
|
|
11
|
+
"ip-address": "^10.1.1"
|
|
9
12
|
}
|
|
10
13
|
}
|
|
@@ -730,15 +730,22 @@ describe("engine HTTP client", () => {
|
|
|
730
730
|
});
|
|
731
731
|
|
|
732
732
|
describe("engineForget", () => {
|
|
733
|
-
it("forwards id when provided", async () => {
|
|
733
|
+
it("forwards id when provided (no arena composition for id-based deletes)", async () => {
|
|
734
734
|
mockOk({ deleted: 1 });
|
|
735
735
|
await engineForget("https://e", { clientId: "acme", id: "abc" });
|
|
736
736
|
const body = JSON.parse(calls[0].init.body);
|
|
737
737
|
expect(calls[0].url).toBe("https://e/forget");
|
|
738
|
-
|
|
738
|
+
// id-only deletes target the global record id; the engine's
|
|
739
|
+
// id path doesn't read arena scope, so we don't inject it.
|
|
740
|
+
expect(body).toEqual({ id: "abc" });
|
|
739
741
|
});
|
|
740
742
|
|
|
741
|
-
it("forwards metadata_contains
|
|
743
|
+
it("forwards metadata_contains and injects arena INSIDE it (tenant default)", async () => {
|
|
744
|
+
// The engine reads `metadata_contains.arena` (not top-level
|
|
745
|
+
// arena) to scope a forget at L2. Pre-2026-05-14 this helper
|
|
746
|
+
// put arena at the top level, which the engine silently
|
|
747
|
+
// ignored — only L6 ever got wiped. Pinning the post-fix
|
|
748
|
+
// contract here so a regression can't sneak back in.
|
|
742
749
|
mockOk({ deleted: 5 });
|
|
743
750
|
await engineForget("https://e", {
|
|
744
751
|
clientId: "acme",
|
|
@@ -746,11 +753,39 @@ describe("engine HTTP client", () => {
|
|
|
746
753
|
});
|
|
747
754
|
const body = JSON.parse(calls[0].init.body);
|
|
748
755
|
expect(body).toEqual({
|
|
749
|
-
arena: "acme",
|
|
750
|
-
|
|
756
|
+
metadata_contains: { arena: "acme", source_repo: "monorepo" },
|
|
757
|
+
});
|
|
758
|
+
// Top-level arena must NOT be sent — the engine ignores it and
|
|
759
|
+
// its presence would mislead anyone reading wire dumps.
|
|
760
|
+
expect(body.arena).toBeUndefined();
|
|
761
|
+
});
|
|
762
|
+
|
|
763
|
+
it("composes user-scoped arena when userId is supplied", async () => {
|
|
764
|
+
mockOk({ deleted: 12 });
|
|
765
|
+
await engineForget("https://e", {
|
|
766
|
+
clientId: "acme",
|
|
767
|
+
userId: "u-1",
|
|
768
|
+
metadataContains: { actor_user_id: "u-1" },
|
|
769
|
+
});
|
|
770
|
+
const body = JSON.parse(calls[0].init.body);
|
|
771
|
+
expect(body).toEqual({
|
|
772
|
+
metadata_contains: { arena: "acme:u-1", actor_user_id: "u-1" },
|
|
751
773
|
});
|
|
752
774
|
});
|
|
753
775
|
|
|
776
|
+
it("respects caller-supplied arena inside metadataContains (super-admin override)", async () => {
|
|
777
|
+
// Super-admin tooling that wipes "some other tenant's user arena"
|
|
778
|
+
// — pass the explicit arena and the SDK leaves it alone instead
|
|
779
|
+
// of recomposing from (clientId, userId).
|
|
780
|
+
mockOk({ deleted: 99 });
|
|
781
|
+
await engineForget("https://e", {
|
|
782
|
+
clientId: "tes-admin",
|
|
783
|
+
metadataContains: { arena: "victim-tenant:u-7", source: "x" },
|
|
784
|
+
});
|
|
785
|
+
const body = JSON.parse(calls[0].init.body);
|
|
786
|
+
expect(body.metadata_contains.arena).toBe("victim-tenant:u-7");
|
|
787
|
+
});
|
|
788
|
+
|
|
754
789
|
it("requires id or metadataContains", async () => {
|
|
755
790
|
await expect(
|
|
756
791
|
engineForget("https://e", { clientId: "acme" })
|
|
@@ -328,9 +328,31 @@ export async function engineSearch(engineUrl, opts) {
|
|
|
328
328
|
*
|
|
329
329
|
* Caller must supply exactly one of `id` or `metadataContains`.
|
|
330
330
|
*
|
|
331
|
+
* Arena scope: the engine extracts the arena from `metadata_contains.arena`
|
|
332
|
+
* (see memory-engine `compat/server.py:1048-1052`). Top-level `arena` is
|
|
333
|
+
* NOT read by the engine — previous versions of this helper put it there
|
|
334
|
+
* and the resulting calls only ever wiped L6, leaving L0/L2/L3/L4 records
|
|
335
|
+
* untouched. The 2026-05-14 Pip dedup cutover surfaced the bug: an
|
|
336
|
+
* actor_user_id wipe returned 0 against an arena that personFacets
|
|
337
|
+
* confirmed held thousands of records. This helper now injects `arena`
|
|
338
|
+
* into `metadata_contains` so the engine forwards to L2 /forget-internal
|
|
339
|
+
* and actually wipes the cross-layer arena.
|
|
340
|
+
*
|
|
341
|
+
* By default the row is **user-scoped** (`arena = clientId:userId`) when
|
|
342
|
+
* `userId` is supplied, otherwise **tenant-wide** (`arena = clientId`).
|
|
343
|
+
* Pass `scope: "tenant"` explicitly to bypass the user-arena scope from a
|
|
344
|
+
* user-context. Matches `engineStore`'s arena semantics for symmetry.
|
|
345
|
+
*
|
|
346
|
+
* If the caller passes `arena` inside `metadataContains` themselves, the
|
|
347
|
+
* SDK respects it as-is and skips composition — useful for super-admin
|
|
348
|
+
* tools that need to wipe an arena other than the one derived from
|
|
349
|
+
* (clientId, userId).
|
|
350
|
+
*
|
|
331
351
|
* @param {string} engineUrl
|
|
332
352
|
* @param {object} opts
|
|
333
353
|
* @param {string} opts.clientId
|
|
354
|
+
* @param {string} [opts.userId] user id within the tenant; controls default scope
|
|
355
|
+
* @param {"tenant"|"user"} [opts.scope] override the default scope. "user" requires userId.
|
|
334
356
|
* @param {string} [opts.id] forget a single record by engine id
|
|
335
357
|
* @param {object} [opts.metadataContains] forget all records matching every key=value pair
|
|
336
358
|
* @param {Record<string,string>} [opts.headers] forwarded HTTP headers
|
|
@@ -338,15 +360,28 @@ export async function engineSearch(engineUrl, opts) {
|
|
|
338
360
|
* @returns {Promise<{deleted: number}>}
|
|
339
361
|
*/
|
|
340
362
|
export async function engineForget(engineUrl, opts) {
|
|
341
|
-
const { clientId, id, metadataContains, headers } = opts || {};
|
|
363
|
+
const { clientId, userId, scope, id, metadataContains, headers } = opts || {};
|
|
342
364
|
if (!clientId) throw new Error("engineForget: clientId required");
|
|
343
365
|
if (!id && !metadataContains) {
|
|
344
366
|
throw new Error("engineForget: provide id or metadataContains");
|
|
345
367
|
}
|
|
368
|
+
|
|
369
|
+
// Compose arena from (clientId, userId, scope) using the same shape
|
|
370
|
+
// engineStore uses. Caller-supplied `metadataContains.arena` wins —
|
|
371
|
+
// the SDK shouldn't second-guess a super-admin explicitly targeting
|
|
372
|
+
// a specific arena.
|
|
373
|
+
let mergedMetadata;
|
|
374
|
+
if (metadataContains) {
|
|
375
|
+
const hasExplicitArena =
|
|
376
|
+
typeof metadataContains.arena === "string" && metadataContains.arena;
|
|
377
|
+
mergedMetadata = hasExplicitArena
|
|
378
|
+
? metadataContains
|
|
379
|
+
: { ...metadataContains, arena: composeArena(clientId, userId, scope) };
|
|
380
|
+
}
|
|
381
|
+
|
|
346
382
|
const body = {
|
|
347
|
-
arena: clientId,
|
|
348
383
|
...(id ? { id } : {}),
|
|
349
|
-
...(
|
|
384
|
+
...(mergedMetadata ? { metadata_contains: mergedMetadata } : {}),
|
|
350
385
|
};
|
|
351
386
|
return fetchEngine(engineUrl, "/forget", body, { headers });
|
|
352
387
|
}
|
|
@@ -72,7 +72,22 @@ services:
|
|
|
72
72
|
environment:
|
|
73
73
|
NEO4J_AUTH: ${NEO4J_AUTH:-neo4j/local-dev-pw}
|
|
74
74
|
NEO4J_PLUGINS: '["apoc"]'
|
|
75
|
-
|
|
75
|
+
# Heap defaults were 512m hardcoded — fine for an empty dev
|
|
76
|
+
# graph, catastrophic at production scale. A 2026-05-14 prod
|
|
77
|
+
# incident on a ~10M-relationship KG saw L3 sit at >600% CPU
|
|
78
|
+
# locked in parallel GC, blocking the L2 write fan-out and
|
|
79
|
+
# triggering cascading 5xx through L6 and the embed gateway.
|
|
80
|
+
# The graph fit in RAM fine; the JVM just had nowhere to put
|
|
81
|
+
# short-lived allocations.
|
|
82
|
+
#
|
|
83
|
+
# Defaults now sized for a small-but-realistic local graph
|
|
84
|
+
# (~1M relationships): 1g heap + 256m initial + 512m pagecache.
|
|
85
|
+
# Production deployments override via PME_L3_HEAP_MAX etc.
|
|
86
|
+
# (the AWS overlay sets 4g/1g/1g — see thing-event-system
|
|
87
|
+
# modules/pentatonic-memory/deploy/docker-compose.aws.yml).
|
|
88
|
+
NEO4J_dbms_memory_heap_max__size: ${PME_L3_HEAP_MAX:-1g}
|
|
89
|
+
NEO4J_dbms_memory_heap_initial__size: ${PME_L3_HEAP_INITIAL:-256m}
|
|
90
|
+
NEO4J_dbms_memory_pagecache_size: ${PME_L3_PAGECACHE:-512m}
|
|
76
91
|
volumes:
|
|
77
92
|
- pme-l3-data:/data
|
|
78
93
|
healthcheck:
|
|
@@ -212,6 +212,9 @@ class EmbedClient:
|
|
|
212
212
|
timeout: float = 120.0,
|
|
213
213
|
env_prefix: str = "",
|
|
214
214
|
max_batch: int = 5,
|
|
215
|
+
max_retries: int = 3,
|
|
216
|
+
retry_base_delay: float = 0.1,
|
|
217
|
+
retry_max_delay: float = 1.0,
|
|
215
218
|
) -> None:
|
|
216
219
|
self._configured_provider = provider
|
|
217
220
|
self._provider = provider
|
|
@@ -229,6 +232,25 @@ class EmbedClient:
|
|
|
229
232
|
# cap observed on Pentatonic AI Gateway — above which it 502s and the
|
|
230
233
|
# caller silently loses vector writes (see test_chunking_* tests).
|
|
231
234
|
self._max_batch = max(0, max_batch)
|
|
235
|
+
# Retry-with-jitter for transient gateway saturation. The
|
|
236
|
+
# Pentatonic AI Gateway has a K≈10 concurrent-request cap; when
|
|
237
|
+
# multiple chunks of a single batch (or multiple concurrent
|
|
238
|
+
# batches from different layers) saturate it, individual POSTs
|
|
239
|
+
# 502/503. The 2026-05-15 incident showed an L6 fallback path
|
|
240
|
+
# 502-rate of 96% under Pip backfill load — every shared-embed
|
|
241
|
+
# failed, every per-layer fallback also failed, the cascade
|
|
242
|
+
# cleared only when traffic dropped.
|
|
243
|
+
#
|
|
244
|
+
# Retries with full jitter let those transient saturations
|
|
245
|
+
# absorb instead of cascading: when many concurrent chunks all
|
|
246
|
+
# 502 at once, jittered backoff staggers their retries so the
|
|
247
|
+
# gateway recovers slot-by-slot rather than thundering-herding.
|
|
248
|
+
# Tuned via {prefix}EMBED_MAX_RETRIES (default 3); set to 0
|
|
249
|
+
# to restore pre-fix behaviour. Only 429/502/503/504 are
|
|
250
|
+
# retried — auth + 4xx errors fail fast.
|
|
251
|
+
self._max_retries = max(0, max_retries)
|
|
252
|
+
self._retry_base_delay = max(0.0, retry_base_delay)
|
|
253
|
+
self._retry_max_delay = max(self._retry_base_delay, retry_max_delay)
|
|
232
254
|
|
|
233
255
|
# ------------------------------------------------------------------
|
|
234
256
|
# Construction
|
|
@@ -268,6 +290,13 @@ class EmbedClient:
|
|
|
268
290
|
autodetect = os.environ.get(f"{prefix}EMBED_AUTODETECT", "true").lower() == "true"
|
|
269
291
|
timeout = float(os.environ.get(f"{prefix}EMBED_TIMEOUT", "120"))
|
|
270
292
|
max_batch = int(os.environ.get(f"{prefix}EMBED_MAX_BATCH", "5"))
|
|
293
|
+
max_retries = int(os.environ.get(f"{prefix}EMBED_MAX_RETRIES", "3"))
|
|
294
|
+
retry_base_delay = float(
|
|
295
|
+
os.environ.get(f"{prefix}EMBED_RETRY_BASE_DELAY", "0.1")
|
|
296
|
+
)
|
|
297
|
+
retry_max_delay = float(
|
|
298
|
+
os.environ.get(f"{prefix}EMBED_RETRY_MAX_DELAY", "1.0")
|
|
299
|
+
)
|
|
271
300
|
|
|
272
301
|
provider = resolve_provider(provider_name, env_prefix=prefix)
|
|
273
302
|
return cls(
|
|
@@ -279,6 +308,9 @@ class EmbedClient:
|
|
|
279
308
|
timeout=timeout,
|
|
280
309
|
env_prefix=prefix,
|
|
281
310
|
max_batch=max_batch,
|
|
311
|
+
max_retries=max_retries,
|
|
312
|
+
retry_base_delay=retry_base_delay,
|
|
313
|
+
retry_max_delay=retry_max_delay,
|
|
282
314
|
)
|
|
283
315
|
|
|
284
316
|
# ------------------------------------------------------------------
|
|
@@ -369,41 +401,103 @@ class EmbedClient:
|
|
|
369
401
|
# Request paths
|
|
370
402
|
# ------------------------------------------------------------------
|
|
371
403
|
|
|
404
|
+
# Status codes that indicate transient gateway capacity issues
|
|
405
|
+
# (rate-limit, upstream saturation, transient unavailability,
|
|
406
|
+
# upstream timeout). 401 + other 4xx + non-listed 5xx fail fast —
|
|
407
|
+
# they typically indicate caller or config problems where retrying
|
|
408
|
+
# won't help.
|
|
409
|
+
_RETRYABLE_STATUS = frozenset({429, 502, 503, 504})
|
|
410
|
+
|
|
411
|
+
def _backoff_delay(self, attempt: int) -> float:
|
|
412
|
+
"""Exponential backoff with full jitter.
|
|
413
|
+
|
|
414
|
+
Full jitter (random.uniform(0, cap)) is preferred over equal
|
|
415
|
+
jitter for the embed gateway case: many concurrent chunks all
|
|
416
|
+
503 at the same instant, and full jitter maximally spreads
|
|
417
|
+
their retries so the gateway recovers slot-by-slot instead of
|
|
418
|
+
seeing periodic thundering herds.
|
|
419
|
+
"""
|
|
420
|
+
import random
|
|
421
|
+
cap = min(self._retry_base_delay * (2 ** attempt), self._retry_max_delay)
|
|
422
|
+
return random.uniform(0, cap)
|
|
423
|
+
|
|
372
424
|
def _post_with_autodetect(self, texts: list[str], *, async_mode: bool) -> list[list[float]]:
|
|
373
425
|
del async_mode # kept for symmetry; sync path is its own method
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
426
|
+
import time as _time
|
|
427
|
+
last_exc: EmbedHTTPError | None = None
|
|
428
|
+
for attempt in range(self._max_retries + 1):
|
|
429
|
+
body = self._provider.body_builder(texts, self._model)
|
|
430
|
+
headers = self._headers(self._provider)
|
|
431
|
+
try:
|
|
432
|
+
r = httpx.post(
|
|
433
|
+
self._url, json=body, headers=headers, timeout=self._timeout
|
|
434
|
+
)
|
|
435
|
+
except httpx.HTTPError as exc:
|
|
436
|
+
# Network-level error (DNS, connect refused, timeout).
|
|
437
|
+
# Treat as retryable — transient network blips are
|
|
438
|
+
# exactly what jittered retry is designed to absorb.
|
|
439
|
+
last_exc = EmbedHTTPError(0, str(exc))
|
|
440
|
+
if attempt >= self._max_retries:
|
|
441
|
+
raise last_exc from exc
|
|
442
|
+
_time.sleep(self._backoff_delay(attempt))
|
|
443
|
+
continue
|
|
444
|
+
|
|
445
|
+
if r.status_code == 401 and self._autodetect and not self._detected:
|
|
446
|
+
# Autodetect runs at most once (gated by self._detected)
|
|
447
|
+
# and tries other providers in sequence; no retry layer
|
|
448
|
+
# needed on top.
|
|
449
|
+
return self._autodetect_and_retry(texts, last_body=r.text)
|
|
450
|
+
if r.status_code == 401:
|
|
451
|
+
raise EmbedAuthError(r.text)
|
|
452
|
+
if not r.is_success:
|
|
453
|
+
if (
|
|
454
|
+
r.status_code in self._RETRYABLE_STATUS
|
|
455
|
+
and attempt < self._max_retries
|
|
456
|
+
):
|
|
457
|
+
last_exc = EmbedHTTPError(r.status_code, r.text)
|
|
458
|
+
_time.sleep(self._backoff_delay(attempt))
|
|
459
|
+
continue
|
|
460
|
+
raise EmbedHTTPError(r.status_code, r.text)
|
|
461
|
+
return self._provider.response_parser(r.json())
|
|
462
|
+
|
|
463
|
+
# Loop exited without success or raise — shouldn't happen, but
|
|
464
|
+
# keep the type checker happy.
|
|
465
|
+
assert last_exc is not None
|
|
466
|
+
raise last_exc
|
|
389
467
|
|
|
390
468
|
async def _post_with_autodetect_async(self, texts: list[str]) -> list[list[float]]:
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
469
|
+
import asyncio as _asyncio
|
|
470
|
+
last_exc: EmbedHTTPError | None = None
|
|
471
|
+
for attempt in range(self._max_retries + 1):
|
|
472
|
+
body = self._provider.body_builder(texts, self._model)
|
|
473
|
+
headers = self._headers(self._provider)
|
|
474
|
+
try:
|
|
475
|
+
async with httpx.AsyncClient(timeout=self._timeout) as client:
|
|
476
|
+
r = await client.post(self._url, json=body, headers=headers)
|
|
477
|
+
except httpx.HTTPError as exc:
|
|
478
|
+
last_exc = EmbedHTTPError(0, str(exc))
|
|
479
|
+
if attempt >= self._max_retries:
|
|
480
|
+
raise last_exc from exc
|
|
481
|
+
await _asyncio.sleep(self._backoff_delay(attempt))
|
|
482
|
+
continue
|
|
483
|
+
|
|
484
|
+
if r.status_code == 401 and self._autodetect and not self._detected:
|
|
485
|
+
return await self._autodetect_and_retry_async(texts, last_body=r.text)
|
|
486
|
+
if r.status_code == 401:
|
|
487
|
+
raise EmbedAuthError(r.text)
|
|
488
|
+
if not r.is_success:
|
|
489
|
+
if (
|
|
490
|
+
r.status_code in self._RETRYABLE_STATUS
|
|
491
|
+
and attempt < self._max_retries
|
|
492
|
+
):
|
|
493
|
+
last_exc = EmbedHTTPError(r.status_code, r.text)
|
|
494
|
+
await _asyncio.sleep(self._backoff_delay(attempt))
|
|
495
|
+
continue
|
|
496
|
+
raise EmbedHTTPError(r.status_code, r.text)
|
|
497
|
+
return self._provider.response_parser(r.json())
|
|
498
|
+
|
|
499
|
+
assert last_exc is not None
|
|
500
|
+
raise last_exc
|
|
407
501
|
|
|
408
502
|
# ------------------------------------------------------------------
|
|
409
503
|
# Auto-detect
|
|
@@ -268,6 +268,9 @@ def test_autodetect_all_fail_raises(recorder):
|
|
|
268
268
|
# ----------------------------------------------------------------------
|
|
269
269
|
|
|
270
270
|
def test_non_401_http_error_does_not_trigger_autodetect(recorder):
|
|
271
|
+
# max_retries=0 isolates this test to autodetect behaviour. With
|
|
272
|
+
# retries enabled (default), 503 triggers the retry path which is
|
|
273
|
+
# exercised separately in the retry tests below.
|
|
271
274
|
recorder.respond(
|
|
272
275
|
"https://gw/v1/embeddings",
|
|
273
276
|
_FakeResponse(503, "upstream down"),
|
|
@@ -277,6 +280,7 @@ def test_non_401_http_error_does_not_trigger_autodetect(recorder):
|
|
|
277
280
|
api_key="k",
|
|
278
281
|
model="m",
|
|
279
282
|
provider=PROVIDERS["openai"],
|
|
283
|
+
max_retries=0,
|
|
280
284
|
)
|
|
281
285
|
with pytest.raises(EmbedHTTPError) as exc:
|
|
282
286
|
client.embed_batch(["x"])
|
|
@@ -490,3 +494,200 @@ def test_from_env_default_max_batch_is_five(monkeypatch):
|
|
|
490
494
|
client.embed_batch([f"t{i}" for i in range(10)])
|
|
491
495
|
# 10 with default chunk=5 → [5, 5] → 2 calls
|
|
492
496
|
assert len(stub.calls) == 2
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
# ----------------------------------------------------------------------
|
|
500
|
+
# Retry-with-jitter on transient gateway saturation (502/503/504/429)
|
|
501
|
+
# ----------------------------------------------------------------------
|
|
502
|
+
#
|
|
503
|
+
# These tests exercise the retry path added 2026-05-15. Motivation:
|
|
504
|
+
# the Pentatonic AI Gateway has a K≈10 concurrency cap and 502s under
|
|
505
|
+
# saturation; without retry, a single 502 cascades through the engine's
|
|
506
|
+
# per-layer fallback path and amplifies load instead of damping it.
|
|
507
|
+
# See the prod incident note on EmbedClient.__init__ for context.
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
class _SequencedRecorder:
|
|
511
|
+
"""Returns a different response on each successive call.
|
|
512
|
+
|
|
513
|
+
The default `_Recorder` returns the same response every time, which
|
|
514
|
+
is wrong for retry tests — we need to verify "first call 502, then
|
|
515
|
+
succeed on retry". This recorder pops responses off a queue per
|
|
516
|
+
URL and falls back to the last response if the queue is empty
|
|
517
|
+
(matching the "persistent failure" test case naturally).
|
|
518
|
+
"""
|
|
519
|
+
|
|
520
|
+
def __init__(self):
|
|
521
|
+
self.calls: list[dict] = []
|
|
522
|
+
self.queues: dict[str, list[_FakeResponse]] = {}
|
|
523
|
+
|
|
524
|
+
def queue(self, url: str, responses: list[_FakeResponse]) -> None:
|
|
525
|
+
self.queues[url] = list(responses)
|
|
526
|
+
|
|
527
|
+
def __call__(self, url, *, json, headers, timeout):
|
|
528
|
+
self.calls.append({"url": url, "json": json})
|
|
529
|
+
q = self.queues.get(url, [])
|
|
530
|
+
if not q:
|
|
531
|
+
return _FakeResponse(401, "no responses queued")
|
|
532
|
+
# Pop unless this is the last one — keep returning the tail so
|
|
533
|
+
# "all attempts fail" tests don't need to queue N copies.
|
|
534
|
+
return q.pop(0) if len(q) > 1 else q[0]
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
@pytest.fixture
|
|
538
|
+
def sequenced(monkeypatch):
|
|
539
|
+
rec = _SequencedRecorder()
|
|
540
|
+
monkeypatch.setattr(httpx, "post", rec)
|
|
541
|
+
# Avoid the test taking real wall time on backoff sleeps — patch
|
|
542
|
+
# time.sleep to no-op. The jitter calculation still runs, just
|
|
543
|
+
# without the actual delay.
|
|
544
|
+
import time as _time
|
|
545
|
+
monkeypatch.setattr(_time, "sleep", lambda _s: None)
|
|
546
|
+
return rec
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def test_retries_on_502_and_succeeds(sequenced):
|
|
550
|
+
sequenced.queue(
|
|
551
|
+
"https://gw/v1/embeddings",
|
|
552
|
+
[
|
|
553
|
+
_FakeResponse(502, "bad gateway"),
|
|
554
|
+
_FakeResponse(200, {"data": [{"embedding": [0.1, 0.2]}]}),
|
|
555
|
+
],
|
|
556
|
+
)
|
|
557
|
+
client = EmbedClient(
|
|
558
|
+
url="https://gw/v1/embeddings",
|
|
559
|
+
api_key="k",
|
|
560
|
+
model="m",
|
|
561
|
+
provider=PROVIDERS["openai"],
|
|
562
|
+
max_retries=3,
|
|
563
|
+
)
|
|
564
|
+
out = client.embed_batch(["hello"])
|
|
565
|
+
assert out == [[0.1, 0.2]]
|
|
566
|
+
# First call 502, second call 200 — exactly two attempts.
|
|
567
|
+
assert len(sequenced.calls) == 2
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def test_retries_on_503_504_429(sequenced):
|
|
571
|
+
"""Each transient code triggers the retry path the same way."""
|
|
572
|
+
for code in (503, 504, 429):
|
|
573
|
+
sequenced.calls.clear()
|
|
574
|
+
sequenced.queue(
|
|
575
|
+
"https://gw/v1/embeddings",
|
|
576
|
+
[
|
|
577
|
+
_FakeResponse(code, "transient"),
|
|
578
|
+
_FakeResponse(200, {"data": [{"embedding": [0.0]}]}),
|
|
579
|
+
],
|
|
580
|
+
)
|
|
581
|
+
client = EmbedClient(
|
|
582
|
+
url="https://gw/v1/embeddings",
|
|
583
|
+
api_key="k",
|
|
584
|
+
model="m",
|
|
585
|
+
provider=PROVIDERS["openai"],
|
|
586
|
+
max_retries=3,
|
|
587
|
+
)
|
|
588
|
+
out = client.embed_batch(["x"])
|
|
589
|
+
assert out == [[0.0]], f"retry failed for status {code}"
|
|
590
|
+
assert len(sequenced.calls) == 2, f"wrong call count for status {code}"
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def test_does_not_retry_on_500(sequenced):
|
|
594
|
+
"""500 is server-side bug, not transient saturation — fail fast."""
|
|
595
|
+
sequenced.queue(
|
|
596
|
+
"https://gw/v1/embeddings",
|
|
597
|
+
[_FakeResponse(500, "internal server error")],
|
|
598
|
+
)
|
|
599
|
+
client = EmbedClient(
|
|
600
|
+
url="https://gw/v1/embeddings",
|
|
601
|
+
api_key="k",
|
|
602
|
+
model="m",
|
|
603
|
+
provider=PROVIDERS["openai"],
|
|
604
|
+
max_retries=3,
|
|
605
|
+
)
|
|
606
|
+
with pytest.raises(EmbedHTTPError) as exc:
|
|
607
|
+
client.embed_batch(["x"])
|
|
608
|
+
assert exc.value.status == 500
|
|
609
|
+
# Exactly one attempt — no retry on 500.
|
|
610
|
+
assert len(sequenced.calls) == 1
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def test_does_not_retry_on_400(sequenced):
|
|
614
|
+
"""4xx (other than 401-autodetect / 429) indicates caller error."""
|
|
615
|
+
sequenced.queue(
|
|
616
|
+
"https://gw/v1/embeddings",
|
|
617
|
+
[_FakeResponse(400, "bad request")],
|
|
618
|
+
)
|
|
619
|
+
client = EmbedClient(
|
|
620
|
+
url="https://gw/v1/embeddings",
|
|
621
|
+
api_key="k",
|
|
622
|
+
model="m",
|
|
623
|
+
provider=PROVIDERS["openai"],
|
|
624
|
+
max_retries=3,
|
|
625
|
+
)
|
|
626
|
+
with pytest.raises(EmbedHTTPError) as exc:
|
|
627
|
+
client.embed_batch(["x"])
|
|
628
|
+
assert exc.value.status == 400
|
|
629
|
+
assert len(sequenced.calls) == 1
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def test_max_retries_exhausted_raises(sequenced):
|
|
633
|
+
"""Persistent 502 raises after max_retries+1 attempts."""
|
|
634
|
+
sequenced.queue(
|
|
635
|
+
"https://gw/v1/embeddings",
|
|
636
|
+
[_FakeResponse(502, "still down")],
|
|
637
|
+
)
|
|
638
|
+
client = EmbedClient(
|
|
639
|
+
url="https://gw/v1/embeddings",
|
|
640
|
+
api_key="k",
|
|
641
|
+
model="m",
|
|
642
|
+
provider=PROVIDERS["openai"],
|
|
643
|
+
max_retries=3,
|
|
644
|
+
)
|
|
645
|
+
with pytest.raises(EmbedHTTPError) as exc:
|
|
646
|
+
client.embed_batch(["x"])
|
|
647
|
+
assert exc.value.status == 502
|
|
648
|
+
# max_retries=3 → 1 original + 3 retries = 4 calls total.
|
|
649
|
+
assert len(sequenced.calls) == 4
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
def test_max_retries_zero_disables_retry(sequenced):
|
|
653
|
+
"""Explicit opt-out preserves pre-fix behaviour for callers that
|
|
654
|
+
handle their own retry."""
|
|
655
|
+
sequenced.queue(
|
|
656
|
+
"https://gw/v1/embeddings",
|
|
657
|
+
[_FakeResponse(502, "down")],
|
|
658
|
+
)
|
|
659
|
+
client = EmbedClient(
|
|
660
|
+
url="https://gw/v1/embeddings",
|
|
661
|
+
api_key="k",
|
|
662
|
+
model="m",
|
|
663
|
+
provider=PROVIDERS["openai"],
|
|
664
|
+
max_retries=0,
|
|
665
|
+
)
|
|
666
|
+
with pytest.raises(EmbedHTTPError):
|
|
667
|
+
client.embed_batch(["x"])
|
|
668
|
+
assert len(sequenced.calls) == 1
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def test_from_env_reads_retry_config(monkeypatch):
|
|
672
|
+
"""{prefix}EMBED_MAX_RETRIES + EMBED_RETRY_BASE_DELAY +
|
|
673
|
+
EMBED_RETRY_MAX_DELAY override the defaults."""
|
|
674
|
+
monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
|
|
675
|
+
monkeypatch.setenv("L4_EMBED_API_KEY", "k")
|
|
676
|
+
monkeypatch.setenv("L4_EMBED_MAX_RETRIES", "5")
|
|
677
|
+
monkeypatch.setenv("L4_EMBED_RETRY_BASE_DELAY", "0.25")
|
|
678
|
+
monkeypatch.setenv("L4_EMBED_RETRY_MAX_DELAY", "2.5")
|
|
679
|
+
client = EmbedClient.from_env(prefix="L4_")
|
|
680
|
+
assert client._max_retries == 5
|
|
681
|
+
assert client._retry_base_delay == 0.25
|
|
682
|
+
assert client._retry_max_delay == 2.5
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
def test_from_env_default_retry_config(monkeypatch):
|
|
686
|
+
"""Defaults: 3 retries, 100ms base, 1s cap — tuned for K≈10
|
|
687
|
+
gateway under burst load."""
|
|
688
|
+
monkeypatch.setenv("L4_NV_EMBED_URL", "https://gw/v1/embeddings")
|
|
689
|
+
monkeypatch.setenv("L4_EMBED_API_KEY", "k")
|
|
690
|
+
client = EmbedClient.from_env(prefix="L4_")
|
|
691
|
+
assert client._max_retries == 3
|
|
692
|
+
assert client._retry_base_delay == 0.1
|
|
693
|
+
assert client._retry_max_delay == 1.0
|