@pentatonic-ai/ai-agent-sdk 0.5.7 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -417,7 +417,156 @@ var Session = class {
417
417
  }
418
418
  };
419
419
 
420
// packages/memory/src/inject.js
// Per-memory cap, in UTF-16 code units, on the text copied into the preamble.
var MAX_CHARS_PER_MEMORY = 1200;

/**
 * Merge retrieved memories into a request body as a system-prompt preamble.
 *
 * @param {object} body - Upstream request body; never mutated.
 * @param {Array<{id?: string, content?: string, similarity?: number}>} memories
 * @param {string} provider - "anthropic" writes to the top-level `system`
 *   field; every other value takes the messages-array (OpenAI-style) path.
 * @returns {object} `body` itself when there is nothing to inject, otherwise
 *   a shallow copy that carries the preamble.
 */
function injectMemories(body, memories, provider) {
  if (!memories || memories.length === 0) {
    return body;
  }
  const preamble = formatPreamble(memories);
  if (provider === "anthropic") {
    return injectAnthropic(body, preamble);
  }
  return injectOpenAI(body, preamble);
}

/**
 * Coerce one memory's content to text and cap it at MAX_CHARS_PER_MEMORY.
 * Falsy content becomes the empty string.
 */
function clipMemoryText(raw) {
  if (!raw) {
    return "";
  }
  const text = typeof raw === "string" ? raw : String(raw);
  if (text.length <= MAX_CHARS_PER_MEMORY) {
    return text;
  }
  let clipped = text.slice(0, MAX_CHARS_PER_MEMORY);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop a dangling high surrogate rather than
  // emit ill-formed text.
  const last = clipped.charCodeAt(clipped.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) {
    clipped = clipped.slice(0, -1);
  }
  return clipped;
}

/**
 * Render the memories as a `<tes:context>` block, one numbered line each.
 * A non-numeric similarity is shown as "?"; null entries render as empty.
 */
function formatPreamble(memories) {
  const lines = ["<tes:context>"];
  memories.forEach((m, i) => {
    const sim = typeof m?.similarity === "number" ? m.similarity.toFixed(2) : "?";
    lines.push(`[${i + 1}] (similarity ${sim}) ${clipMemoryText(m?.content)}`);
  });
  lines.push("</tes:context>");
  return lines.join("\n");
}

/**
 * Put the preamble in front of `body.system`, keeping whichever shape the
 * caller used (string or array of blocks). A missing or differently typed
 * `system` is replaced by the preamble string.
 */
function injectAnthropic(body, preamble) {
  const next = { ...body };
  if (typeof body.system === "string") {
    next.system = `${preamble}\n\n${body.system}`;
  } else if (Array.isArray(body.system)) {
    next.system = [{ type: "text", text: preamble }, ...body.system];
  } else {
    next.system = preamble;
  }
  return next;
}

/**
 * Put the preamble in front of the leading system message, or insert a new
 * system message at index 0 when the conversation does not start with one.
 */
function injectOpenAI(body, preamble) {
  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
  const first = messages[0];
  if (first && first.role === "system") {
    const existing = first.content;
    let content;
    if (typeof existing === "string") {
      content = `${preamble}\n\n${existing}`;
    } else if (Array.isArray(existing)) {
      // Structured content stays structured: add the preamble as its own
      // text part instead of stringifying the caller's parts into the prompt.
      content = [{ type: "text", text: preamble }, ...existing];
    } else if (existing === null || existing === undefined) {
      content = preamble;
    } else {
      content = `${preamble}\n\n${JSON.stringify(existing)}`;
    }
    messages[0] = { ...first, content };
  } else {
    messages.unshift({ role: "system", content: preamble });
  }
  return { ...body, messages };
}
470
+
471
// packages/memory/src/hosted.js
// GraphQL document sent to the hosted endpoint for semantic retrieval.
var SEMANTIC_SEARCH_QUERY = `
  query SemanticSearchMemories($clientId: String!, $query: String!, $limit: Int, $minScore: Float) {
    semanticSearchMemories(clientId: $clientId, query: $query, limit: $limit, minScore: $minScore) {
      id
      content
      similarity
    }
  }
`;
// Fallbacks used by hostedSearch when the caller's opts omit a value.
var DEFAULT_SEARCH_TIMEOUT_MS = 5e3;
var DEFAULT_SEARCH_LIMIT = 6;
var DEFAULT_SEARCH_MIN_SCORE = 0.55;

/**
 * Resolve `{ endpoint, clientId, apiKey }` from a config object, accepting
 * the legacy `tes_endpoint` / `tes_client_id` / `tes_api_key` spellings.
 *
 * @throws {Error} when config is missing or any of the three values is falsy.
 */
function normalizeConfig(config) {
  if (!config) {
    throw new Error("hosted: config is required");
  }
  const endpoint = config.endpoint || config.tes_endpoint;
  const clientId = config.clientId || config.tes_client_id;
  const apiKey = config.apiKey || config.tes_api_key;
  if (!endpoint || !clientId || !apiKey) {
    throw new Error(
      "hosted: config requires { endpoint, clientId, apiKey } (or legacy tes_* equivalents)"
    );
  }
  return { endpoint, clientId, apiKey };
}

/**
 * Build the request headers for the hosted endpoint. Keys starting with
 * "tes_" are sent as a Bearer token; any other key goes in `x-service-key`.
 *
 * @throws {Error} propagated from normalizeConfig for an incomplete config.
 */
function buildHostedHeaders(config) {
  const { clientId, apiKey } = normalizeConfig(config);
  const headers = {
    "Content-Type": "application/json",
    "x-client-id": clientId
  };
  if (apiKey.startsWith("tes_")) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  } else {
    headers["x-service-key"] = apiKey;
  }
  return headers;
}

/**
 * Run a semantic memory search against the hosted endpoint.
 *
 * Never rejects for request-level failures: each one is reported as
 * `{ memories: [], skipped: <reason> }` so the caller can carry on without
 * memories. On success the result is `{ memories }` with no `skipped` key.
 *
 * @param {object} config - See normalizeConfig.
 * @param {string} query - Text to search for; falsy skips the request.
 * @param {{limit?: number, minScore?: number, timeoutMs?: number}} [opts]
 * @returns {Promise<{memories: Array, skipped?: string}>}
 */
async function hostedSearch(config, query, opts = {}) {
  if (!query) {
    return { memories: [], skipped: "no_query" };
  }
  let cfg;
  try {
    cfg = normalizeConfig(config);
  } catch (err) {
    return { memories: [], skipped: `config_error:${err.message}` };
  }
  const limit = opts?.limit ?? DEFAULT_SEARCH_LIMIT;
  const minScore = opts?.minScore ?? DEFAULT_SEARCH_MIN_SCORE;
  const timeoutMs = opts?.timeoutMs ?? DEFAULT_SEARCH_TIMEOUT_MS;

  // Avoid "//api/graphql" when the configured endpoint ends with a slash.
  let base = String(cfg.endpoint);
  while (base.endsWith("/")) {
    base = base.slice(0, -1);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  let response;
  let payload;
  let readingBody = false;
  try {
    response = await fetch(`${base}/api/graphql`, {
      method: "POST",
      headers: buildHostedHeaders(cfg),
      body: JSON.stringify({
        query: SEMANTIC_SEARCH_QUERY,
        variables: { clientId: cfg.clientId, query, limit, minScore }
      }),
      signal: controller.signal
    });
    if (response.ok) {
      // The timer stays armed while the body is read so a stalled response
      // stream is bounded by timeoutMs too, not just the header round-trip.
      readingBody = true;
      payload = await response.json();
    }
  } catch (err) {
    if (controller.signal.aborted || err?.name === "AbortError") {
      return { memories: [], skipped: "tes_timeout" };
    }
    return {
      memories: [],
      skipped: readingBody ? "tes_invalid_json" : "tes_unreachable"
    };
  } finally {
    clearTimeout(timer);
  }
  if (!response.ok) {
    return { memories: [], skipped: `tes_http_${response.status}` };
  }
  // A JSON body of `null` parses successfully, so guard every dereference.
  if (payload?.errors?.length) {
    const reason = payload.errors[0]?.message || "tes_graphql_error";
    return { memories: [], skipped: `tes_graphql:${shortenReason(reason)}` };
  }
  const found = payload?.data?.semanticSearchMemories;
  return { memories: Array.isArray(found) ? found : [] };
}

/**
 * Reduce an error message to a short lowercase token: runs of characters
 * outside [a-z0-9] become "_" and the result is capped at 60 characters.
 * Non-string input yields "unknown".
 */
function shortenReason(msg) {
  if (typeof msg !== "string") {
    return "unknown";
  }
  return msg.toLowerCase().replace(/[^a-z0-9]+/g, "_").slice(0, 60);
}
563
+
420
564
  // src/wrapper.js
565
// Retrieval settings used when the caller's memoryOpts omit a value. Frozen
// because every wrapped client shares this one object; callers receive a
// spread copy, so nothing needs to write to it.
var MEMORY_DEFAULTS = Object.freeze({
  limit: 6,
  minScore: 0.55,
  timeoutMs: 800
});
421
570
  function detectClientType(client) {
422
571
  if (client?.chat?.completions?.create)
423
572
  return "openai";
@@ -427,6 +576,57 @@ function detectClientType(client) {
427
576
  return "workers-ai";
428
577
  return "unknown";
429
578
  }
579
/**
 * Return the text of the most recent user message in `params.messages`.
 *
 * String content is returned as-is. Array content is reduced to its text
 * parts joined with newlines (which may be the empty string). A user message
 * whose content is neither is skipped and the scan continues backwards.
 *
 * @param {object} params - Request params; only `messages` is read.
 * @param {string} provider - Unused; kept so call sites stay uniform.
 * @returns {string|null} null when `messages` is not an array or holds no
 *   usable user message.
 */
function extractLastUserMessage(params, provider) {
  void provider;
  const msgs = Array.isArray(params?.messages) ? params.messages : null;
  if (!msgs) {
    return null;
  }
  for (let i = msgs.length - 1; i >= 0; i--) {
    const msg = msgs[i];
    // Tolerate null or malformed entries: this runs ahead of the caller's
    // LLM request, so a TypeError here would fail that request.
    if (msg?.role !== "user") {
      continue;
    }
    const c = msg.content;
    if (typeof c === "string") {
      return c;
    }
    if (Array.isArray(c)) {
      return c.filter((p) => p?.type === "text" && typeof p.text === "string").map((p) => p.text).join("\n");
    }
  }
  return null;
}
595
/**
 * Look up memories for the latest user message and merge them into the
 * request params ahead of the LLM call.
 *
 * Injection is on by default. It is skipped when `sessionOpts.memory` is
 * false, when `sessionOpts.memoryOpts.disable` is true, when the client
 * config lacks an endpoint or apiKey, or when there is no user message.
 * Retrieval settings come from MEMORY_DEFAULTS overlaid with
 * `sessionOpts.memoryOpts`.
 *
 * Best-effort by design: any failure leaves `params` untouched and is
 * reported through `skipped` instead of being thrown.
 *
 * @returns {Promise<{params: object, injected: number, skipped: string|null}>}
 */
async function maybeInjectMemories(clientConfig, sessionOpts, params, provider) {
  if (sessionOpts?.memory === false || sessionOpts?.memoryOpts?.disable === true) {
    return { params, injected: 0, skipped: "memory_disabled" };
  }
  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
    return { params, injected: 0, skipped: "no_tes_config" };
  }
  try {
    const userMessage = extractLastUserMessage(params, provider);
    if (!userMessage) {
      return { params, injected: 0, skipped: "no_user_message" };
    }
    const opts = { ...MEMORY_DEFAULTS, ...sessionOpts?.memoryOpts || {} };
    const { memories, skipped } = await hostedSearch(
      {
        endpoint: clientConfig.endpoint,
        clientId: clientConfig.clientId,
        apiKey: clientConfig.apiKey
      },
      userMessage,
      opts
    );
    if (!memories?.length) {
      return { params, injected: 0, skipped: skipped || "no_memories" };
    }
    return {
      params: injectMemories(params, memories, provider),
      injected: memories.length,
      skipped: null
    };
  } catch {
    // Memory augmentation must never be the reason the caller's own LLM
    // request fails; fall back to the original params and record why.
    return { params, injected: 0, skipped: "memory_error" };
  }
}
625
/**
 * Store the outcome of the latest memory-injection attempt on the wrapped
 * client's session as `_lastMemoryStats`. Does nothing when `sessionOpts`
 * carries no `_session`.
 */
function recordMemoryStats(sessionOpts, stats) {
  const session = sessionOpts._session;
  if (!session) {
    return;
  }
  session._lastMemoryStats = stats;
}
430
630
  function wrapClient(clientConfig, client, sessionOpts = {}) {
431
631
  sessionOpts._resolvedSessionId = sessionOpts.sessionId || crypto.randomUUID();
432
632
  sessionOpts._session = new Session(clientConfig, {
@@ -478,7 +678,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
478
678
  get(target, prop) {
479
679
  if (prop === "create") {
480
680
  return async (params) => {
481
- const result = await target.create(params);
681
+ const memStats = await maybeInjectMemories(
682
+ clientConfig,
683
+ sessionOpts,
684
+ params,
685
+ "openai"
686
+ );
687
+ recordMemoryStats(sessionOpts, memStats);
688
+ const result = await target.create(memStats.params);
482
689
  const content = result.choices?.[0]?.message?.content;
483
690
  if (content) {
484
691
  result.choices[0].message.content = await rewriteUrls(
@@ -491,7 +698,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
491
698
  fireAndForgetEmit(
492
699
  clientConfig,
493
700
  sessionOpts,
494
- params.messages,
701
+ memStats.params.messages,
495
702
  result
496
703
  );
497
704
  return result;
@@ -537,7 +744,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
537
744
  get(target, prop) {
538
745
  if (prop === "create") {
539
746
  return async (params) => {
540
- const result = await target.create(params);
747
+ const memStats = await maybeInjectMemories(
748
+ clientConfig,
749
+ sessionOpts,
750
+ params,
751
+ "anthropic"
752
+ );
753
+ recordMemoryStats(sessionOpts, memStats);
754
+ const result = await target.create(memStats.params);
541
755
  if (Array.isArray(result.content)) {
542
756
  for (const block of result.content) {
543
757
  if (block.type === "text" && block.text) {
@@ -553,7 +767,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
553
767
  fireAndForgetEmit(
554
768
  clientConfig,
555
769
  sessionOpts,
556
- params.messages,
770
+ memStats.params.messages,
557
771
  result
558
772
  );
559
773
  return result;
@@ -579,7 +793,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
579
793
  get(target, prop) {
580
794
  if (prop === "run") {
581
795
  return async (model, params, ...rest) => {
582
- const result = await target.run(model, params, ...rest);
796
+ const memStats = await maybeInjectMemories(
797
+ clientConfig,
798
+ sessionOpts,
799
+ params,
800
+ "workers-ai"
801
+ );
802
+ recordMemoryStats(sessionOpts, memStats);
803
+ const result = await target.run(model, memStats.params, ...rest);
583
804
  if (result.response) {
584
805
  result.response = await rewriteUrls(
585
806
  result.response,
@@ -591,7 +812,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
591
812
  fireAndForgetEmit(
592
813
  clientConfig,
593
814
  sessionOpts,
594
- params?.messages,
815
+ memStats.params?.messages,
595
816
  result,
596
817
  model
597
818
  );
@@ -764,8 +985,23 @@ var TESClient = class {
764
985
  session(opts) {
765
986
  return new Session(this._config, opts);
766
987
  }
767
- wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
988
+ wrap(client, {
989
+ sessionId,
990
+ userId,
991
+ metadata,
992
+ autoEmit = true,
993
+ waitUntil,
994
+ memory,
995
+ memoryOpts
996
+ } = {}) {
768
997
  const config = userId ? { ...this._config, userId } : this._config;
769
- return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
998
+ return wrapClient(config, client, {
999
+ sessionId,
1000
+ metadata,
1001
+ autoEmit,
1002
+ waitUntil,
1003
+ memory,
1004
+ memoryOpts
1005
+ });
770
1006
  }
771
1007
  };
package/dist/index.js CHANGED
@@ -386,7 +386,156 @@ var Session = class {
386
386
  }
387
387
  };
388
388
 
389
// packages/memory/src/inject.js
// Per-memory cap, in UTF-16 code units, on the text copied into the preamble.
var MAX_CHARS_PER_MEMORY = 1200;

/**
 * Merge retrieved memories into a request body as a system-prompt preamble.
 *
 * @param {object} body - Upstream request body; never mutated.
 * @param {Array<{id?: string, content?: string, similarity?: number}>} memories
 * @param {string} provider - "anthropic" writes to the top-level `system`
 *   field; every other value takes the messages-array (OpenAI-style) path.
 * @returns {object} `body` itself when there is nothing to inject, otherwise
 *   a shallow copy that carries the preamble.
 */
function injectMemories(body, memories, provider) {
  if (!memories || memories.length === 0) {
    return body;
  }
  const preamble = formatPreamble(memories);
  if (provider === "anthropic") {
    return injectAnthropic(body, preamble);
  }
  return injectOpenAI(body, preamble);
}

/**
 * Coerce one memory's content to text and cap it at MAX_CHARS_PER_MEMORY.
 * Falsy content becomes the empty string.
 */
function clipMemoryText(raw) {
  if (!raw) {
    return "";
  }
  const text = typeof raw === "string" ? raw : String(raw);
  if (text.length <= MAX_CHARS_PER_MEMORY) {
    return text;
  }
  let clipped = text.slice(0, MAX_CHARS_PER_MEMORY);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop a dangling high surrogate rather than
  // emit ill-formed text.
  const last = clipped.charCodeAt(clipped.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) {
    clipped = clipped.slice(0, -1);
  }
  return clipped;
}

/**
 * Render the memories as a `<tes:context>` block, one numbered line each.
 * A non-numeric similarity is shown as "?"; null entries render as empty.
 */
function formatPreamble(memories) {
  const lines = ["<tes:context>"];
  memories.forEach((m, i) => {
    const sim = typeof m?.similarity === "number" ? m.similarity.toFixed(2) : "?";
    lines.push(`[${i + 1}] (similarity ${sim}) ${clipMemoryText(m?.content)}`);
  });
  lines.push("</tes:context>");
  return lines.join("\n");
}

/**
 * Put the preamble in front of `body.system`, keeping whichever shape the
 * caller used (string or array of blocks). A missing or differently typed
 * `system` is replaced by the preamble string.
 */
function injectAnthropic(body, preamble) {
  const next = { ...body };
  if (typeof body.system === "string") {
    next.system = `${preamble}\n\n${body.system}`;
  } else if (Array.isArray(body.system)) {
    next.system = [{ type: "text", text: preamble }, ...body.system];
  } else {
    next.system = preamble;
  }
  return next;
}

/**
 * Put the preamble in front of the leading system message, or insert a new
 * system message at index 0 when the conversation does not start with one.
 */
function injectOpenAI(body, preamble) {
  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
  const first = messages[0];
  if (first && first.role === "system") {
    const existing = first.content;
    let content;
    if (typeof existing === "string") {
      content = `${preamble}\n\n${existing}`;
    } else if (Array.isArray(existing)) {
      // Structured content stays structured: add the preamble as its own
      // text part instead of stringifying the caller's parts into the prompt.
      content = [{ type: "text", text: preamble }, ...existing];
    } else if (existing === null || existing === undefined) {
      content = preamble;
    } else {
      content = `${preamble}\n\n${JSON.stringify(existing)}`;
    }
    messages[0] = { ...first, content };
  } else {
    messages.unshift({ role: "system", content: preamble });
  }
  return { ...body, messages };
}
439
+
440
// packages/memory/src/hosted.js
// GraphQL document sent to the hosted endpoint for semantic retrieval.
var SEMANTIC_SEARCH_QUERY = `
  query SemanticSearchMemories($clientId: String!, $query: String!, $limit: Int, $minScore: Float) {
    semanticSearchMemories(clientId: $clientId, query: $query, limit: $limit, minScore: $minScore) {
      id
      content
      similarity
    }
  }
`;
// Fallbacks used by hostedSearch when the caller's opts omit a value.
var DEFAULT_SEARCH_TIMEOUT_MS = 5e3;
var DEFAULT_SEARCH_LIMIT = 6;
var DEFAULT_SEARCH_MIN_SCORE = 0.55;

/**
 * Resolve `{ endpoint, clientId, apiKey }` from a config object, accepting
 * the legacy `tes_endpoint` / `tes_client_id` / `tes_api_key` spellings.
 *
 * @throws {Error} when config is missing or any of the three values is falsy.
 */
function normalizeConfig(config) {
  if (!config) {
    throw new Error("hosted: config is required");
  }
  const endpoint = config.endpoint || config.tes_endpoint;
  const clientId = config.clientId || config.tes_client_id;
  const apiKey = config.apiKey || config.tes_api_key;
  if (!endpoint || !clientId || !apiKey) {
    throw new Error(
      "hosted: config requires { endpoint, clientId, apiKey } (or legacy tes_* equivalents)"
    );
  }
  return { endpoint, clientId, apiKey };
}

/**
 * Build the request headers for the hosted endpoint. Keys starting with
 * "tes_" are sent as a Bearer token; any other key goes in `x-service-key`.
 *
 * @throws {Error} propagated from normalizeConfig for an incomplete config.
 */
function buildHostedHeaders(config) {
  const { clientId, apiKey } = normalizeConfig(config);
  const headers = {
    "Content-Type": "application/json",
    "x-client-id": clientId
  };
  if (apiKey.startsWith("tes_")) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  } else {
    headers["x-service-key"] = apiKey;
  }
  return headers;
}

/**
 * Run a semantic memory search against the hosted endpoint.
 *
 * Never rejects for request-level failures: each one is reported as
 * `{ memories: [], skipped: <reason> }` so the caller can carry on without
 * memories. On success the result is `{ memories }` with no `skipped` key.
 *
 * @param {object} config - See normalizeConfig.
 * @param {string} query - Text to search for; falsy skips the request.
 * @param {{limit?: number, minScore?: number, timeoutMs?: number}} [opts]
 * @returns {Promise<{memories: Array, skipped?: string}>}
 */
async function hostedSearch(config, query, opts = {}) {
  if (!query) {
    return { memories: [], skipped: "no_query" };
  }
  let cfg;
  try {
    cfg = normalizeConfig(config);
  } catch (err) {
    return { memories: [], skipped: `config_error:${err.message}` };
  }
  const limit = opts?.limit ?? DEFAULT_SEARCH_LIMIT;
  const minScore = opts?.minScore ?? DEFAULT_SEARCH_MIN_SCORE;
  const timeoutMs = opts?.timeoutMs ?? DEFAULT_SEARCH_TIMEOUT_MS;

  // Avoid "//api/graphql" when the configured endpoint ends with a slash.
  let base = String(cfg.endpoint);
  while (base.endsWith("/")) {
    base = base.slice(0, -1);
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  let response;
  let payload;
  let readingBody = false;
  try {
    response = await fetch(`${base}/api/graphql`, {
      method: "POST",
      headers: buildHostedHeaders(cfg),
      body: JSON.stringify({
        query: SEMANTIC_SEARCH_QUERY,
        variables: { clientId: cfg.clientId, query, limit, minScore }
      }),
      signal: controller.signal
    });
    if (response.ok) {
      // The timer stays armed while the body is read so a stalled response
      // stream is bounded by timeoutMs too, not just the header round-trip.
      readingBody = true;
      payload = await response.json();
    }
  } catch (err) {
    if (controller.signal.aborted || err?.name === "AbortError") {
      return { memories: [], skipped: "tes_timeout" };
    }
    return {
      memories: [],
      skipped: readingBody ? "tes_invalid_json" : "tes_unreachable"
    };
  } finally {
    clearTimeout(timer);
  }
  if (!response.ok) {
    return { memories: [], skipped: `tes_http_${response.status}` };
  }
  // A JSON body of `null` parses successfully, so guard every dereference.
  if (payload?.errors?.length) {
    const reason = payload.errors[0]?.message || "tes_graphql_error";
    return { memories: [], skipped: `tes_graphql:${shortenReason(reason)}` };
  }
  const found = payload?.data?.semanticSearchMemories;
  return { memories: Array.isArray(found) ? found : [] };
}

/**
 * Reduce an error message to a short lowercase token: runs of characters
 * outside [a-z0-9] become "_" and the result is capped at 60 characters.
 * Non-string input yields "unknown".
 */
function shortenReason(msg) {
  if (typeof msg !== "string") {
    return "unknown";
  }
  return msg.toLowerCase().replace(/[^a-z0-9]+/g, "_").slice(0, 60);
}
532
+
389
533
  // src/wrapper.js
534
// Retrieval settings used when the caller's memoryOpts omit a value. Frozen
// because every wrapped client shares this one object; callers receive a
// spread copy, so nothing needs to write to it.
var MEMORY_DEFAULTS = Object.freeze({
  limit: 6,
  minScore: 0.55,
  timeoutMs: 800
});
390
539
  function detectClientType(client) {
391
540
  if (client?.chat?.completions?.create)
392
541
  return "openai";
@@ -396,6 +545,57 @@ function detectClientType(client) {
396
545
  return "workers-ai";
397
546
  return "unknown";
398
547
  }
548
/**
 * Return the text of the most recent user message in `params.messages`.
 *
 * String content is returned as-is. Array content is reduced to its text
 * parts joined with newlines (which may be the empty string). A user message
 * whose content is neither is skipped and the scan continues backwards.
 *
 * @param {object} params - Request params; only `messages` is read.
 * @param {string} provider - Unused; kept so call sites stay uniform.
 * @returns {string|null} null when `messages` is not an array or holds no
 *   usable user message.
 */
function extractLastUserMessage(params, provider) {
  void provider;
  const msgs = Array.isArray(params?.messages) ? params.messages : null;
  if (!msgs) {
    return null;
  }
  for (let i = msgs.length - 1; i >= 0; i--) {
    const msg = msgs[i];
    // Tolerate null or malformed entries: this runs ahead of the caller's
    // LLM request, so a TypeError here would fail that request.
    if (msg?.role !== "user") {
      continue;
    }
    const c = msg.content;
    if (typeof c === "string") {
      return c;
    }
    if (Array.isArray(c)) {
      return c.filter((p) => p?.type === "text" && typeof p.text === "string").map((p) => p.text).join("\n");
    }
  }
  return null;
}
564
/**
 * Look up memories for the latest user message and merge them into the
 * request params ahead of the LLM call.
 *
 * Injection is on by default. It is skipped when `sessionOpts.memory` is
 * false, when `sessionOpts.memoryOpts.disable` is true, when the client
 * config lacks an endpoint or apiKey, or when there is no user message.
 * Retrieval settings come from MEMORY_DEFAULTS overlaid with
 * `sessionOpts.memoryOpts`.
 *
 * Best-effort by design: any failure leaves `params` untouched and is
 * reported through `skipped` instead of being thrown.
 *
 * @returns {Promise<{params: object, injected: number, skipped: string|null}>}
 */
async function maybeInjectMemories(clientConfig, sessionOpts, params, provider) {
  if (sessionOpts?.memory === false || sessionOpts?.memoryOpts?.disable === true) {
    return { params, injected: 0, skipped: "memory_disabled" };
  }
  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
    return { params, injected: 0, skipped: "no_tes_config" };
  }
  try {
    const userMessage = extractLastUserMessage(params, provider);
    if (!userMessage) {
      return { params, injected: 0, skipped: "no_user_message" };
    }
    const opts = { ...MEMORY_DEFAULTS, ...sessionOpts?.memoryOpts || {} };
    const { memories, skipped } = await hostedSearch(
      {
        endpoint: clientConfig.endpoint,
        clientId: clientConfig.clientId,
        apiKey: clientConfig.apiKey
      },
      userMessage,
      opts
    );
    if (!memories?.length) {
      return { params, injected: 0, skipped: skipped || "no_memories" };
    }
    return {
      params: injectMemories(params, memories, provider),
      injected: memories.length,
      skipped: null
    };
  } catch {
    // Memory augmentation must never be the reason the caller's own LLM
    // request fails; fall back to the original params and record why.
    return { params, injected: 0, skipped: "memory_error" };
  }
}
594
/**
 * Store the outcome of the latest memory-injection attempt on the wrapped
 * client's session as `_lastMemoryStats`. Does nothing when `sessionOpts`
 * carries no `_session`.
 */
function recordMemoryStats(sessionOpts, stats) {
  const session = sessionOpts._session;
  if (!session) {
    return;
  }
  session._lastMemoryStats = stats;
}
399
599
  function wrapClient(clientConfig, client, sessionOpts = {}) {
400
600
  sessionOpts._resolvedSessionId = sessionOpts.sessionId || crypto.randomUUID();
401
601
  sessionOpts._session = new Session(clientConfig, {
@@ -447,7 +647,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
447
647
  get(target, prop) {
448
648
  if (prop === "create") {
449
649
  return async (params) => {
450
- const result = await target.create(params);
650
+ const memStats = await maybeInjectMemories(
651
+ clientConfig,
652
+ sessionOpts,
653
+ params,
654
+ "openai"
655
+ );
656
+ recordMemoryStats(sessionOpts, memStats);
657
+ const result = await target.create(memStats.params);
451
658
  const content = result.choices?.[0]?.message?.content;
452
659
  if (content) {
453
660
  result.choices[0].message.content = await rewriteUrls(
@@ -460,7 +667,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
460
667
  fireAndForgetEmit(
461
668
  clientConfig,
462
669
  sessionOpts,
463
- params.messages,
670
+ memStats.params.messages,
464
671
  result
465
672
  );
466
673
  return result;
@@ -506,7 +713,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
506
713
  get(target, prop) {
507
714
  if (prop === "create") {
508
715
  return async (params) => {
509
- const result = await target.create(params);
716
+ const memStats = await maybeInjectMemories(
717
+ clientConfig,
718
+ sessionOpts,
719
+ params,
720
+ "anthropic"
721
+ );
722
+ recordMemoryStats(sessionOpts, memStats);
723
+ const result = await target.create(memStats.params);
510
724
  if (Array.isArray(result.content)) {
511
725
  for (const block of result.content) {
512
726
  if (block.type === "text" && block.text) {
@@ -522,7 +736,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
522
736
  fireAndForgetEmit(
523
737
  clientConfig,
524
738
  sessionOpts,
525
- params.messages,
739
+ memStats.params.messages,
526
740
  result
527
741
  );
528
742
  return result;
@@ -548,7 +762,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
548
762
  get(target, prop) {
549
763
  if (prop === "run") {
550
764
  return async (model, params, ...rest) => {
551
- const result = await target.run(model, params, ...rest);
765
+ const memStats = await maybeInjectMemories(
766
+ clientConfig,
767
+ sessionOpts,
768
+ params,
769
+ "workers-ai"
770
+ );
771
+ recordMemoryStats(sessionOpts, memStats);
772
+ const result = await target.run(model, memStats.params, ...rest);
552
773
  if (result.response) {
553
774
  result.response = await rewriteUrls(
554
775
  result.response,
@@ -560,7 +781,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
560
781
  fireAndForgetEmit(
561
782
  clientConfig,
562
783
  sessionOpts,
563
- params?.messages,
784
+ memStats.params?.messages,
564
785
  result,
565
786
  model
566
787
  );
@@ -733,9 +954,24 @@ var TESClient = class {
733
954
  session(opts) {
734
955
  return new Session(this._config, opts);
735
956
  }
736
- wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
957
+ wrap(client, {
958
+ sessionId,
959
+ userId,
960
+ metadata,
961
+ autoEmit = true,
962
+ waitUntil,
963
+ memory,
964
+ memoryOpts
965
+ } = {}) {
737
966
  const config = userId ? { ...this._config, userId } : this._config;
738
- return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
967
+ return wrapClient(config, client, {
968
+ sessionId,
969
+ metadata,
970
+ autoEmit,
971
+ waitUntil,
972
+ memory,
973
+ memoryOpts
974
+ });
739
975
  }
740
976
  };
741
977
  export {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@pentatonic-ai/ai-agent-sdk",
3
- "version": "0.5.7",
4
- "description": "TES SDK \u2014 LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
3
+ "version": "0.5.8",
4
+ "description": "TES SDK LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
7
7
  "module": "./dist/index.js",
@@ -370,3 +370,10 @@ function shortenReason(msg) {
370
370
  .replace(/[^a-z0-9]+/g, "_")
371
371
  .slice(0, 60);
372
372
  }
373
+
374
+ // Re-export the system-message injector so callers that import the
375
+ // hosted module get the full memory-augmentation surface in one place.
376
+ // Keeping the implementation in `./inject.js` lets non-hosted consumers
377
+ // (e.g. a future "augment a request body" helper that doesn't talk to
378
+ // TES) reuse it without pulling in the GraphQL surface.
379
+ export { injectMemories } from "./inject.js";
@@ -0,0 +1,83 @@
1
/**
 * Memory injection — formats retrieved memories as a system-message preamble
 * and merges them into the upstream request body.
 *
 * Why a preamble (not a separate user-turn or tool-result):
 *   - Customer's existing system prompt is preserved verbatim; the preamble
 *     is placed in front of it.
 *   - Anthropic and OpenAI both treat system content as cache-friendly.
 *     NOTE(review): the preamble changes per request and sits at the very
 *     start of the system prompt — confirm this does not defeat provider
 *     prefix caching.
 *   - No conversation-history mutation — replays remain reproducible.
 *
 * Format:
 *   <tes:context>
 *   [1] (similarity 0.82) memory text...
 *   [2] (similarity 0.71) memory text...
 *   </tes:context>
 *
 * The XML-ish wrapper makes it trivial for the model to ignore on demand
 * and trivial for an evaluator to strip when measuring quality deltas.
 */

// Per-memory cap, in UTF-16 code units, on the text copied into the preamble.
const MAX_CHARS_PER_MEMORY = 1200;

/**
 * @param {object} body — upstream request body; never mutated, a shallow
 *   copy is returned when memories are injected
 * @param {Array<{id, content, similarity}>} memories
 * @param {string} provider — "anthropic" writes to the top-level `system`
 *   field; any other value takes the messages-array (OpenAI-style) path
 * @returns {object} `body` itself when there are no memories, else a new body
 */
export function injectMemories(body, memories, provider) {
  if (!memories || memories.length === 0) return body;

  const preamble = formatPreamble(memories);

  if (provider === "anthropic") {
    return injectAnthropic(body, preamble);
  }
  return injectOpenAI(body, preamble);
}

/**
 * Coerce one memory's content to text and cap it at MAX_CHARS_PER_MEMORY.
 * Falsy content becomes the empty string.
 */
function clipMemoryText(raw) {
  if (!raw) return "";
  const text = typeof raw === "string" ? raw : String(raw);
  if (text.length <= MAX_CHARS_PER_MEMORY) return text;

  let clipped = text.slice(0, MAX_CHARS_PER_MEMORY);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop a dangling high surrogate rather than
  // emit ill-formed text.
  const last = clipped.charCodeAt(clipped.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) {
    clipped = clipped.slice(0, -1);
  }
  return clipped;
}

function formatPreamble(memories) {
  const lines = ["<tes:context>"];
  memories.forEach((m, i) => {
    // Null entries and non-numeric similarities render as "?" / empty text
    // instead of throwing.
    const sim =
      typeof m?.similarity === "number" ? m.similarity.toFixed(2) : "?";
    lines.push(`[${i + 1}] (similarity ${sim}) ${clipMemoryText(m?.content)}`);
  });
  lines.push("</tes:context>");
  return lines.join("\n");
}

function injectAnthropic(body, preamble) {
  // Anthropic accepts `system` as either a string OR an array of content
  // blocks. Preserve whichever shape the customer sent.
  const next = { ...body };
  if (typeof body.system === "string") {
    next.system = `${preamble}\n\n${body.system}`;
  } else if (Array.isArray(body.system)) {
    next.system = [{ type: "text", text: preamble }, ...body.system];
  } else {
    next.system = preamble;
  }
  return next;
}

function injectOpenAI(body, preamble) {
  // OpenAI carries the system prompt as the first message with role:'system'.
  // If one exists we prepend; otherwise we insert a fresh one at index 0.
  const messages = Array.isArray(body.messages) ? [...body.messages] : [];
  const first = messages[0];
  if (first && first.role === "system") {
    const existing = first.content;
    let content;
    if (typeof existing === "string") {
      content = `${preamble}\n\n${existing}`;
    } else if (Array.isArray(existing)) {
      // Structured content stays structured: add the preamble as its own
      // text part instead of stringifying the customer's parts into the
      // prompt.
      content = [{ type: "text", text: preamble }, ...existing];
    } else if (existing === null || existing === undefined) {
      content = preamble;
    } else {
      content = `${preamble}\n\n${JSON.stringify(existing)}`;
    }
    messages[0] = { ...first, content };
  } else {
    messages.unshift({ role: "system", content: preamble });
  }
  return { ...body, messages };
}
package/src/client.js CHANGED
@@ -56,8 +56,26 @@ export class TESClient {
56
56
  return new Session(this._config, opts);
57
57
  }
58
58
 
59
- wrap(client, { sessionId, userId, metadata, autoEmit = true, waitUntil } = {}) {
59
+ wrap(
60
+ client,
61
+ {
62
+ sessionId,
63
+ userId,
64
+ metadata,
65
+ autoEmit = true,
66
+ waitUntil,
67
+ memory,
68
+ memoryOpts,
69
+ } = {}
70
+ ) {
60
71
  const config = userId ? { ...this._config, userId } : this._config;
61
- return wrapClient(config, client, { sessionId, metadata, autoEmit, waitUntil });
72
+ return wrapClient(config, client, {
73
+ sessionId,
74
+ metadata,
75
+ autoEmit,
76
+ waitUntil,
77
+ memory,
78
+ memoryOpts,
79
+ });
62
80
  }
63
81
  }
package/src/wrapper.js CHANGED
@@ -1,6 +1,18 @@
1
1
  import { Session } from "./session.js";
2
2
  import { normalizeResponse } from "./normalizer.js";
3
3
  import { rewriteUrls } from "./tracking.js";
4
+ import {
5
+ hostedSearch,
6
+ injectMemories,
7
+ } from "../packages/memory/src/hosted.js";
8
+
9
// Default memory-injection knobs. Match the proxy's defaults so SDK and
// proxy customers see identical retrieval behaviour. Frozen because every
// wrapped client shares this one object; callers receive a spread copy.
const MEMORY_DEFAULTS = Object.freeze({
  limit: 6,
  minScore: 0.55,
  timeoutMs: 800,
});
4
16
 
5
17
  /**
6
18
  * Detect the client type by duck-typing its shape.
@@ -12,6 +24,96 @@ function detectClientType(client) {
12
24
  return "unknown";
13
25
  }
14
26
 
27
/**
 * Pull the last user message from a request body. Only messages-shaped
 * requests (`params.messages`) are read; Workers AI prompt-style calls
 * (`{ prompt: "..." }`) yield null. Returns null when nothing usable is
 * present (e.g. embedding call, no user turn) so memory retrieval is
 * skipped cleanly.
 *
 * String content is returned as-is; array content is reduced to its text
 * parts joined with newlines. A user message whose content is neither is
 * skipped and the scan continues backwards.
 *
 * @param {object} params - Request params; only `messages` is read.
 * @param {string} provider - Unused; kept so call sites stay uniform.
 * @returns {string|null}
 */
function extractLastUserMessage(params, provider) {
  // Only messages-shaped requests are eligible for system-prompt injection.
  // Workers AI prompt-style calls (`{ prompt: "..." }`) are passed through
  // unchanged — there's no clean place to insert memory context without
  // changing the request shape, and we never want to surprise the caller
  // by mutating their prompt string.
  void provider;
  const msgs = Array.isArray(params?.messages) ? params.messages : null;
  if (!msgs) return null;
  for (let i = msgs.length - 1; i >= 0; i--) {
    const msg = msgs[i];
    // Tolerate null or malformed entries: this runs ahead of the caller's
    // LLM request, so a TypeError here would fail that request.
    if (msg?.role !== "user") continue;
    const c = msg.content;
    if (typeof c === "string") return c;
    if (Array.isArray(c)) {
      return c
        .filter((p) => p?.type === "text" && typeof p.text === "string")
        .map((p) => p.text)
        .join("\n");
    }
  }
  return null;
}
57
+
58
/**
 * Inject memories from TES into request params before the LLM call.
 *
 * Default-on. Disable via `sessionOpts.memory: false` or
 * `sessionOpts.memoryOpts.disable: true`. Knobs come from
 * `sessionOpts.memoryOpts` (`limit`, `minScore`, `timeoutMs`) layered over
 * MEMORY_DEFAULTS.
 *
 * Failure modes (TES timeout, module disabled, network error, or any
 * unexpected exception while building the request) are non-fatal — the call
 * proceeds with the customer's original params and the skip reason is
 * returned in `skipped` for the caller to record.
 *
 * @returns {Promise<{params: object, injected: number, skipped: string|null}>}
 */
async function maybeInjectMemories(
  clientConfig,
  sessionOpts,
  params,
  provider
) {
  if (
    sessionOpts?.memory === false ||
    sessionOpts?.memoryOpts?.disable === true
  ) {
    return { params, injected: 0, skipped: "memory_disabled" };
  }

  if (!clientConfig?.endpoint || !clientConfig?.apiKey) {
    return { params, injected: 0, skipped: "no_tes_config" };
  }

  try {
    const userMessage = extractLastUserMessage(params, provider);
    if (!userMessage) {
      return { params, injected: 0, skipped: "no_user_message" };
    }

    const opts = { ...MEMORY_DEFAULTS, ...(sessionOpts?.memoryOpts || {}) };
    const { memories, skipped } = await hostedSearch(
      {
        endpoint: clientConfig.endpoint,
        clientId: clientConfig.clientId,
        apiKey: clientConfig.apiKey,
      },
      userMessage,
      opts
    );

    if (!memories?.length) {
      return { params, injected: 0, skipped: skipped || "no_memories" };
    }

    return {
      params: injectMemories(params, memories, provider),
      injected: memories.length,
      skipped: null,
    };
  } catch {
    // Memory augmentation must never be the reason the customer's own LLM
    // request fails; fall back to the original params and record why.
    return { params, injected: 0, skipped: "memory_error" };
  }
}
110
+
111
/**
 * Store the outcome of the latest memory-injection attempt on the wrapped
 * client's session as `_lastMemoryStats`. Does nothing when `sessionOpts`
 * carries no `_session`.
 */
function recordMemoryStats(sessionOpts, stats) {
  const session = sessionOpts._session;
  if (!session) return;
  session._lastMemoryStats = stats;
}
116
+
15
117
  /**
16
118
  * Wrap any supported LLM client with automatic usage tracking.
17
119
  * Auto-detects OpenAI, Anthropic, and Workers AI clients.
@@ -77,7 +179,14 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
77
179
  get(target, prop) {
78
180
  if (prop === "create") {
79
181
  return async (params) => {
80
- const result = await target.create(params);
182
+ const memStats = await maybeInjectMemories(
183
+ clientConfig,
184
+ sessionOpts,
185
+ params,
186
+ "openai"
187
+ );
188
+ recordMemoryStats(sessionOpts, memStats);
189
+ const result = await target.create(memStats.params);
81
190
  const content = result.choices?.[0]?.message?.content;
82
191
  if (content) {
83
192
  result.choices[0].message.content = await rewriteUrls(
@@ -90,7 +199,7 @@ function wrapOpenAICompletions(clientConfig, completions, client, sessionOpts) {
90
199
  fireAndForgetEmit(
91
200
  clientConfig,
92
201
  sessionOpts,
93
- params.messages,
202
+ memStats.params.messages,
94
203
  result
95
204
  );
96
205
  return result;
@@ -140,7 +249,14 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
140
249
  get(target, prop) {
141
250
  if (prop === "create") {
142
251
  return async (params) => {
143
- const result = await target.create(params);
252
+ const memStats = await maybeInjectMemories(
253
+ clientConfig,
254
+ sessionOpts,
255
+ params,
256
+ "anthropic"
257
+ );
258
+ recordMemoryStats(sessionOpts, memStats);
259
+ const result = await target.create(memStats.params);
144
260
  if (Array.isArray(result.content)) {
145
261
  for (const block of result.content) {
146
262
  if (block.type === "text" && block.text) {
@@ -156,7 +272,7 @@ function wrapAnthropicMessages(clientConfig, messages, client, sessionOpts) {
156
272
  fireAndForgetEmit(
157
273
  clientConfig,
158
274
  sessionOpts,
159
- params.messages,
275
+ memStats.params.messages,
160
276
  result
161
277
  );
162
278
  return result;
@@ -187,7 +303,14 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
187
303
  get(target, prop) {
188
304
  if (prop === "run") {
189
305
  return async (model, params, ...rest) => {
190
- const result = await target.run(model, params, ...rest);
306
+ const memStats = await maybeInjectMemories(
307
+ clientConfig,
308
+ sessionOpts,
309
+ params,
310
+ "workers-ai"
311
+ );
312
+ recordMemoryStats(sessionOpts, memStats);
313
+ const result = await target.run(model, memStats.params, ...rest);
191
314
  if (result.response) {
192
315
  result.response = await rewriteUrls(
193
316
  result.response,
@@ -199,7 +322,7 @@ function wrapWorkersAI(clientConfig, aiBinding, sessionOpts) {
199
322
  fireAndForgetEmit(
200
323
  clientConfig,
201
324
  sessionOpts,
202
- params?.messages,
325
+ memStats.params?.messages,
203
326
  result,
204
327
  model
205
328
  );