@librechat/agents 3.2.33 → 3.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133):
  1. package/dist/cjs/agents/AgentContext.cjs +47 -10
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/enum.cjs +13 -0
  4. package/dist/cjs/common/enum.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +121 -3
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/llm/bedrock/index.cjs +21 -2
  8. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  9. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
  10. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  11. package/dist/cjs/llm/google/utils/common.cjs +6 -0
  12. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  13. package/dist/cjs/llm/invoke.cjs +49 -8
  14. package/dist/cjs/llm/invoke.cjs.map +1 -1
  15. package/dist/cjs/llm/openai/index.cjs +48 -1
  16. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  17. package/dist/cjs/llm/vertexai/index.cjs +19 -0
  18. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  19. package/dist/cjs/main.cjs +2 -0
  20. package/dist/cjs/messages/content.cjs +12 -14
  21. package/dist/cjs/messages/content.cjs.map +1 -1
  22. package/dist/cjs/messages/prune.cjs +31 -13
  23. package/dist/cjs/messages/prune.cjs.map +1 -1
  24. package/dist/cjs/run.cjs +7 -2
  25. package/dist/cjs/run.cjs.map +1 -1
  26. package/dist/cjs/stream.cjs +20 -2
  27. package/dist/cjs/stream.cjs.map +1 -1
  28. package/dist/cjs/summarization/node.cjs +12 -1
  29. package/dist/cjs/summarization/node.cjs.map +1 -1
  30. package/dist/cjs/tools/ToolNode.cjs +41 -4
  31. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  32. package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
  33. package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
  34. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
  35. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  36. package/dist/cjs/utils/tokens.cjs +30 -0
  37. package/dist/cjs/utils/tokens.cjs.map +1 -1
  38. package/dist/esm/agents/AgentContext.mjs +47 -10
  39. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  40. package/dist/esm/common/enum.mjs +13 -0
  41. package/dist/esm/common/enum.mjs.map +1 -1
  42. package/dist/esm/graphs/Graph.mjs +122 -4
  43. package/dist/esm/graphs/Graph.mjs.map +1 -1
  44. package/dist/esm/llm/bedrock/index.mjs +22 -3
  45. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  46. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
  47. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  48. package/dist/esm/llm/google/utils/common.mjs +6 -0
  49. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  50. package/dist/esm/llm/invoke.mjs +49 -8
  51. package/dist/esm/llm/invoke.mjs.map +1 -1
  52. package/dist/esm/llm/openai/index.mjs +48 -1
  53. package/dist/esm/llm/openai/index.mjs.map +1 -1
  54. package/dist/esm/llm/vertexai/index.mjs +19 -0
  55. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  56. package/dist/esm/main.mjs +3 -3
  57. package/dist/esm/messages/content.mjs +12 -15
  58. package/dist/esm/messages/content.mjs.map +1 -1
  59. package/dist/esm/messages/prune.mjs +31 -13
  60. package/dist/esm/messages/prune.mjs.map +1 -1
  61. package/dist/esm/run.mjs +7 -2
  62. package/dist/esm/run.mjs.map +1 -1
  63. package/dist/esm/stream.mjs +21 -3
  64. package/dist/esm/stream.mjs.map +1 -1
  65. package/dist/esm/summarization/node.mjs +12 -1
  66. package/dist/esm/summarization/node.mjs.map +1 -1
  67. package/dist/esm/tools/ToolNode.mjs +41 -4
  68. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  69. package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
  70. package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
  71. package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
  72. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  73. package/dist/esm/utils/tokens.mjs +30 -1
  74. package/dist/esm/utils/tokens.mjs.map +1 -1
  75. package/dist/types/agents/AgentContext.d.ts +7 -3
  76. package/dist/types/common/enum.d.ts +13 -0
  77. package/dist/types/graphs/Graph.d.ts +8 -1
  78. package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
  79. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
  80. package/dist/types/llm/invoke.d.ts +1 -1
  81. package/dist/types/llm/vertexai/index.d.ts +10 -0
  82. package/dist/types/messages/content.d.ts +5 -0
  83. package/dist/types/messages/prune.d.ts +4 -0
  84. package/dist/types/run.d.ts +1 -0
  85. package/dist/types/tools/ToolNode.d.ts +8 -0
  86. package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
  87. package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
  88. package/dist/types/types/graph.d.ts +89 -3
  89. package/dist/types/types/run.d.ts +13 -0
  90. package/dist/types/types/tools.d.ts +10 -0
  91. package/dist/types/utils/tokens.d.ts +7 -0
  92. package/package.json +1 -1
  93. package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
  94. package/src/agents/AgentContext.ts +69 -6
  95. package/src/agents/__tests__/AgentContext.test.ts +6 -2
  96. package/src/common/enum.ts +13 -0
  97. package/src/graphs/Graph.ts +196 -0
  98. package/src/llm/bedrock/index.ts +40 -0
  99. package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
  100. package/src/llm/bedrock/utils/index.ts +1 -0
  101. package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
  102. package/src/llm/bedrock/utils/message_outputs.ts +43 -0
  103. package/src/llm/google/utils/common.test.ts +64 -0
  104. package/src/llm/google/utils/common.ts +18 -0
  105. package/src/llm/invoke.test.ts +79 -1
  106. package/src/llm/invoke.ts +58 -4
  107. package/src/llm/openai/index.ts +95 -1
  108. package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
  109. package/src/llm/vertexai/index.ts +31 -0
  110. package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
  111. package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
  112. package/src/messages/content.ts +24 -32
  113. package/src/messages/prune.ts +39 -2
  114. package/src/run.ts +5 -0
  115. package/src/scripts/subagent-usage-sink.ts +176 -0
  116. package/src/specs/context-accuracy.live.test.ts +409 -0
  117. package/src/specs/context-usage-event.test.ts +117 -0
  118. package/src/specs/context-usage.live.test.ts +297 -0
  119. package/src/specs/prune.test.ts +51 -1
  120. package/src/specs/subagent.test.ts +124 -1
  121. package/src/stream.ts +40 -6
  122. package/src/summarization/__tests__/node.test.ts +60 -1
  123. package/src/summarization/node.ts +20 -1
  124. package/src/tools/ToolNode.ts +85 -3
  125. package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
  126. package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
  127. package/src/tools/streamedToolCallSeals.ts +37 -9
  128. package/src/tools/subagent/SubagentExecutor.ts +221 -3
  129. package/src/types/graph.ts +94 -1
  130. package/src/types/run.ts +13 -0
  131. package/src/types/tools.ts +10 -0
  132. package/src/utils/__tests__/apportion.test.ts +32 -0
  133. package/src/utils/tokens.ts +33 -0
@@ -4,6 +4,7 @@ const require_callbacks = require("../utils/callbacks.cjs");
4
4
  const require_enum = require("../common/enum.cjs");
5
5
  require("../common/index.cjs");
6
6
  const require_instrumentation = require("../instrumentation.cjs");
7
+ const require_tokens = require("../utils/tokens.cjs");
7
8
  const require_core = require("../messages/core.cjs");
8
9
  const require_ids = require("../messages/ids.cjs");
9
10
  const require_prune = require("../messages/prune.cjs");
@@ -48,6 +49,35 @@ let nanoid = require("nanoid");
48
49
  const { AGENT, TOOLS, SUMMARIZE } = require_enum.GraphNodeKeys;
49
50
  /** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
50
51
  const CALIBRATION_VARIANCE_THRESHOLD = .15;
52
+ /**
53
+ * Start index of the span post-prune formatters can mutate in place: the
54
+ * trailing tool batch plus its owning AI message (artifact formatting touches
55
+ * every tool result after the last AI tool call; Bedrock rewrites the AI
56
+ * message before a trailing tool result). Capped so the usage-snapshot
57
+ * recount stays constant-cost.
58
+ */
59
+ function trailingMutationStart(messages) {
60
+ const MAX_SPAN = 16;
61
+ let index = messages.length - 1;
62
+ while (index >= 0 && messages[index]?.getType() === "tool" && messages.length - index < MAX_SPAN) index--;
63
+ return Math.max(0, Math.min(index, messages.length - 2));
64
+ }
65
+ /**
66
+ * Re-derives the breakdown fields coupled to the calibrated budget math so
67
+ * the snapshot stays internally consistent: the aggregate
68
+ * `instructionTokens`/`availableForMessages` reflect the pruner's effective
69
+ * (calibrated) overhead — component fields remain local estimates — and
70
+ * `messageTokens` mirrors `contextBudget - instructions - remaining`.
71
+ */
72
+ function syncBudgetDerivedFields(usage) {
73
+ const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
74
+ if (effectiveInstructionTokens == null) return;
75
+ breakdown.instructionTokens = effectiveInstructionTokens;
76
+ if (contextBudget == null) return;
77
+ breakdown.availableForMessages = Math.max(0, contextBudget - effectiveInstructionTokens);
78
+ if (usage.remainingContextTokens == null) return;
79
+ breakdown.messageTokens = Math.max(0, contextBudget - effectiveInstructionTokens - usage.remainingContextTokens);
80
+ }
51
81
  function getHandlerDispatchedEventKey(eventName, stepId) {
52
82
  return `${eventName}:${stepId}`;
53
83
  }
@@ -435,11 +465,19 @@ var StandardGraph = class StandardGraph extends Graph {
435
465
  agentContexts = /* @__PURE__ */ new Map();
436
466
  /** Default agent ID to use */
437
467
  defaultAgentId;
438
- constructor({ runId, signal, agents, langfuse, tokenCounter, indexTokenCountMap, calibrationRatio }) {
468
+ /**
469
+ * Host sink for model usage emitted inside subagent child runs. Threaded
470
+ * into each `SubagentExecutor` this graph creates (and from there into
471
+ * child graphs, so nested subagents report too). See
472
+ * {@link t.StandardGraphInput.subagentUsageSink}.
473
+ */
474
+ subagentUsageSink;
475
+ constructor({ runId, signal, agents, langfuse, tokenCounter, indexTokenCountMap, calibrationRatio, subagentUsageSink }) {
439
476
  super();
440
477
  this.runId = runId;
441
478
  this.signal = signal;
442
479
  this.langfuse = langfuse;
480
+ this.subagentUsageSink = subagentUsageSink;
443
481
  if (agents.length === 0) throw new Error("At least one agent configuration is required");
444
482
  for (const agentConfig of agents) {
445
483
  const agentContext = require_AgentContext.AgentContext.fromConfig(agentConfig, tokenCounter, indexTokenCountMap);
@@ -748,6 +786,7 @@ var StandardGraph = class StandardGraph extends Graph {
748
786
  if (!config.signal) config.signal = this.signal;
749
787
  this.config = config;
750
788
  let messagesToUse = messages;
789
+ let contextUsage = null;
751
790
  if (!agentContext.pruneMessages && agentContext.tokenCounter && agentContext.maxContextTokens != null) agentContext.pruneMessages = require_prune.createPruneMessages({
752
791
  startIndex: agentContext.indexTokenCountMap[0] != null ? this.startIndex : 0,
753
792
  provider: agentContext.provider,
@@ -768,7 +807,7 @@ var StandardGraph = class StandardGraph extends Graph {
768
807
  }
769
808
  });
770
809
  if (agentContext.pruneMessages) {
771
- const { context, indexTokenCountMap, messagesToRefine, prePruneContextTokens, remainingContextTokens, originalToolContent, calibrationRatio, resolvedInstructionOverhead } = agentContext.pruneMessages({
810
+ const { context, indexTokenCountMap, messagesToRefine, prePruneContextTokens, remainingContextTokens, originalToolContent, calibrationRatio, resolvedInstructionOverhead, contextBudget, effectiveInstructionTokens } = agentContext.pruneMessages({
772
811
  messages,
773
812
  usageMetadata: agentContext.currentUsage,
774
813
  lastCallUsage: agentContext.lastCallUsage,
@@ -781,9 +820,36 @@ var StandardGraph = class StandardGraph extends Graph {
781
820
  const nonToolOverhead = agentContext.instructionTokens - agentContext.toolSchemaTokens;
782
821
  const calibratedToolTokens = Math.max(0, resolvedInstructionOverhead - nonToolOverhead);
783
822
  const currentToolTokens = agentContext.toolSchemaTokens;
784
- if ((currentToolTokens > 0 ? Math.abs(calibratedToolTokens - currentToolTokens) / currentToolTokens : 1) > CALIBRATION_VARIANCE_THRESHOLD) agentContext.toolSchemaTokens = calibratedToolTokens;
823
+ if ((currentToolTokens > 0 ? Math.abs(calibratedToolTokens - currentToolTokens) / currentToolTokens : 1) > CALIBRATION_VARIANCE_THRESHOLD) {
824
+ agentContext.toolSchemaTokens = calibratedToolTokens;
825
+ /** Largest-remainder apportionment keeps the per-tool breakdown
826
+ * summing exactly to the calibrated aggregate */
827
+ if (agentContext.toolTokenCounts != null && currentToolTokens > 0) agentContext.toolTokenCounts = require_tokens.apportionTokenCounts(agentContext.toolTokenCounts, calibratedToolTokens / currentToolTokens, calibratedToolTokens);
828
+ }
785
829
  }
786
830
  messagesToUse = context;
831
+ /** Dispatched right before the model invoke — a summarization
832
+ * detour returns from this node without an LLM call, and the
833
+ * post-summary retry produces its own snapshot.
834
+ *
835
+ * The breakdown describes the post-prune prompt: counts from the
836
+ * kept context, message tokens derived from the same calibrated
837
+ * budget math as `remainingContextTokens` (the index map is keyed
838
+ * by pre-prune state indices, so summing it over `context` would
839
+ * missum); `prePruneContextTokens` carries the pre-prune metric. */
840
+ const usageBreakdown = agentContext.getTokenBudgetBreakdown(messages);
841
+ usageBreakdown.messageCount = context.length;
842
+ contextUsage = {
843
+ runId: this.runId,
844
+ agentId,
845
+ breakdown: usageBreakdown,
846
+ contextBudget,
847
+ effectiveInstructionTokens,
848
+ prePruneContextTokens,
849
+ remainingContextTokens,
850
+ calibrationRatio: agentContext.calibrationRatio
851
+ };
852
+ syncBudgetDerivedFields(contextUsage);
787
853
  if (agentContext.summarizationEnabled === true && Array.isArray(messagesToRefine) && messagesToRefine.length > 0) {
788
854
  const shouldSkip = agentContext.shouldSkipSummarization(messages.length);
789
855
  if (!shouldSkip && require_index$6.shouldTriggerSummarization({
@@ -850,6 +916,27 @@ var StandardGraph = class StandardGraph extends Graph {
850
916
  }
851
917
  }
852
918
  let finalMessages = messagesToUse;
919
+ /** Tail snapshot for the dispatch-time usage delta: in-place
920
+ * formatters (artifact appends, Bedrock content rewrites, legacy
921
+ * string conversion) mutate without changing length or identity —
922
+ * capture before they run. Legacy string conversion can also touch
923
+ * messages before the tail, so those convertible indices are
924
+ * tracked separately (none exist in the common case). */
925
+ const tailStart = trailingMutationStart(messagesToUse);
926
+ let preFormatTailTokens = null;
927
+ let legacyIndices = null;
928
+ let preFormatLegacyTokens = 0;
929
+ if (contextUsage != null && agentContext.tokenCounter != null) {
930
+ preFormatTailTokens = 0;
931
+ for (const message of messagesToUse.slice(tailStart)) preFormatTailTokens += agentContext.tokenCounter(message);
932
+ if (agentContext.useLegacyContent) {
933
+ legacyIndices = [];
934
+ for (let i = 0; i < tailStart; i++) if (require_content.isLegacyConvertible(messagesToUse[i])) {
935
+ legacyIndices.push(i);
936
+ preFormatLegacyTokens += agentContext.tokenCounter(messagesToUse[i]);
937
+ }
938
+ }
939
+ }
853
940
  if (agentContext.useLegacyContent) finalMessages = require_content.formatContentStrings(finalMessages);
854
941
  const lastMessageX = finalMessages.length >= 2 ? finalMessages[finalMessages.length - 2] : null;
855
942
  const lastMessageY = finalMessages.length >= 1 ? finalMessages[finalMessages.length - 1] : null;
@@ -927,6 +1014,36 @@ var StandardGraph = class StandardGraph extends Graph {
927
1014
  info: `Message pruning removed all messages as none fit in the context window. ${guidance}\n${breakdown}`
928
1015
  }));
929
1016
  }
1017
+ /** Past the empty-prompt guard — a model call is now guaranteed */
1018
+ if (contextUsage != null) {
1019
+ const usageRatio = contextUsage.calibrationRatio != null && contextUsage.calibrationRatio > 0 ? contextUsage.calibrationRatio : 1;
1020
+ if (agentContext.tokenCounter != null && finalMessages.length !== messagesToUse.length) {
1021
+ /** Post-prune formatting restructured the payload (e.g. thinking
1022
+ * placeholder collapse, orphan drops) — recount so the gauge
1023
+ * reflects what is actually sent */
1024
+ let rawTokens = 0;
1025
+ for (const message of finalMessages) rawTokens += agentContext.tokenCounter(message);
1026
+ contextUsage.breakdown.messageCount = finalMessages.length;
1027
+ if (contextUsage.contextBudget != null && contextUsage.effectiveInstructionTokens != null) contextUsage.remainingContextTokens = Math.max(0, contextUsage.contextBudget - contextUsage.effectiveInstructionTokens - Math.round(rawTokens * usageRatio));
1028
+ } else if (preFormatTailTokens != null && agentContext.tokenCounter != null && contextUsage.remainingContextTokens != null) {
1029
+ /** Same-length formatting can still mutate in place — the trailing
1030
+ * tool batch (artifacts, Bedrock rewrites) and any legacy-converted
1031
+ * messages before it — adjust remaining by the calibrated delta */
1032
+ let postFormatTailTokens = 0;
1033
+ for (const message of finalMessages.slice(tailStart)) postFormatTailTokens += agentContext.tokenCounter(message);
1034
+ let formatDelta = postFormatTailTokens - preFormatTailTokens;
1035
+ if (legacyIndices != null && legacyIndices.length > 0) {
1036
+ let postFormatLegacyTokens = 0;
1037
+ for (const index of legacyIndices) postFormatLegacyTokens += agentContext.tokenCounter(finalMessages[index]);
1038
+ formatDelta += postFormatLegacyTokens - preFormatLegacyTokens;
1039
+ }
1040
+ if (formatDelta !== 0) contextUsage.remainingContextTokens = Math.max(0, Math.min(contextUsage.contextBudget ?? Number.MAX_SAFE_INTEGER, contextUsage.remainingContextTokens - Math.round(formatDelta * usageRatio)));
1041
+ }
1042
+ syncBudgetDerivedFields(contextUsage);
1043
+ /** Awaited so async host handlers receive the pre-invoke snapshot
1044
+ * before any model deltas are emitted */
1045
+ await require_events.safeDispatchCustomEvent("on_context_usage", contextUsage, config);
1046
+ }
930
1047
  const invokeStart = Date.now();
931
1048
  const invokeMeta = {
932
1049
  runId: this.runId,
@@ -1118,6 +1235,7 @@ var StandardGraph = class StandardGraph extends Graph {
1118
1235
  parentAgentId: agentContext.agentId,
1119
1236
  langfuse: this.langfuse,
1120
1237
  tokenCounter: agentContext.tokenCounter,
1238
+ usageSink: this.subagentUsageSink,
1121
1239
  maxDepth: effectiveSubagentDepth,
1122
1240
  createChildGraph: (input) => {
1123
1241
  const childGraph = new StandardGraph(input);