@librechat/agents 3.2.33 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +21 -2
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +6 -0
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +48 -1
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +19 -0
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +20 -2
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +41 -4
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +22 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +6 -0
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +48 -1
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +19 -0
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +21 -3
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +41 -4
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
- package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/llm/vertexai/index.d.ts +10 -0
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/ToolNode.d.ts +8 -0
- package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/types/tools.d.ts +10 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/bedrock/index.ts +40 -0
- package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
- package/src/llm/bedrock/utils/index.ts +1 -0
- package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
- package/src/llm/bedrock/utils/message_outputs.ts +43 -0
- package/src/llm/google/utils/common.test.ts +64 -0
- package/src/llm/google/utils/common.ts +18 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/llm/openai/index.ts +95 -1
- package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
- package/src/llm/vertexai/index.ts +31 -0
- package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
- package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/stream.ts +40 -6
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/ToolNode.ts +85 -3
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
- package/src/tools/streamedToolCallSeals.ts +37 -9
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/types/tools.ts +10 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
|
@@ -4,6 +4,7 @@ const require_callbacks = require("../utils/callbacks.cjs");
|
|
|
4
4
|
const require_enum = require("../common/enum.cjs");
|
|
5
5
|
require("../common/index.cjs");
|
|
6
6
|
const require_instrumentation = require("../instrumentation.cjs");
|
|
7
|
+
const require_tokens = require("../utils/tokens.cjs");
|
|
7
8
|
const require_core = require("../messages/core.cjs");
|
|
8
9
|
const require_ids = require("../messages/ids.cjs");
|
|
9
10
|
const require_prune = require("../messages/prune.cjs");
|
|
@@ -48,6 +49,35 @@ let nanoid = require("nanoid");
|
|
|
48
49
|
const { AGENT, TOOLS, SUMMARIZE } = require_enum.GraphNodeKeys;
|
|
49
50
|
/** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
|
|
50
51
|
const CALIBRATION_VARIANCE_THRESHOLD = .15;
|
|
52
|
+
/**
 * Computes the first index of the message tail that post-prune formatters may
 * rewrite in place, so callers can recount only that tail.
 *
 * The scan walks backwards over the trailing run of `"tool"` messages and
 * stops on the message that precedes the run (the AI message that owns the
 * batch: artifact formatting touches every tool result after the last AI tool
 * call, and Bedrock rewrites the AI message before a trailing tool result).
 * The walk also stops once the tail would span 16 messages, which keeps the
 * usage-snapshot recount constant-cost.
 *
 * @param messages Ordered messages; each entry is expected to expose `getType()`.
 * @returns A non-negative start index, never greater than `messages.length - 2`
 *          (clamped to 0 for lists shorter than two messages).
 */
function trailingMutationStart(messages) {
	const spanLimit = 16;
	const total = messages.length;
	let cursor = total - 1;
	for (; cursor >= 0; cursor--) {
		/** First non-tool message from the end terminates the batch */
		if (messages[cursor]?.getType() !== "tool") break;
		/** Tail already spans the cap — do not extend it further */
		if (total - cursor >= spanLimit) break;
	}
	/** Always cover at least the last two messages, and never go negative */
	const upperBound = total - 2;
	return Math.max(0, Math.min(cursor, upperBound));
}
|
|
65
|
+
/**
 * Rewrites, in place, the `usage.breakdown` fields that depend on the
 * calibrated budget math so the snapshot stays internally consistent.
 *
 * - `instructionTokens` takes the effective (calibrated) instruction overhead;
 *   component fields of the breakdown are left as local estimates.
 * - `availableForMessages` becomes `contextBudget - instructions`, floored at 0.
 * - `messageTokens` becomes `contextBudget - instructions - remaining`,
 *   floored at 0.
 *
 * Each step only runs when the values it needs are non-nullish; a missing
 * `effectiveInstructionTokens` leaves the breakdown untouched.
 *
 * @param usage Context-usage snapshot carrying `breakdown`, `contextBudget`,
 *              `effectiveInstructionTokens` and `remainingContextTokens`.
 * @returns Nothing; `usage.breakdown` is mutated.
 */
function syncBudgetDerivedFields(usage) {
	const target = usage.breakdown;
	const budget = usage.contextBudget;
	const instructions = usage.effectiveInstructionTokens;
	if (instructions != null) {
		target.instructionTokens = instructions;
		if (budget != null) {
			/** Budget left once the calibrated instruction overhead is paid */
			const headroom = budget - instructions;
			target.availableForMessages = Math.max(0, headroom);
			if (usage.remainingContextTokens != null) {
				target.messageTokens = Math.max(0, headroom - usage.remainingContextTokens);
			}
		}
	}
}
|
|
51
81
|
function getHandlerDispatchedEventKey(eventName, stepId) {
|
|
52
82
|
return `${eventName}:${stepId}`;
|
|
53
83
|
}
|
|
@@ -435,11 +465,19 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
435
465
|
agentContexts = /* @__PURE__ */ new Map();
|
|
436
466
|
/** Default agent ID to use */
|
|
437
467
|
defaultAgentId;
|
|
438
|
-
|
|
468
|
+
/**
|
|
469
|
+
* Host sink for model usage emitted inside subagent child runs. Threaded
|
|
470
|
+
* into each `SubagentExecutor` this graph creates (and from there into
|
|
471
|
+
* child graphs, so nested subagents report too). See
|
|
472
|
+
* {@link t.StandardGraphInput.subagentUsageSink}.
|
|
473
|
+
*/
|
|
474
|
+
subagentUsageSink;
|
|
475
|
+
constructor({ runId, signal, agents, langfuse, tokenCounter, indexTokenCountMap, calibrationRatio, subagentUsageSink }) {
|
|
439
476
|
super();
|
|
440
477
|
this.runId = runId;
|
|
441
478
|
this.signal = signal;
|
|
442
479
|
this.langfuse = langfuse;
|
|
480
|
+
this.subagentUsageSink = subagentUsageSink;
|
|
443
481
|
if (agents.length === 0) throw new Error("At least one agent configuration is required");
|
|
444
482
|
for (const agentConfig of agents) {
|
|
445
483
|
const agentContext = require_AgentContext.AgentContext.fromConfig(agentConfig, tokenCounter, indexTokenCountMap);
|
|
@@ -748,6 +786,7 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
748
786
|
if (!config.signal) config.signal = this.signal;
|
|
749
787
|
this.config = config;
|
|
750
788
|
let messagesToUse = messages;
|
|
789
|
+
let contextUsage = null;
|
|
751
790
|
if (!agentContext.pruneMessages && agentContext.tokenCounter && agentContext.maxContextTokens != null) agentContext.pruneMessages = require_prune.createPruneMessages({
|
|
752
791
|
startIndex: agentContext.indexTokenCountMap[0] != null ? this.startIndex : 0,
|
|
753
792
|
provider: agentContext.provider,
|
|
@@ -768,7 +807,7 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
768
807
|
}
|
|
769
808
|
});
|
|
770
809
|
if (agentContext.pruneMessages) {
|
|
771
|
-
const { context, indexTokenCountMap, messagesToRefine, prePruneContextTokens, remainingContextTokens, originalToolContent, calibrationRatio, resolvedInstructionOverhead } = agentContext.pruneMessages({
|
|
810
|
+
const { context, indexTokenCountMap, messagesToRefine, prePruneContextTokens, remainingContextTokens, originalToolContent, calibrationRatio, resolvedInstructionOverhead, contextBudget, effectiveInstructionTokens } = agentContext.pruneMessages({
|
|
772
811
|
messages,
|
|
773
812
|
usageMetadata: agentContext.currentUsage,
|
|
774
813
|
lastCallUsage: agentContext.lastCallUsage,
|
|
@@ -781,9 +820,36 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
781
820
|
const nonToolOverhead = agentContext.instructionTokens - agentContext.toolSchemaTokens;
|
|
782
821
|
const calibratedToolTokens = Math.max(0, resolvedInstructionOverhead - nonToolOverhead);
|
|
783
822
|
const currentToolTokens = agentContext.toolSchemaTokens;
|
|
784
|
-
if ((currentToolTokens > 0 ? Math.abs(calibratedToolTokens - currentToolTokens) / currentToolTokens : 1) > CALIBRATION_VARIANCE_THRESHOLD)
|
|
823
|
+
if ((currentToolTokens > 0 ? Math.abs(calibratedToolTokens - currentToolTokens) / currentToolTokens : 1) > CALIBRATION_VARIANCE_THRESHOLD) {
|
|
824
|
+
agentContext.toolSchemaTokens = calibratedToolTokens;
|
|
825
|
+
/** Largest-remainder apportionment keeps the per-tool breakdown
|
|
826
|
+
* summing exactly to the calibrated aggregate */
|
|
827
|
+
if (agentContext.toolTokenCounts != null && currentToolTokens > 0) agentContext.toolTokenCounts = require_tokens.apportionTokenCounts(agentContext.toolTokenCounts, calibratedToolTokens / currentToolTokens, calibratedToolTokens);
|
|
828
|
+
}
|
|
785
829
|
}
|
|
786
830
|
messagesToUse = context;
|
|
831
|
+
/** Dispatched right before the model invoke — a summarization
|
|
832
|
+
* detour returns from this node without an LLM call, and the
|
|
833
|
+
* post-summary retry produces its own snapshot.
|
|
834
|
+
*
|
|
835
|
+
* The breakdown describes the post-prune prompt: counts from the
|
|
836
|
+
* kept context, message tokens derived from the same calibrated
|
|
837
|
+
* budget math as `remainingContextTokens` (the index map is keyed
|
|
838
|
+
* by pre-prune state indices, so summing it over `context` would
|
|
839
|
+
* missum); `prePruneContextTokens` carries the pre-prune metric. */
|
|
840
|
+
const usageBreakdown = agentContext.getTokenBudgetBreakdown(messages);
|
|
841
|
+
usageBreakdown.messageCount = context.length;
|
|
842
|
+
contextUsage = {
|
|
843
|
+
runId: this.runId,
|
|
844
|
+
agentId,
|
|
845
|
+
breakdown: usageBreakdown,
|
|
846
|
+
contextBudget,
|
|
847
|
+
effectiveInstructionTokens,
|
|
848
|
+
prePruneContextTokens,
|
|
849
|
+
remainingContextTokens,
|
|
850
|
+
calibrationRatio: agentContext.calibrationRatio
|
|
851
|
+
};
|
|
852
|
+
syncBudgetDerivedFields(contextUsage);
|
|
787
853
|
if (agentContext.summarizationEnabled === true && Array.isArray(messagesToRefine) && messagesToRefine.length > 0) {
|
|
788
854
|
const shouldSkip = agentContext.shouldSkipSummarization(messages.length);
|
|
789
855
|
if (!shouldSkip && require_index$6.shouldTriggerSummarization({
|
|
@@ -850,6 +916,27 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
850
916
|
}
|
|
851
917
|
}
|
|
852
918
|
let finalMessages = messagesToUse;
|
|
919
|
+
/** Tail snapshot for the dispatch-time usage delta: in-place
|
|
920
|
+
* formatters (artifact appends, Bedrock content rewrites, legacy
|
|
921
|
+
* string conversion) mutate without changing length or identity —
|
|
922
|
+
* capture before they run. Legacy string conversion can also touch
|
|
923
|
+
* messages before the tail, so those convertible indices are
|
|
924
|
+
* tracked separately (none exist in the common case). */
|
|
925
|
+
const tailStart = trailingMutationStart(messagesToUse);
|
|
926
|
+
let preFormatTailTokens = null;
|
|
927
|
+
let legacyIndices = null;
|
|
928
|
+
let preFormatLegacyTokens = 0;
|
|
929
|
+
if (contextUsage != null && agentContext.tokenCounter != null) {
|
|
930
|
+
preFormatTailTokens = 0;
|
|
931
|
+
for (const message of messagesToUse.slice(tailStart)) preFormatTailTokens += agentContext.tokenCounter(message);
|
|
932
|
+
if (agentContext.useLegacyContent) {
|
|
933
|
+
legacyIndices = [];
|
|
934
|
+
for (let i = 0; i < tailStart; i++) if (require_content.isLegacyConvertible(messagesToUse[i])) {
|
|
935
|
+
legacyIndices.push(i);
|
|
936
|
+
preFormatLegacyTokens += agentContext.tokenCounter(messagesToUse[i]);
|
|
937
|
+
}
|
|
938
|
+
}
|
|
939
|
+
}
|
|
853
940
|
if (agentContext.useLegacyContent) finalMessages = require_content.formatContentStrings(finalMessages);
|
|
854
941
|
const lastMessageX = finalMessages.length >= 2 ? finalMessages[finalMessages.length - 2] : null;
|
|
855
942
|
const lastMessageY = finalMessages.length >= 1 ? finalMessages[finalMessages.length - 1] : null;
|
|
@@ -927,6 +1014,36 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
927
1014
|
info: `Message pruning removed all messages as none fit in the context window. ${guidance}\n${breakdown}`
|
|
928
1015
|
}));
|
|
929
1016
|
}
|
|
1017
|
+
/** Past the empty-prompt guard — a model call is now guaranteed */
|
|
1018
|
+
if (contextUsage != null) {
|
|
1019
|
+
const usageRatio = contextUsage.calibrationRatio != null && contextUsage.calibrationRatio > 0 ? contextUsage.calibrationRatio : 1;
|
|
1020
|
+
if (agentContext.tokenCounter != null && finalMessages.length !== messagesToUse.length) {
|
|
1021
|
+
/** Post-prune formatting restructured the payload (e.g. thinking
|
|
1022
|
+
* placeholder collapse, orphan drops) — recount so the gauge
|
|
1023
|
+
* reflects what is actually sent */
|
|
1024
|
+
let rawTokens = 0;
|
|
1025
|
+
for (const message of finalMessages) rawTokens += agentContext.tokenCounter(message);
|
|
1026
|
+
contextUsage.breakdown.messageCount = finalMessages.length;
|
|
1027
|
+
if (contextUsage.contextBudget != null && contextUsage.effectiveInstructionTokens != null) contextUsage.remainingContextTokens = Math.max(0, contextUsage.contextBudget - contextUsage.effectiveInstructionTokens - Math.round(rawTokens * usageRatio));
|
|
1028
|
+
} else if (preFormatTailTokens != null && agentContext.tokenCounter != null && contextUsage.remainingContextTokens != null) {
|
|
1029
|
+
/** Same-length formatting can still mutate in place — the trailing
|
|
1030
|
+
* tool batch (artifacts, Bedrock rewrites) and any legacy-converted
|
|
1031
|
+
* messages before it — adjust remaining by the calibrated delta */
|
|
1032
|
+
let postFormatTailTokens = 0;
|
|
1033
|
+
for (const message of finalMessages.slice(tailStart)) postFormatTailTokens += agentContext.tokenCounter(message);
|
|
1034
|
+
let formatDelta = postFormatTailTokens - preFormatTailTokens;
|
|
1035
|
+
if (legacyIndices != null && legacyIndices.length > 0) {
|
|
1036
|
+
let postFormatLegacyTokens = 0;
|
|
1037
|
+
for (const index of legacyIndices) postFormatLegacyTokens += agentContext.tokenCounter(finalMessages[index]);
|
|
1038
|
+
formatDelta += postFormatLegacyTokens - preFormatLegacyTokens;
|
|
1039
|
+
}
|
|
1040
|
+
if (formatDelta !== 0) contextUsage.remainingContextTokens = Math.max(0, Math.min(contextUsage.contextBudget ?? Number.MAX_SAFE_INTEGER, contextUsage.remainingContextTokens - Math.round(formatDelta * usageRatio)));
|
|
1041
|
+
}
|
|
1042
|
+
syncBudgetDerivedFields(contextUsage);
|
|
1043
|
+
/** Awaited so async host handlers receive the pre-invoke snapshot
|
|
1044
|
+
* before any model deltas are emitted */
|
|
1045
|
+
await require_events.safeDispatchCustomEvent("on_context_usage", contextUsage, config);
|
|
1046
|
+
}
|
|
930
1047
|
const invokeStart = Date.now();
|
|
931
1048
|
const invokeMeta = {
|
|
932
1049
|
runId: this.runId,
|
|
@@ -1118,6 +1235,7 @@ var StandardGraph = class StandardGraph extends Graph {
|
|
|
1118
1235
|
parentAgentId: agentContext.agentId,
|
|
1119
1236
|
langfuse: this.langfuse,
|
|
1120
1237
|
tokenCounter: agentContext.tokenCounter,
|
|
1238
|
+
usageSink: this.subagentUsageSink,
|
|
1121
1239
|
maxDepth: effectiveSubagentDepth,
|
|
1122
1240
|
createChildGraph: (input) => {
|
|
1123
1241
|
const childGraph = new StandardGraph(input);
|