@librechat/agents 3.2.33 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +21 -2
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +6 -0
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +48 -1
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +19 -0
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +20 -2
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +41 -4
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +22 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +6 -0
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +48 -1
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +19 -0
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +21 -3
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +41 -4
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
- package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/llm/vertexai/index.d.ts +10 -0
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/ToolNode.d.ts +8 -0
- package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/types/tools.d.ts +10 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/bedrock/index.ts +40 -0
- package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
- package/src/llm/bedrock/utils/index.ts +1 -0
- package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
- package/src/llm/bedrock/utils/message_outputs.ts +43 -0
- package/src/llm/google/utils/common.test.ts +64 -0
- package/src/llm/google/utils/common.ts +18 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/llm/openai/index.ts +95 -1
- package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
- package/src/llm/vertexai/index.ts +31 -0
- package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
- package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/stream.ts +40 -6
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/ToolNode.ts +85 -3
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
- package/src/tools/streamedToolCallSeals.ts +37 -9
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/types/tools.ts +10 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
addCacheControlToStablePrefixMessages,
|
|
22
22
|
} from '@/messages/cache';
|
|
23
23
|
import { createSchemaOnlyTools } from '@/tools/schema';
|
|
24
|
+
import { apportionTokenCounts } from '@/utils/tokens';
|
|
24
25
|
import { DEFAULT_RESERVE_RATIO } from '@/messages';
|
|
25
26
|
import { toJsonSchema } from '@/utils/schema';
|
|
26
27
|
|
|
@@ -191,6 +192,11 @@ export class AgentContext {
|
|
|
191
192
|
dynamicInstructionTokens: number = 0;
|
|
192
193
|
/** Token count for tool schemas only. */
|
|
193
194
|
toolSchemaTokens: number = 0;
|
|
195
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name.
|
|
196
|
+
* `undefined` when not calculated (e.g. cached aggregate schema tokens). */
|
|
197
|
+
toolTokenCounts?: Record<string, number>;
|
|
198
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
199
|
+
deferredToolNames: string[] = [];
|
|
194
200
|
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
195
201
|
calibrationRatio: number = 1;
|
|
196
202
|
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
@@ -894,6 +900,8 @@ export class AgentContext {
|
|
|
894
900
|
this.systemMessageTokens = 0;
|
|
895
901
|
this.dynamicInstructionTokens = 0;
|
|
896
902
|
this.toolSchemaTokens = 0;
|
|
903
|
+
this.toolTokenCounts = undefined;
|
|
904
|
+
this.deferredToolNames = [];
|
|
897
905
|
this.cachedSystemRunnable = undefined;
|
|
898
906
|
this.systemRunnableStale = true;
|
|
899
907
|
this.lastToken = undefined;
|
|
@@ -1006,6 +1014,10 @@ export class AgentContext {
|
|
|
1006
1014
|
): Promise<void> {
|
|
1007
1015
|
let toolTokens = 0;
|
|
1008
1016
|
const countedToolNames = new Set<string>();
|
|
1017
|
+
/** Prototype-free: external tool names like `toString` must not hit
|
|
1018
|
+
* inherited properties during accumulation */
|
|
1019
|
+
const rawToolTokenCounts: Record<string, number> = Object.create(null);
|
|
1020
|
+
const deferredCountedNames = new Set<string>();
|
|
1009
1021
|
|
|
1010
1022
|
/**
|
|
1011
1023
|
* Iterate both `tools` (user-provided instance tools) and `graphTools`
|
|
@@ -1040,11 +1052,14 @@ export class AgentContext {
|
|
|
1040
1052
|
toolName,
|
|
1041
1053
|
(genericTool.description as string | undefined) ?? ''
|
|
1042
1054
|
);
|
|
1043
|
-
|
|
1055
|
+
const schemaTokens = tokenCounter(
|
|
1044
1056
|
new SystemMessage(JSON.stringify(jsonSchema))
|
|
1045
1057
|
);
|
|
1058
|
+
toolTokens += schemaTokens;
|
|
1046
1059
|
if (toolName) {
|
|
1047
1060
|
countedToolNames.add(toolName);
|
|
1061
|
+
rawToolTokenCounts[toolName] =
|
|
1062
|
+
(rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
|
|
1048
1063
|
}
|
|
1049
1064
|
}
|
|
1050
1065
|
}
|
|
@@ -1062,7 +1077,16 @@ export class AgentContext {
|
|
|
1062
1077
|
parameters: def.parameters ?? {},
|
|
1063
1078
|
},
|
|
1064
1079
|
};
|
|
1065
|
-
|
|
1080
|
+
const schemaTokens = tokenCounter(
|
|
1081
|
+
new SystemMessage(JSON.stringify(schema))
|
|
1082
|
+
);
|
|
1083
|
+
toolTokens += schemaTokens;
|
|
1084
|
+
countedToolNames.add(def.name);
|
|
1085
|
+
rawToolTokenCounts[def.name] =
|
|
1086
|
+
(rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
|
|
1087
|
+
if (def.defer_loading === true) {
|
|
1088
|
+
deferredCountedNames.add(def.name);
|
|
1089
|
+
}
|
|
1066
1090
|
}
|
|
1067
1091
|
|
|
1068
1092
|
const isAnthropic =
|
|
@@ -1077,6 +1101,25 @@ export class AgentContext {
|
|
|
1077
1101
|
? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
|
|
1078
1102
|
: DEFAULT_TOOL_TOKEN_MULTIPLIER;
|
|
1079
1103
|
this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
|
|
1104
|
+
|
|
1105
|
+
/** Largest-remainder apportionment keeps the per-tool counts summing
|
|
1106
|
+
* exactly to the aggregate despite per-entry rounding */
|
|
1107
|
+
const toolTokenCounts = apportionTokenCounts(
|
|
1108
|
+
rawToolTokenCounts,
|
|
1109
|
+
toolTokenMultiplier,
|
|
1110
|
+
this.toolSchemaTokens
|
|
1111
|
+
);
|
|
1112
|
+
const deferredToolNames: string[] = [];
|
|
1113
|
+
for (const name of Object.keys(rawToolTokenCounts)) {
|
|
1114
|
+
if (
|
|
1115
|
+
deferredCountedNames.has(name) ||
|
|
1116
|
+
this.toolRegistry?.get(name)?.defer_loading === true
|
|
1117
|
+
) {
|
|
1118
|
+
deferredToolNames.push(name);
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
1121
|
+
this.toolTokenCounts = toolTokenCounts;
|
|
1122
|
+
this.deferredToolNames = deferredToolNames;
|
|
1080
1123
|
}
|
|
1081
1124
|
|
|
1082
1125
|
/**
|
|
@@ -1212,9 +1255,8 @@ export class AgentContext {
|
|
|
1212
1255
|
* Returns a structured breakdown of how the context token budget is consumed.
|
|
1213
1256
|
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
1214
1257
|
*
|
|
1215
|
-
* Note: `
|
|
1216
|
-
*
|
|
1217
|
-
* recomputed when `markToolsAsDiscovered` is called mid-run.
|
|
1258
|
+
* Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
|
|
1259
|
+
* so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
|
|
1218
1260
|
*/
|
|
1219
1261
|
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
|
|
1220
1262
|
const maxContextTokens = this.maxContextTokens ?? 0;
|
|
@@ -1238,7 +1280,14 @@ export class AgentContext {
|
|
|
1238
1280
|
}
|
|
1239
1281
|
}
|
|
1240
1282
|
|
|
1241
|
-
|
|
1283
|
+
/** Mirror the pruner's reserve math so availableForMessages agrees
|
|
1284
|
+
* with the contextBudget computed during pruning */
|
|
1285
|
+
const reserveRatio =
|
|
1286
|
+
this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
|
|
1287
|
+
const reserveTokens =
|
|
1288
|
+
reserveRatio > 0 && reserveRatio < 1
|
|
1289
|
+
? Math.round(maxContextTokens * reserveRatio)
|
|
1290
|
+
: 0;
|
|
1242
1291
|
const availableForMessages = Math.max(
|
|
1243
1292
|
0,
|
|
1244
1293
|
maxContextTokens - reserveTokens - this.instructionTokens
|
|
@@ -1255,6 +1304,12 @@ export class AgentContext {
|
|
|
1255
1304
|
messageCount,
|
|
1256
1305
|
messageTokens,
|
|
1257
1306
|
availableForMessages,
|
|
1307
|
+
toolTokenCounts:
|
|
1308
|
+
this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
|
|
1309
|
+
deferredToolNames:
|
|
1310
|
+
this.deferredToolNames.length > 0
|
|
1311
|
+
? [...this.deferredToolNames]
|
|
1312
|
+
: undefined,
|
|
1258
1313
|
};
|
|
1259
1314
|
}
|
|
1260
1315
|
|
|
@@ -1324,6 +1379,14 @@ export class AgentContext {
|
|
|
1324
1379
|
}
|
|
1325
1380
|
if (hasNewDiscoveries) {
|
|
1326
1381
|
this.systemRunnableStale = true;
|
|
1382
|
+
/** Refresh schema token accounting so the next call's budget and
|
|
1383
|
+
* per-tool breakdown include the newly discovered tools; awaited
|
|
1384
|
+
* via tokenCalculationPromise before the next model call */
|
|
1385
|
+
if (this.tokenCounter) {
|
|
1386
|
+
this.tokenCalculationPromise = this.calculateInstructionTokens(
|
|
1387
|
+
this.tokenCounter
|
|
1388
|
+
);
|
|
1389
|
+
}
|
|
1327
1390
|
}
|
|
1328
1391
|
return hasNewDiscoveries;
|
|
1329
1392
|
}
|
|
@@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
|
|
|
1414
1414
|
expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
|
|
1415
1415
|
});
|
|
1416
1416
|
|
|
1417
|
-
it('toolSchemaTokens
|
|
1417
|
+
it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
|
|
1418
1418
|
const toolDefinitions: t.LCTool[] = [
|
|
1419
1419
|
{
|
|
1420
1420
|
name: 'deferred',
|
|
@@ -1431,9 +1431,13 @@ describe('AgentContext', () => {
|
|
|
1431
1431
|
|
|
1432
1432
|
await ctx.tokenCalculationPromise;
|
|
1433
1433
|
expect(ctx.toolSchemaTokens).toBe(0);
|
|
1434
|
+
expect(ctx.toolTokenCounts).toEqual({});
|
|
1434
1435
|
|
|
1435
1436
|
ctx.markToolsAsDiscovered(['deferred']);
|
|
1436
|
-
|
|
1437
|
+
await ctx.tokenCalculationPromise;
|
|
1438
|
+
expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
|
|
1439
|
+
expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
|
|
1440
|
+
expect(ctx.deferredToolNames).toContain('deferred');
|
|
1437
1441
|
});
|
|
1438
1442
|
});
|
|
1439
1443
|
|
package/src/common/enum.ts
CHANGED
|
@@ -31,6 +31,8 @@ export enum GraphEvents {
|
|
|
31
31
|
ON_SUBAGENT_UPDATE = 'on_subagent_update',
|
|
32
32
|
/** [Custom] Diagnostic logging event for context management observability */
|
|
33
33
|
ON_AGENT_LOG = 'on_agent_log',
|
|
34
|
+
/** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
|
|
35
|
+
ON_CONTEXT_USAGE = 'on_context_usage',
|
|
34
36
|
|
|
35
37
|
/* Official Events */
|
|
36
38
|
|
|
@@ -185,6 +187,17 @@ export enum Constants {
|
|
|
185
187
|
/** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
|
|
186
188
|
ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
|
|
187
189
|
SKILL_TOOL = 'skill',
|
|
190
|
+
/**
|
|
191
|
+
* Callback-metadata keys stamped by `attemptInvoke` /
|
|
192
|
+
* `tryFallbackProviders` carrying the provider (SDK `Providers` enum
|
|
193
|
+
* value) and configured model that actually served a model invocation.
|
|
194
|
+
* Unlike `ls_provider` — which derived providers inherit from their base
|
|
195
|
+
* class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
|
|
196
|
+
* SDK's own routing, including fallback-provider calls. Consumed by the
|
|
197
|
+
* subagent usage-capture handler to tag billing events.
|
|
198
|
+
*/
|
|
199
|
+
INVOKED_PROVIDER = '__invoked_provider',
|
|
200
|
+
INVOKED_MODEL = '__invoked_model',
|
|
188
201
|
READ_FILE = 'read_file',
|
|
189
202
|
BASH_TOOL = 'bash_tool',
|
|
190
203
|
BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
formatArtifactPayload,
|
|
24
24
|
enforceOriginalContentCap,
|
|
25
25
|
formatContentStrings,
|
|
26
|
+
isLegacyConvertible,
|
|
26
27
|
createPruneMessages,
|
|
27
28
|
addCacheControl,
|
|
28
29
|
getMessageId,
|
|
@@ -45,6 +46,7 @@ import {
|
|
|
45
46
|
isAnthropicLike,
|
|
46
47
|
isOpenAILike,
|
|
47
48
|
isGoogleLike,
|
|
49
|
+
apportionTokenCounts,
|
|
48
50
|
joinKeys,
|
|
49
51
|
sleep,
|
|
50
52
|
} from '@/utils';
|
|
@@ -89,6 +91,55 @@ const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
|
|
|
89
91
|
/** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
|
|
90
92
|
const CALIBRATION_VARIANCE_THRESHOLD = 0.15;
|
|
91
93
|
|
|
94
|
+
/**
|
|
95
|
+
* Start index of the span post-prune formatters can mutate in place: the
|
|
96
|
+
* trailing tool batch plus its owning AI message (artifact formatting touches
|
|
97
|
+
* every tool result after the last AI tool call; Bedrock rewrites the AI
|
|
98
|
+
* message before a trailing tool result). Capped so the usage-snapshot
|
|
99
|
+
* recount stays constant-cost.
|
|
100
|
+
*/
|
|
101
|
+
function trailingMutationStart(messages: BaseMessage[]): number {
|
|
102
|
+
const MAX_SPAN = 16;
|
|
103
|
+
let index = messages.length - 1;
|
|
104
|
+
while (
|
|
105
|
+
index >= 0 &&
|
|
106
|
+
messages[index]?.getType() === 'tool' &&
|
|
107
|
+
messages.length - index < MAX_SPAN
|
|
108
|
+
) {
|
|
109
|
+
index--;
|
|
110
|
+
}
|
|
111
|
+
return Math.max(0, Math.min(index, messages.length - 2));
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Re-derives the breakdown fields coupled to the calibrated budget math so
|
|
116
|
+
* the snapshot stays internally consistent: the aggregate
|
|
117
|
+
* `instructionTokens`/`availableForMessages` reflect the pruner's effective
|
|
118
|
+
* (calibrated) overhead — component fields remain local estimates — and
|
|
119
|
+
* `messageTokens` mirrors `contextBudget - instructions - remaining`.
|
|
120
|
+
*/
|
|
121
|
+
function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
|
|
122
|
+
const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
|
|
123
|
+
if (effectiveInstructionTokens == null) {
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
breakdown.instructionTokens = effectiveInstructionTokens;
|
|
127
|
+
if (contextBudget == null) {
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
breakdown.availableForMessages = Math.max(
|
|
131
|
+
0,
|
|
132
|
+
contextBudget - effectiveInstructionTokens
|
|
133
|
+
);
|
|
134
|
+
if (usage.remainingContextTokens == null) {
|
|
135
|
+
return;
|
|
136
|
+
}
|
|
137
|
+
breakdown.messageTokens = Math.max(
|
|
138
|
+
0,
|
|
139
|
+
contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
|
|
140
|
+
);
|
|
141
|
+
}
|
|
142
|
+
|
|
92
143
|
type ReasoningKey = 'reasoning_content' | 'reasoning';
|
|
93
144
|
type ReasoningSummary = { summary?: Array<{ text?: string }> };
|
|
94
145
|
type ReasoningDetail = { type?: string; text?: string };
|
|
@@ -825,6 +876,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
825
876
|
agentContexts: Map<string, AgentContext> = new Map();
|
|
826
877
|
/** Default agent ID to use */
|
|
827
878
|
defaultAgentId: string;
|
|
879
|
+
/**
|
|
880
|
+
* Host sink for model usage emitted inside subagent child runs. Threaded
|
|
881
|
+
* into each `SubagentExecutor` this graph creates (and from there into
|
|
882
|
+
* child graphs, so nested subagents report too). See
|
|
883
|
+
* {@link t.StandardGraphInput.subagentUsageSink}.
|
|
884
|
+
*/
|
|
885
|
+
subagentUsageSink?: t.SubagentUsageSink;
|
|
828
886
|
|
|
829
887
|
constructor({
|
|
830
888
|
runId,
|
|
@@ -834,11 +892,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
834
892
|
tokenCounter,
|
|
835
893
|
indexTokenCountMap,
|
|
836
894
|
calibrationRatio,
|
|
895
|
+
subagentUsageSink,
|
|
837
896
|
}: t.StandardGraphInput) {
|
|
838
897
|
super();
|
|
839
898
|
this.runId = runId;
|
|
840
899
|
this.signal = signal;
|
|
841
900
|
this.langfuse = langfuse;
|
|
901
|
+
this.subagentUsageSink = subagentUsageSink;
|
|
842
902
|
|
|
843
903
|
if (agents.length === 0) {
|
|
844
904
|
throw new Error('At least one agent configuration is required');
|
|
@@ -1423,6 +1483,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1423
1483
|
this.config = config;
|
|
1424
1484
|
|
|
1425
1485
|
let messagesToUse = messages;
|
|
1486
|
+
let contextUsage: t.ContextUsageEvent | null = null;
|
|
1426
1487
|
if (
|
|
1427
1488
|
!agentContext.pruneMessages &&
|
|
1428
1489
|
agentContext.tokenCounter &&
|
|
@@ -1462,6 +1523,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1462
1523
|
originalToolContent,
|
|
1463
1524
|
calibrationRatio,
|
|
1464
1525
|
resolvedInstructionOverhead,
|
|
1526
|
+
contextBudget,
|
|
1527
|
+
effectiveInstructionTokens,
|
|
1465
1528
|
} = agentContext.pruneMessages({
|
|
1466
1529
|
messages,
|
|
1467
1530
|
usageMetadata: agentContext.currentUsage,
|
|
@@ -1489,10 +1552,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1489
1552
|
: 1;
|
|
1490
1553
|
if (variance > CALIBRATION_VARIANCE_THRESHOLD) {
|
|
1491
1554
|
agentContext.toolSchemaTokens = calibratedToolTokens;
|
|
1555
|
+
/** Largest-remainder apportionment keeps the per-tool breakdown
|
|
1556
|
+
* summing exactly to the calibrated aggregate */
|
|
1557
|
+
if (agentContext.toolTokenCounts != null && currentToolTokens > 0) {
|
|
1558
|
+
agentContext.toolTokenCounts = apportionTokenCounts(
|
|
1559
|
+
agentContext.toolTokenCounts,
|
|
1560
|
+
calibratedToolTokens / currentToolTokens,
|
|
1561
|
+
calibratedToolTokens
|
|
1562
|
+
);
|
|
1563
|
+
}
|
|
1492
1564
|
}
|
|
1493
1565
|
}
|
|
1494
1566
|
messagesToUse = context;
|
|
1495
1567
|
|
|
1568
|
+
/** Dispatched right before the model invoke — a summarization
|
|
1569
|
+
* detour returns from this node without an LLM call, and the
|
|
1570
|
+
* post-summary retry produces its own snapshot.
|
|
1571
|
+
*
|
|
1572
|
+
* The breakdown describes the post-prune prompt: counts from the
|
|
1573
|
+
* kept context, message tokens derived from the same calibrated
|
|
1574
|
+
* budget math as `remainingContextTokens` (the index map is keyed
|
|
1575
|
+
* by pre-prune state indices, so summing it over `context` would
|
|
1576
|
+
* missum); `prePruneContextTokens` carries the pre-prune metric. */
|
|
1577
|
+
const usageBreakdown = agentContext.getTokenBudgetBreakdown(messages);
|
|
1578
|
+
usageBreakdown.messageCount = context.length;
|
|
1579
|
+
contextUsage = {
|
|
1580
|
+
runId: this.runId,
|
|
1581
|
+
agentId,
|
|
1582
|
+
breakdown: usageBreakdown,
|
|
1583
|
+
contextBudget,
|
|
1584
|
+
effectiveInstructionTokens,
|
|
1585
|
+
prePruneContextTokens,
|
|
1586
|
+
remainingContextTokens,
|
|
1587
|
+
calibrationRatio: agentContext.calibrationRatio,
|
|
1588
|
+
};
|
|
1589
|
+
syncBudgetDerivedFields(contextUsage);
|
|
1590
|
+
|
|
1496
1591
|
const hasPrunedMessages =
|
|
1497
1592
|
agentContext.summarizationEnabled === true &&
|
|
1498
1593
|
Array.isArray(messagesToRefine) &&
|
|
@@ -1598,6 +1693,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1598
1693
|
}
|
|
1599
1694
|
|
|
1600
1695
|
let finalMessages = messagesToUse;
|
|
1696
|
+
/** Tail snapshot for the dispatch-time usage delta: in-place
|
|
1697
|
+
* formatters (artifact appends, Bedrock content rewrites, legacy
|
|
1698
|
+
* string conversion) mutate without changing length or identity —
|
|
1699
|
+
* capture before they run. Legacy string conversion can also touch
|
|
1700
|
+
* messages before the tail, so those convertible indices are
|
|
1701
|
+
* tracked separately (none exist in the common case). */
|
|
1702
|
+
const tailStart = trailingMutationStart(messagesToUse);
|
|
1703
|
+
let preFormatTailTokens: number | null = null;
|
|
1704
|
+
let legacyIndices: number[] | null = null;
|
|
1705
|
+
let preFormatLegacyTokens = 0;
|
|
1706
|
+
if (contextUsage != null && agentContext.tokenCounter != null) {
|
|
1707
|
+
preFormatTailTokens = 0;
|
|
1708
|
+
for (const message of messagesToUse.slice(tailStart)) {
|
|
1709
|
+
preFormatTailTokens += agentContext.tokenCounter(message);
|
|
1710
|
+
}
|
|
1711
|
+
if (agentContext.useLegacyContent) {
|
|
1712
|
+
legacyIndices = [];
|
|
1713
|
+
for (let i = 0; i < tailStart; i++) {
|
|
1714
|
+
if (isLegacyConvertible(messagesToUse[i])) {
|
|
1715
|
+
legacyIndices.push(i);
|
|
1716
|
+
preFormatLegacyTokens += agentContext.tokenCounter(
|
|
1717
|
+
messagesToUse[i]
|
|
1718
|
+
);
|
|
1719
|
+
}
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1722
|
+
}
|
|
1601
1723
|
if (agentContext.useLegacyContent) {
|
|
1602
1724
|
finalMessages = formatContentStrings(finalMessages);
|
|
1603
1725
|
}
|
|
@@ -1788,6 +1910,79 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1788
1910
|
);
|
|
1789
1911
|
}
|
|
1790
1912
|
|
|
1913
|
+
/** Past the empty-prompt guard — a model call is now guaranteed */
|
|
1914
|
+
if (contextUsage != null) {
|
|
1915
|
+
const usageRatio =
|
|
1916
|
+
contextUsage.calibrationRatio != null &&
|
|
1917
|
+
contextUsage.calibrationRatio > 0
|
|
1918
|
+
? contextUsage.calibrationRatio
|
|
1919
|
+
: 1;
|
|
1920
|
+
if (
|
|
1921
|
+
agentContext.tokenCounter != null &&
|
|
1922
|
+
finalMessages.length !== messagesToUse.length
|
|
1923
|
+
) {
|
|
1924
|
+
/** Post-prune formatting restructured the payload (e.g. thinking
|
|
1925
|
+
* placeholder collapse, orphan drops) — recount so the gauge
|
|
1926
|
+
* reflects what is actually sent */
|
|
1927
|
+
let rawTokens = 0;
|
|
1928
|
+
for (const message of finalMessages) {
|
|
1929
|
+
rawTokens += agentContext.tokenCounter(message);
|
|
1930
|
+
}
|
|
1931
|
+
contextUsage.breakdown.messageCount = finalMessages.length;
|
|
1932
|
+
if (
|
|
1933
|
+
contextUsage.contextBudget != null &&
|
|
1934
|
+
contextUsage.effectiveInstructionTokens != null
|
|
1935
|
+
) {
|
|
1936
|
+
contextUsage.remainingContextTokens = Math.max(
|
|
1937
|
+
0,
|
|
1938
|
+
contextUsage.contextBudget -
|
|
1939
|
+
contextUsage.effectiveInstructionTokens -
|
|
1940
|
+
Math.round(rawTokens * usageRatio)
|
|
1941
|
+
);
|
|
1942
|
+
}
|
|
1943
|
+
} else if (
|
|
1944
|
+
preFormatTailTokens != null &&
|
|
1945
|
+
agentContext.tokenCounter != null &&
|
|
1946
|
+
contextUsage.remainingContextTokens != null
|
|
1947
|
+
) {
|
|
1948
|
+
/** Same-length formatting can still mutate in place — the trailing
|
|
1949
|
+
* tool batch (artifacts, Bedrock rewrites) and any legacy-converted
|
|
1950
|
+
* messages before it — adjust remaining by the calibrated delta */
|
|
1951
|
+
let postFormatTailTokens = 0;
|
|
1952
|
+
for (const message of finalMessages.slice(tailStart)) {
|
|
1953
|
+
postFormatTailTokens += agentContext.tokenCounter(message);
|
|
1954
|
+
}
|
|
1955
|
+
let formatDelta = postFormatTailTokens - preFormatTailTokens;
|
|
1956
|
+
if (legacyIndices != null && legacyIndices.length > 0) {
|
|
1957
|
+
let postFormatLegacyTokens = 0;
|
|
1958
|
+
for (const index of legacyIndices) {
|
|
1959
|
+
postFormatLegacyTokens += agentContext.tokenCounter(
|
|
1960
|
+
finalMessages[index]
|
|
1961
|
+
);
|
|
1962
|
+
}
|
|
1963
|
+
formatDelta += postFormatLegacyTokens - preFormatLegacyTokens;
|
|
1964
|
+
}
|
|
1965
|
+
if (formatDelta !== 0) {
|
|
1966
|
+
contextUsage.remainingContextTokens = Math.max(
|
|
1967
|
+
0,
|
|
1968
|
+
Math.min(
|
|
1969
|
+
contextUsage.contextBudget ?? Number.MAX_SAFE_INTEGER,
|
|
1970
|
+
contextUsage.remainingContextTokens -
|
|
1971
|
+
Math.round(formatDelta * usageRatio)
|
|
1972
|
+
)
|
|
1973
|
+
);
|
|
1974
|
+
}
|
|
1975
|
+
}
|
|
1976
|
+
syncBudgetDerivedFields(contextUsage);
|
|
1977
|
+
/** Awaited so async host handlers receive the pre-invoke snapshot
|
|
1978
|
+
* before any model deltas are emitted */
|
|
1979
|
+
await safeDispatchCustomEvent(
|
|
1980
|
+
GraphEvents.ON_CONTEXT_USAGE,
|
|
1981
|
+
contextUsage,
|
|
1982
|
+
config
|
|
1983
|
+
);
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1791
1986
|
const invokeStart = Date.now();
|
|
1792
1987
|
const invokeMeta = { runId: this.runId, agentId };
|
|
1793
1988
|
emitAgentLog(
|
|
@@ -2063,6 +2258,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
2063
2258
|
parentAgentId: agentContext.agentId,
|
|
2064
2259
|
langfuse: this.langfuse,
|
|
2065
2260
|
tokenCounter: agentContext.tokenCounter,
|
|
2261
|
+
usageSink: this.subagentUsageSink,
|
|
2066
2262
|
maxDepth: effectiveSubagentDepth,
|
|
2067
2263
|
createChildGraph: (input): StandardGraph => {
|
|
2068
2264
|
const childGraph = new StandardGraph(input);
|
package/src/llm/bedrock/index.ts
CHANGED
|
@@ -34,6 +34,7 @@ import type { BaseMessage, ResponseMetadata } from '@langchain/core/messages';
|
|
|
34
34
|
import type { ChatBedrockConverseInput } from '@langchain/aws';
|
|
35
35
|
import {
|
|
36
36
|
convertToConverseMessages,
|
|
37
|
+
createConverseToolUseStopChunk,
|
|
37
38
|
handleConverseStreamContentBlockStart,
|
|
38
39
|
handleConverseStreamContentBlockDelta,
|
|
39
40
|
handleConverseStreamMetadata,
|
|
@@ -224,6 +225,15 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
224
225
|
}
|
|
225
226
|
|
|
226
227
|
const seenBlockIndices = new Set<number>();
|
|
228
|
+
const toolUseBlockIndices = new Set<number>();
|
|
229
|
+
/**
|
|
230
|
+
* Guardrails can reject an already-streamed toolUse block at
|
|
231
|
+
* `messageStop` (`guardrail_intervened`), after `contentBlockStop` has
|
|
232
|
+
* passed. Only emit eager-execution seals when no guardrails are
|
|
233
|
+
* configured, so a later intervention can't race an eagerly started tool.
|
|
234
|
+
*/
|
|
235
|
+
const sealToolUseOnStop =
|
|
236
|
+
options.guardrailConfig == null && this.guardrailConfig == null;
|
|
227
237
|
|
|
228
238
|
for await (const event of response.stream) {
|
|
229
239
|
if (event.contentBlockStart != null) {
|
|
@@ -234,8 +244,23 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
234
244
|
const idx = event.contentBlockStart.contentBlockIndex;
|
|
235
245
|
if (idx != null) {
|
|
236
246
|
seenBlockIndices.add(idx);
|
|
247
|
+
if (event.contentBlockStart.start?.toolUse != null) {
|
|
248
|
+
toolUseBlockIndices.add(idx);
|
|
249
|
+
}
|
|
237
250
|
}
|
|
238
251
|
yield this.enrichChunk(startChunk, seenBlockIndices);
|
|
252
|
+
|
|
253
|
+
// Registered stream handlers receive chunks through callback
|
|
254
|
+
// events, not the yielded generator — dispatch the start chunk so
|
|
255
|
+
// they see the tool call's id/name (eager chunk state needs both).
|
|
256
|
+
await runManager?.handleLLMNewToken(
|
|
257
|
+
startChunk.text,
|
|
258
|
+
undefined,
|
|
259
|
+
undefined,
|
|
260
|
+
undefined,
|
|
261
|
+
undefined,
|
|
262
|
+
{ chunk: startChunk }
|
|
263
|
+
);
|
|
239
264
|
}
|
|
240
265
|
} else if (event.contentBlockDelta != null) {
|
|
241
266
|
const deltaChunk = handleConverseStreamContentBlockDelta(
|
|
@@ -263,6 +288,21 @@ export class CustomChatBedrockConverse extends ChatBedrockConverse {
|
|
|
263
288
|
const stopIdx = event.contentBlockStop.contentBlockIndex;
|
|
264
289
|
if (stopIdx != null) {
|
|
265
290
|
seenBlockIndices.add(stopIdx);
|
|
291
|
+
if (sealToolUseOnStop && toolUseBlockIndices.has(stopIdx)) {
|
|
292
|
+
// Converse guarantees the block's input is complete at stop, so
|
|
293
|
+
// emit an explicit seal chunk for eager tool execution — through
|
|
294
|
+
// the callback path too, for registered stream handlers.
|
|
295
|
+
const sealChunk = createConverseToolUseStopChunk(stopIdx);
|
|
296
|
+
yield sealChunk;
|
|
297
|
+
await runManager?.handleLLMNewToken(
|
|
298
|
+
sealChunk.text,
|
|
299
|
+
undefined,
|
|
300
|
+
undefined,
|
|
301
|
+
undefined,
|
|
302
|
+
undefined,
|
|
303
|
+
{ chunk: sealChunk }
|
|
304
|
+
);
|
|
305
|
+
}
|
|
266
306
|
}
|
|
267
307
|
} else {
|
|
268
308
|
yield new ChatGenerationChunk({
|