@librechat/agents 3.2.34 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
package/dist/types/types/graph.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { BaseMessage, AIMessageChunk, SystemMessage } from '@langchain/core/messages';
|
|
1
|
+
import type { BaseMessage, AIMessageChunk, SystemMessage, UsageMetadata } from '@langchain/core/messages';
|
|
2
2
|
import type { BindToolsInput } from '@langchain/core/language_models/chat_models';
|
|
3
3
|
import type { START, StateGraph, StateGraphArgs } from '@langchain/langgraph';
|
|
4
4
|
import type { RunnableConfig, Runnable } from '@langchain/core/runnables';
|
|
@@ -7,10 +7,10 @@ import type { GoogleAIToolType } from '@langchain/google-common';
|
|
|
7
7
|
import type { SummarizationNodeInput, SummarizeCompleteEvent, SummarizationConfig, SummarizeStartEvent, SummarizeDeltaEvent } from '@/types/summarize';
|
|
8
8
|
import type { ToolMap, ToolEndEvent, GenericTool, LCTool, ToolExecuteBatchRequest } from '@/types/tools';
|
|
9
9
|
import type { RunStep, RunStepDeltaEvent, MessageDeltaEvent, ReasoningDeltaEvent } from '@/types/stream';
|
|
10
|
+
import type { TokenCounter, TokenBudgetBreakdown } from '@/types/run';
|
|
10
11
|
import type { Providers, Callback, GraphNodeKeys } from '@/common';
|
|
11
12
|
import type { StandardGraph, MultiAgentGraph } from '@/graphs';
|
|
12
13
|
import type { ClientOptions } from '@/types/llm';
|
|
13
|
-
import type { TokenCounter } from '@/types/run';
|
|
14
14
|
/** Interface for bound model with stream and invoke methods */
|
|
15
15
|
export interface ChatModel {
|
|
16
16
|
stream?: (messages: BaseMessage[], config?: RunnableConfig) => Promise<AsyncIterable<AIMessageChunk>>;
|
|
@@ -44,8 +44,31 @@ export interface AgentLogEvent {
|
|
|
44
44
|
runId?: string;
|
|
45
45
|
agentId?: string;
|
|
46
46
|
}
|
|
47
|
+
/**
|
|
48
|
+
* Per-model-call context window usage snapshot, dispatched after pruning and
|
|
49
|
+
* before the model invocation. Dispatched once per `callModel` invocation:
|
|
50
|
+
* fallback retries reuse the snapshot since the prompt is identical — budget
|
|
51
|
+
* numbers reflect the primary provider's tokenizer, and the calibration
|
|
52
|
+
* ratio self-corrects from whichever provider reports usage.
|
|
53
|
+
*/
|
|
54
|
+
export interface ContextUsageEvent {
|
|
55
|
+
runId?: string;
|
|
56
|
+
agentId?: string;
|
|
57
|
+
/** Structural token budget snapshot from AgentContext.getTokenBudgetBreakdown */
|
|
58
|
+
breakdown: TokenBudgetBreakdown;
|
|
59
|
+
/** Usable budget this call: maxContextTokens minus output reserve */
|
|
60
|
+
contextBudget?: number;
|
|
61
|
+
/** Calibrated instruction overhead actually applied this call */
|
|
62
|
+
effectiveInstructionTokens?: number;
|
|
63
|
+
/** Calibrated message tokens before pruning (excluding instructions) */
|
|
64
|
+
prePruneContextTokens?: number;
|
|
65
|
+
/** Tokens still free after instructions + pruned messages */
|
|
66
|
+
remainingContextTokens?: number;
|
|
67
|
+
/** EMA ratio of provider-reported vs locally estimated token counts */
|
|
68
|
+
calibrationRatio?: number;
|
|
69
|
+
}
|
|
47
70
|
export interface EventHandler {
|
|
48
|
-
handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | SubagentUpdateEvent | AgentLogEvent | ToolExecuteBatchRequest | {
|
|
71
|
+
handle(event: string, data: StreamEventData | ModelEndData | RunStep | RunStepDeltaEvent | MessageDeltaEvent | ReasoningDeltaEvent | SummarizeStartEvent | SummarizeDeltaEvent | SummarizeCompleteEvent | SubagentUpdateEvent | AgentLogEvent | ContextUsageEvent | ToolExecuteBatchRequest | {
|
|
49
72
|
result: ToolEndEvent;
|
|
50
73
|
}, metadata?: Record<string, unknown>, graph?: StandardGraph | MultiAgentGraph): void | Promise<void>;
|
|
51
74
|
}
|
|
@@ -199,6 +222,17 @@ export type StandardGraphInput = {
|
|
|
199
222
|
tokenCounter?: TokenCounter;
|
|
200
223
|
indexTokenCountMap?: Record<string, number>;
|
|
201
224
|
calibrationRatio?: number;
|
|
225
|
+
/**
|
|
226
|
+
* Receives a {@link SubagentUsageEvent} for every model call made inside
|
|
227
|
+
* a subagent child run spawned from this graph (including nested
|
|
228
|
+
* subagents and child-side summarization calls). Child graphs run via
|
|
229
|
+
* `invoke()` outside the host's `streamEvents` loop, so their
|
|
230
|
+
* `on_chat_model_end` events never reach the run's handler registry —
|
|
231
|
+
* this sink is the only way hosts can observe child token usage for
|
|
232
|
+
* billing/accounting. Parent-graph model calls are NOT reported here;
|
|
233
|
+
* they already flow through the registry's `CHAT_MODEL_END` handler.
|
|
234
|
+
*/
|
|
235
|
+
subagentUsageSink?: SubagentUsageSink;
|
|
202
236
|
};
|
|
203
237
|
export type GraphEdge = {
|
|
204
238
|
/** Agent ID, use a list for multiple sources */
|
|
@@ -289,6 +323,58 @@ export interface SubagentUpdateEvent {
|
|
|
289
323
|
/** ISO timestamp for ordering / display. */
|
|
290
324
|
timestamp: string;
|
|
291
325
|
}
|
|
326
|
+
/**
|
|
327
|
+
* Token usage for a single model call made inside a subagent child run.
|
|
328
|
+
* Emitted through {@link SubagentUsageSink} as each call completes, so
|
|
329
|
+
* hosts can bill child-run model usage that never reaches the parent
|
|
330
|
+
* run's `CHAT_MODEL_END` handler (child graphs execute via `invoke()`
|
|
331
|
+
* outside the host's `streamEvents` loop).
|
|
332
|
+
*/
|
|
333
|
+
export interface SubagentUsageEvent {
|
|
334
|
+
/** Usage metadata reported by the child's model call. */
|
|
335
|
+
usage: UsageMetadata;
|
|
336
|
+
/**
|
|
337
|
+
* Model that produced this usage. Per-call `ls_model_name` from the
|
|
338
|
+
* model's callback metadata when available (covers child-side
|
|
339
|
+
* summarization or any call that differs from the configured model),
|
|
340
|
+
* then the fallback-invocation's configured model (`INVOKED_MODEL`
|
|
341
|
+
* metadata), then the subagent config's `clientOptions` model.
|
|
342
|
+
*/
|
|
343
|
+
model?: string;
|
|
344
|
+
/**
|
|
345
|
+
* Provider that actually served this call — the SDK `Providers` enum
|
|
346
|
+
* value stamped per-invocation by `attemptInvoke` (`INVOKED_PROVIDER`
|
|
347
|
+
* metadata), so fallback-served calls are attributed to the fallback
|
|
348
|
+
* provider, not the configured primary. Falls back to the subagent
|
|
349
|
+
* config's provider. Never LangSmith's `ls_provider` string — derived
|
|
350
|
+
* providers inherit that from their base class, and hosts key
|
|
351
|
+
* pricing/cache semantics off the enum.
|
|
352
|
+
*/
|
|
353
|
+
provider?: string;
|
|
354
|
+
/** Subagent `type` identifier from the SubagentConfig. */
|
|
355
|
+
subagentType: string;
|
|
356
|
+
/** Child run ID (unique per subagent execution). */
|
|
357
|
+
subagentRunId: string;
|
|
358
|
+
/** Child agent ID assigned to this subagent execution. */
|
|
359
|
+
subagentAgentId: string;
|
|
360
|
+
/**
|
|
361
|
+
* ROOT run ID of the host run that owns billing. For nested subagents
|
|
362
|
+
* each forwarding layer rewrites this upward, so events from any depth
|
|
363
|
+
* surface with the outermost run's ID — never an intermediate
|
|
364
|
+
* `*_sub_*` child id (use {@link subagentRunId} to identify the
|
|
365
|
+
* emitting child).
|
|
366
|
+
*/
|
|
367
|
+
runId: string;
|
|
368
|
+
}
|
|
369
|
+
/**
|
|
370
|
+
* Host-provided callback receiving {@link SubagentUsageEvent}s. Invoked as
|
|
371
|
+
* each child model call completes. May return a promise — the executor
|
|
372
|
+
* awaits each dispatch (so all usage is recorded before the child's result
|
|
373
|
+
* resolves to the parent) and swallows both synchronous throws and
|
|
374
|
+
* rejections; implementations should still be cheap, as they sit on the
|
|
375
|
+
* child's model-call path.
|
|
376
|
+
*/
|
|
377
|
+
export type SubagentUsageSink = (event: SubagentUsageEvent) => void | Promise<void>;
|
|
292
378
|
export type LangfuseToolOutputTracingConfig = {
|
|
293
379
|
/**
|
|
294
380
|
* Whether tool outputs should be exported to Langfuse. Defaults to
|
package/dist/types/types/run.d.ts
CHANGED
|
@@ -111,6 +111,15 @@ export type RunConfig = {
|
|
|
111
111
|
*/
|
|
112
112
|
langfuse?: g.LangfuseConfig;
|
|
113
113
|
customHandlers?: Record<string, g.EventHandler>;
|
|
114
|
+
/**
|
|
115
|
+
* Receives token usage for every model call made inside subagent child
|
|
116
|
+
* runs (including nested subagents). Child graphs execute via `invoke()`
|
|
117
|
+
* outside this run's `streamEvents` loop, so their model-end events never
|
|
118
|
+
* reach `customHandlers` — without this sink, child usage is invisible to
|
|
119
|
+
* the host. Parent-graph calls are not reported here; they flow through
|
|
120
|
+
* the registered `CHAT_MODEL_END` handler as usual.
|
|
121
|
+
*/
|
|
122
|
+
subagentUsageSink?: g.SubagentUsageSink;
|
|
114
123
|
/**
|
|
115
124
|
* Pre-constructed hook registry for this run. Hooks fire at lifecycle
|
|
116
125
|
* points in `processStream` (RunStart, UserPromptSubmit, Stop,
|
|
@@ -225,6 +234,10 @@ export type TokenBudgetBreakdown = {
|
|
|
225
234
|
messageTokens: number;
|
|
226
235
|
/** Tokens available for messages after instructions. */
|
|
227
236
|
availableForMessages: number;
|
|
237
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name. */
|
|
238
|
+
toolTokenCounts?: Record<string, number>;
|
|
239
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
240
|
+
deferredToolNames?: string[];
|
|
228
241
|
};
|
|
229
242
|
export type EventStreamOptions = {
|
|
230
243
|
callbacks?: g.ClientCallbacks;
|
package/dist/types/utils/tokens.d.ts
CHANGED
|
@@ -15,6 +15,13 @@ export declare function estimateAnthropicImageTokens(width: number, height: numb
|
|
|
15
15
|
export declare function estimateOpenAIImageTokens(width: number, height: number, detail?: string): number;
|
|
16
16
|
export declare function encodingForModel(model: string): EncodingName;
|
|
17
17
|
export declare function getTokenCountForMessage(message: BaseMessage, getTokenCount: (text: string) => number, encoding?: EncodingName): number;
|
|
18
|
+
/**
|
|
19
|
+
* Largest-remainder apportionment: scales each count by `multiplier` and
|
|
20
|
+
* distributes the rounding remainder so the results sum exactly to
|
|
21
|
+
* `targetTotal`. Keeps per-item breakdowns reconciled with an aggregate
|
|
22
|
+
* computed as a single rounded product of the summed raw counts.
|
|
23
|
+
*/
|
|
24
|
+
export declare function apportionTokenCounts(rawCounts: Record<string, number>, multiplier: number, targetTotal: number): Record<string, number>;
|
|
18
25
|
/**
|
|
19
26
|
* Creates a token counter function using the specified encoding.
|
|
20
27
|
* Lazily loads the encoding data on first use via dynamic import.
|
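package/package.json
CHANGED
[diff body for this file is not present in this extract]
package/src/agents/AgentContext.ts
CHANGED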
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
addCacheControlToStablePrefixMessages,
|
|
22
22
|
} from '@/messages/cache';
|
|
23
23
|
import { createSchemaOnlyTools } from '@/tools/schema';
|
|
24
|
+
import { apportionTokenCounts } from '@/utils/tokens';
|
|
24
25
|
import { DEFAULT_RESERVE_RATIO } from '@/messages';
|
|
25
26
|
import { toJsonSchema } from '@/utils/schema';
|
|
26
27
|
|
|
@@ -191,6 +192,11 @@ export class AgentContext {
|
|
|
191
192
|
dynamicInstructionTokens: number = 0;
|
|
192
193
|
/** Token count for tool schemas only. */
|
|
193
194
|
toolSchemaTokens: number = 0;
|
|
195
|
+
/** Per-tool schema token counts (post-multiplier), keyed by tool name.
|
|
196
|
+
* `undefined` when not calculated (e.g. cached aggregate schema tokens). */
|
|
197
|
+
toolTokenCounts?: Record<string, number>;
|
|
198
|
+
/** Names of counted tools that are deferred (`defer_loading`) and discovered. */
|
|
199
|
+
deferredToolNames: string[] = [];
|
|
194
200
|
/** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
|
|
195
201
|
calibrationRatio: number = 1;
|
|
196
202
|
/** Provider-observed instruction overhead from the pruner's best-variance turn. */
|
|
@@ -894,6 +900,8 @@ export class AgentContext {
|
|
|
894
900
|
this.systemMessageTokens = 0;
|
|
895
901
|
this.dynamicInstructionTokens = 0;
|
|
896
902
|
this.toolSchemaTokens = 0;
|
|
903
|
+
this.toolTokenCounts = undefined;
|
|
904
|
+
this.deferredToolNames = [];
|
|
897
905
|
this.cachedSystemRunnable = undefined;
|
|
898
906
|
this.systemRunnableStale = true;
|
|
899
907
|
this.lastToken = undefined;
|
|
@@ -1006,6 +1014,10 @@ export class AgentContext {
|
|
|
1006
1014
|
): Promise<void> {
|
|
1007
1015
|
let toolTokens = 0;
|
|
1008
1016
|
const countedToolNames = new Set<string>();
|
|
1017
|
+
/** Prototype-free: external tool names like `toString` must not hit
|
|
1018
|
+
* inherited properties during accumulation */
|
|
1019
|
+
const rawToolTokenCounts: Record<string, number> = Object.create(null);
|
|
1020
|
+
const deferredCountedNames = new Set<string>();
|
|
1009
1021
|
|
|
1010
1022
|
/**
|
|
1011
1023
|
* Iterate both `tools` (user-provided instance tools) and `graphTools`
|
|
@@ -1040,11 +1052,14 @@ export class AgentContext {
|
|
|
1040
1052
|
toolName,
|
|
1041
1053
|
(genericTool.description as string | undefined) ?? ''
|
|
1042
1054
|
);
|
|
1043
|
-
|
|
1055
|
+
const schemaTokens = tokenCounter(
|
|
1044
1056
|
new SystemMessage(JSON.stringify(jsonSchema))
|
|
1045
1057
|
);
|
|
1058
|
+
toolTokens += schemaTokens;
|
|
1046
1059
|
if (toolName) {
|
|
1047
1060
|
countedToolNames.add(toolName);
|
|
1061
|
+
rawToolTokenCounts[toolName] =
|
|
1062
|
+
(rawToolTokenCounts[toolName] ?? 0) + schemaTokens;
|
|
1048
1063
|
}
|
|
1049
1064
|
}
|
|
1050
1065
|
}
|
|
@@ -1062,7 +1077,16 @@ export class AgentContext {
|
|
|
1062
1077
|
parameters: def.parameters ?? {},
|
|
1063
1078
|
},
|
|
1064
1079
|
};
|
|
1065
|
-
|
|
1080
|
+
const schemaTokens = tokenCounter(
|
|
1081
|
+
new SystemMessage(JSON.stringify(schema))
|
|
1082
|
+
);
|
|
1083
|
+
toolTokens += schemaTokens;
|
|
1084
|
+
countedToolNames.add(def.name);
|
|
1085
|
+
rawToolTokenCounts[def.name] =
|
|
1086
|
+
(rawToolTokenCounts[def.name] ?? 0) + schemaTokens;
|
|
1087
|
+
if (def.defer_loading === true) {
|
|
1088
|
+
deferredCountedNames.add(def.name);
|
|
1089
|
+
}
|
|
1066
1090
|
}
|
|
1067
1091
|
|
|
1068
1092
|
const isAnthropic =
|
|
@@ -1077,6 +1101,25 @@ export class AgentContext {
|
|
|
1077
1101
|
? ANTHROPIC_TOOL_TOKEN_MULTIPLIER
|
|
1078
1102
|
: DEFAULT_TOOL_TOKEN_MULTIPLIER;
|
|
1079
1103
|
this.toolSchemaTokens = Math.ceil(toolTokens * toolTokenMultiplier);
|
|
1104
|
+
|
|
1105
|
+
/** Largest-remainder apportionment keeps the per-tool counts summing
|
|
1106
|
+
* exactly to the aggregate despite per-entry rounding */
|
|
1107
|
+
const toolTokenCounts = apportionTokenCounts(
|
|
1108
|
+
rawToolTokenCounts,
|
|
1109
|
+
toolTokenMultiplier,
|
|
1110
|
+
this.toolSchemaTokens
|
|
1111
|
+
);
|
|
1112
|
+
const deferredToolNames: string[] = [];
|
|
1113
|
+
for (const name of Object.keys(rawToolTokenCounts)) {
|
|
1114
|
+
if (
|
|
1115
|
+
deferredCountedNames.has(name) ||
|
|
1116
|
+
this.toolRegistry?.get(name)?.defer_loading === true
|
|
1117
|
+
) {
|
|
1118
|
+
deferredToolNames.push(name);
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
1121
|
+
this.toolTokenCounts = toolTokenCounts;
|
|
1122
|
+
this.deferredToolNames = deferredToolNames;
|
|
1080
1123
|
}
|
|
1081
1124
|
|
|
1082
1125
|
/**
|
|
@@ -1212,9 +1255,8 @@ export class AgentContext {
|
|
|
1212
1255
|
* Returns a structured breakdown of how the context token budget is consumed.
|
|
1213
1256
|
* Useful for diagnostics when context overflow or pruning issues occur.
|
|
1214
1257
|
*
|
|
1215
|
-
* Note: `
|
|
1216
|
-
*
|
|
1217
|
-
* recomputed when `markToolsAsDiscovered` is called mid-run.
|
|
1258
|
+
* Note: `markToolsAsDiscovered` re-triggers `calculateInstructionTokens`,
|
|
1259
|
+
* so `toolSchemaTokens`/`toolTokenCounts` refresh before the next call.
|
|
1218
1260
|
*/
|
|
1219
1261
|
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
|
|
1220
1262
|
const maxContextTokens = this.maxContextTokens ?? 0;
|
|
@@ -1238,7 +1280,14 @@ export class AgentContext {
|
|
|
1238
1280
|
}
|
|
1239
1281
|
}
|
|
1240
1282
|
|
|
1241
|
-
|
|
1283
|
+
/** Mirror the pruner's reserve math so availableForMessages agrees
|
|
1284
|
+
* with the contextBudget computed during pruning */
|
|
1285
|
+
const reserveRatio =
|
|
1286
|
+
this.summarizationConfig?.reserveRatio ?? DEFAULT_RESERVE_RATIO;
|
|
1287
|
+
const reserveTokens =
|
|
1288
|
+
reserveRatio > 0 && reserveRatio < 1
|
|
1289
|
+
? Math.round(maxContextTokens * reserveRatio)
|
|
1290
|
+
: 0;
|
|
1242
1291
|
const availableForMessages = Math.max(
|
|
1243
1292
|
0,
|
|
1244
1293
|
maxContextTokens - reserveTokens - this.instructionTokens
|
|
@@ -1255,6 +1304,12 @@ export class AgentContext {
|
|
|
1255
1304
|
messageCount,
|
|
1256
1305
|
messageTokens,
|
|
1257
1306
|
availableForMessages,
|
|
1307
|
+
toolTokenCounts:
|
|
1308
|
+
this.toolTokenCounts != null ? { ...this.toolTokenCounts } : undefined,
|
|
1309
|
+
deferredToolNames:
|
|
1310
|
+
this.deferredToolNames.length > 0
|
|
1311
|
+
? [...this.deferredToolNames]
|
|
1312
|
+
: undefined,
|
|
1258
1313
|
};
|
|
1259
1314
|
}
|
|
1260
1315
|
|
|
@@ -1324,6 +1379,14 @@ export class AgentContext {
|
|
|
1324
1379
|
}
|
|
1325
1380
|
if (hasNewDiscoveries) {
|
|
1326
1381
|
this.systemRunnableStale = true;
|
|
1382
|
+
/** Refresh schema token accounting so the next call's budget and
|
|
1383
|
+
* per-tool breakdown include the newly discovered tools; awaited
|
|
1384
|
+
* via tokenCalculationPromise before the next model call */
|
|
1385
|
+
if (this.tokenCounter) {
|
|
1386
|
+
this.tokenCalculationPromise = this.calculateInstructionTokens(
|
|
1387
|
+
this.tokenCounter
|
|
1388
|
+
);
|
|
1389
|
+
}
|
|
1327
1390
|
}
|
|
1328
1391
|
return hasNewDiscoveries;
|
|
1329
1392
|
}
|
package/src/agents/__tests__/AgentContext.test.ts
CHANGED
|
@@ -1414,7 +1414,7 @@ describe('AgentContext', () => {
|
|
|
1414
1414
|
expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
|
|
1415
1415
|
});
|
|
1416
1416
|
|
|
1417
|
-
it('toolSchemaTokens
|
|
1417
|
+
it('refreshes toolSchemaTokens and per-tool counts after markToolsAsDiscovered', async () => {
|
|
1418
1418
|
const toolDefinitions: t.LCTool[] = [
|
|
1419
1419
|
{
|
|
1420
1420
|
name: 'deferred',
|
|
@@ -1431,9 +1431,13 @@ describe('AgentContext', () => {
|
|
|
1431
1431
|
|
|
1432
1432
|
await ctx.tokenCalculationPromise;
|
|
1433
1433
|
expect(ctx.toolSchemaTokens).toBe(0);
|
|
1434
|
+
expect(ctx.toolTokenCounts).toEqual({});
|
|
1434
1435
|
|
|
1435
1436
|
ctx.markToolsAsDiscovered(['deferred']);
|
|
1436
|
-
|
|
1437
|
+
await ctx.tokenCalculationPromise;
|
|
1438
|
+
expect(ctx.toolSchemaTokens).toBeGreaterThan(0);
|
|
1439
|
+
expect(ctx.toolTokenCounts?.deferred).toBeGreaterThan(0);
|
|
1440
|
+
expect(ctx.deferredToolNames).toContain('deferred');
|
|
1437
1441
|
});
|
|
1438
1442
|
});
|
|
1439
1443
|
|
package/src/common/enum.ts
CHANGED
|
@@ -31,6 +31,8 @@ export enum GraphEvents {
|
|
|
31
31
|
ON_SUBAGENT_UPDATE = 'on_subagent_update',
|
|
32
32
|
/** [Custom] Diagnostic logging event for context management observability */
|
|
33
33
|
ON_AGENT_LOG = 'on_agent_log',
|
|
34
|
+
/** [Custom] Per-model-call context window usage snapshot (post-prune token budget) */
|
|
35
|
+
ON_CONTEXT_USAGE = 'on_context_usage',
|
|
34
36
|
|
|
35
37
|
/* Official Events */
|
|
36
38
|
|
|
@@ -185,6 +187,17 @@ export enum Constants {
|
|
|
185
187
|
/** Anthropic server tool ID prefix (web_search, code_execution, etc.) */
|
|
186
188
|
ANTHROPIC_SERVER_TOOL_PREFIX = 'srvtoolu_',
|
|
187
189
|
SKILL_TOOL = 'skill',
|
|
190
|
+
/**
|
|
191
|
+
* Callback-metadata keys stamped by `attemptInvoke` /
|
|
192
|
+
* `tryFallbackProviders` carrying the provider (SDK `Providers` enum
|
|
193
|
+
* value) and configured model that actually served a model invocation.
|
|
194
|
+
* Unlike `ls_provider` — which derived providers inherit from their base
|
|
195
|
+
* class (e.g. DeepSeek/OpenRouter report `'openai'`) — these reflect the
|
|
196
|
+
* SDK's own routing, including fallback-provider calls. Consumed by the
|
|
197
|
+
* subagent usage-capture handler to tag billing events.
|
|
198
|
+
*/
|
|
199
|
+
INVOKED_PROVIDER = '__invoked_provider',
|
|
200
|
+
INVOKED_MODEL = '__invoked_model',
|
|
188
201
|
READ_FILE = 'read_file',
|
|
189
202
|
BASH_TOOL = 'bash_tool',
|
|
190
203
|
BASH_PROGRAMMATIC_TOOL_CALLING = 'run_tools_with_bash',
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
formatArtifactPayload,
|
|
24
24
|
enforceOriginalContentCap,
|
|
25
25
|
formatContentStrings,
|
|
26
|
+
isLegacyConvertible,
|
|
26
27
|
createPruneMessages,
|
|
27
28
|
addCacheControl,
|
|
28
29
|
getMessageId,
|
|
@@ -45,6 +46,7 @@ import {
|
|
|
45
46
|
isAnthropicLike,
|
|
46
47
|
isOpenAILike,
|
|
47
48
|
isGoogleLike,
|
|
49
|
+
apportionTokenCounts,
|
|
48
50
|
joinKeys,
|
|
49
51
|
sleep,
|
|
50
52
|
} from '@/utils';
|
|
@@ -89,6 +91,55 @@ const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
|
|
|
89
91
|
/** Minimum relative variance before calibrated toolSchemaTokens overrides current value. */
|
|
90
92
|
const CALIBRATION_VARIANCE_THRESHOLD = 0.15;
|
|
91
93
|
|
|
94
|
+
/**
|
|
95
|
+
* Start index of the span post-prune formatters can mutate in place: the
|
|
96
|
+
* trailing tool batch plus its owning AI message (artifact formatting touches
|
|
97
|
+
* every tool result after the last AI tool call; Bedrock rewrites the AI
|
|
98
|
+
* message before a trailing tool result). Capped so the usage-snapshot
|
|
99
|
+
* recount stays constant-cost.
|
|
100
|
+
*/
|
|
101
|
+
function trailingMutationStart(messages: BaseMessage[]): number {
|
|
102
|
+
const MAX_SPAN = 16;
|
|
103
|
+
let index = messages.length - 1;
|
|
104
|
+
while (
|
|
105
|
+
index >= 0 &&
|
|
106
|
+
messages[index]?.getType() === 'tool' &&
|
|
107
|
+
messages.length - index < MAX_SPAN
|
|
108
|
+
) {
|
|
109
|
+
index--;
|
|
110
|
+
}
|
|
111
|
+
return Math.max(0, Math.min(index, messages.length - 2));
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Re-derives the breakdown fields coupled to the calibrated budget math so
|
|
116
|
+
* the snapshot stays internally consistent: the aggregate
|
|
117
|
+
* `instructionTokens`/`availableForMessages` reflect the pruner's effective
|
|
118
|
+
* (calibrated) overhead — component fields remain local estimates — and
|
|
119
|
+
* `messageTokens` mirrors `contextBudget - instructions - remaining`.
|
|
120
|
+
*/
|
|
121
|
+
function syncBudgetDerivedFields(usage: t.ContextUsageEvent): void {
|
|
122
|
+
const { breakdown, contextBudget, effectiveInstructionTokens } = usage;
|
|
123
|
+
if (effectiveInstructionTokens == null) {
|
|
124
|
+
return;
|
|
125
|
+
}
|
|
126
|
+
breakdown.instructionTokens = effectiveInstructionTokens;
|
|
127
|
+
if (contextBudget == null) {
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
breakdown.availableForMessages = Math.max(
|
|
131
|
+
0,
|
|
132
|
+
contextBudget - effectiveInstructionTokens
|
|
133
|
+
);
|
|
134
|
+
if (usage.remainingContextTokens == null) {
|
|
135
|
+
return;
|
|
136
|
+
}
|
|
137
|
+
breakdown.messageTokens = Math.max(
|
|
138
|
+
0,
|
|
139
|
+
contextBudget - effectiveInstructionTokens - usage.remainingContextTokens
|
|
140
|
+
);
|
|
141
|
+
}
|
|
142
|
+
|
|
92
143
|
type ReasoningKey = 'reasoning_content' | 'reasoning';
|
|
93
144
|
type ReasoningSummary = { summary?: Array<{ text?: string }> };
|
|
94
145
|
type ReasoningDetail = { type?: string; text?: string };
|
|
@@ -825,6 +876,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
825
876
|
agentContexts: Map<string, AgentContext> = new Map();
|
|
826
877
|
/** Default agent ID to use */
|
|
827
878
|
defaultAgentId: string;
|
|
879
|
+
/**
|
|
880
|
+
* Host sink for model usage emitted inside subagent child runs. Threaded
|
|
881
|
+
* into each `SubagentExecutor` this graph creates (and from there into
|
|
882
|
+
* child graphs, so nested subagents report too). See
|
|
883
|
+
* {@link t.StandardGraphInput.subagentUsageSink}.
|
|
884
|
+
*/
|
|
885
|
+
subagentUsageSink?: t.SubagentUsageSink;
|
|
828
886
|
|
|
829
887
|
constructor({
|
|
830
888
|
runId,
|
|
@@ -834,11 +892,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
834
892
|
tokenCounter,
|
|
835
893
|
indexTokenCountMap,
|
|
836
894
|
calibrationRatio,
|
|
895
|
+
subagentUsageSink,
|
|
837
896
|
}: t.StandardGraphInput) {
|
|
838
897
|
super();
|
|
839
898
|
this.runId = runId;
|
|
840
899
|
this.signal = signal;
|
|
841
900
|
this.langfuse = langfuse;
|
|
901
|
+
this.subagentUsageSink = subagentUsageSink;
|
|
842
902
|
|
|
843
903
|
if (agents.length === 0) {
|
|
844
904
|
throw new Error('At least one agent configuration is required');
|
|
@@ -1423,6 +1483,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1423
1483
|
this.config = config;
|
|
1424
1484
|
|
|
1425
1485
|
let messagesToUse = messages;
|
|
1486
|
+
let contextUsage: t.ContextUsageEvent | null = null;
|
|
1426
1487
|
if (
|
|
1427
1488
|
!agentContext.pruneMessages &&
|
|
1428
1489
|
agentContext.tokenCounter &&
|
|
@@ -1462,6 +1523,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1462
1523
|
originalToolContent,
|
|
1463
1524
|
calibrationRatio,
|
|
1464
1525
|
resolvedInstructionOverhead,
|
|
1526
|
+
contextBudget,
|
|
1527
|
+
effectiveInstructionTokens,
|
|
1465
1528
|
} = agentContext.pruneMessages({
|
|
1466
1529
|
messages,
|
|
1467
1530
|
usageMetadata: agentContext.currentUsage,
|
|
@@ -1489,10 +1552,42 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1489
1552
|
: 1;
|
|
1490
1553
|
if (variance > CALIBRATION_VARIANCE_THRESHOLD) {
|
|
1491
1554
|
agentContext.toolSchemaTokens = calibratedToolTokens;
|
|
1555
|
+
/** Largest-remainder apportionment keeps the per-tool breakdown
|
|
1556
|
+
* summing exactly to the calibrated aggregate */
|
|
1557
|
+
if (agentContext.toolTokenCounts != null && currentToolTokens > 0) {
|
|
1558
|
+
agentContext.toolTokenCounts = apportionTokenCounts(
|
|
1559
|
+
agentContext.toolTokenCounts,
|
|
1560
|
+
calibratedToolTokens / currentToolTokens,
|
|
1561
|
+
calibratedToolTokens
|
|
1562
|
+
);
|
|
1563
|
+
}
|
|
1492
1564
|
}
|
|
1493
1565
|
}
|
|
1494
1566
|
messagesToUse = context;
|
|
1495
1567
|
|
|
1568
|
+
/** Dispatched right before the model invoke — a summarization
|
|
1569
|
+
* detour returns from this node without an LLM call, and the
|
|
1570
|
+
* post-summary retry produces its own snapshot.
|
|
1571
|
+
*
|
|
1572
|
+
* The breakdown describes the post-prune prompt: counts from the
|
|
1573
|
+
* kept context, message tokens derived from the same calibrated
|
|
1574
|
+
* budget math as `remainingContextTokens` (the index map is keyed
|
|
1575
|
+
* by pre-prune state indices, so summing it over `context` would
|
|
1576
|
+
* produce a wrong total); `prePruneContextTokens` carries the pre-prune metric. */
|
|
1577
|
+
const usageBreakdown = agentContext.getTokenBudgetBreakdown(messages);
|
|
1578
|
+
usageBreakdown.messageCount = context.length;
|
|
1579
|
+
contextUsage = {
|
|
1580
|
+
runId: this.runId,
|
|
1581
|
+
agentId,
|
|
1582
|
+
breakdown: usageBreakdown,
|
|
1583
|
+
contextBudget,
|
|
1584
|
+
effectiveInstructionTokens,
|
|
1585
|
+
prePruneContextTokens,
|
|
1586
|
+
remainingContextTokens,
|
|
1587
|
+
calibrationRatio: agentContext.calibrationRatio,
|
|
1588
|
+
};
|
|
1589
|
+
syncBudgetDerivedFields(contextUsage);
|
|
1590
|
+
|
|
1496
1591
|
const hasPrunedMessages =
|
|
1497
1592
|
agentContext.summarizationEnabled === true &&
|
|
1498
1593
|
Array.isArray(messagesToRefine) &&
|
|
@@ -1598,6 +1693,33 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1598
1693
|
}
|
|
1599
1694
|
|
|
1600
1695
|
let finalMessages = messagesToUse;
|
|
1696
|
+
/** Tail snapshot for the dispatch-time usage delta: in-place
|
|
1697
|
+
* formatters (artifact appends, Bedrock content rewrites, legacy
|
|
1698
|
+
* string conversion) mutate without changing length or identity —
|
|
1699
|
+
* capture before they run. Legacy string conversion can also touch
|
|
1700
|
+
* messages before the tail, so those convertible indices are
|
|
1701
|
+
* tracked separately (none exist in the common case). */
|
|
1702
|
+
const tailStart = trailingMutationStart(messagesToUse);
|
|
1703
|
+
let preFormatTailTokens: number | null = null;
|
|
1704
|
+
let legacyIndices: number[] | null = null;
|
|
1705
|
+
let preFormatLegacyTokens = 0;
|
|
1706
|
+
if (contextUsage != null && agentContext.tokenCounter != null) {
|
|
1707
|
+
preFormatTailTokens = 0;
|
|
1708
|
+
for (const message of messagesToUse.slice(tailStart)) {
|
|
1709
|
+
preFormatTailTokens += agentContext.tokenCounter(message);
|
|
1710
|
+
}
|
|
1711
|
+
if (agentContext.useLegacyContent) {
|
|
1712
|
+
legacyIndices = [];
|
|
1713
|
+
for (let i = 0; i < tailStart; i++) {
|
|
1714
|
+
if (isLegacyConvertible(messagesToUse[i])) {
|
|
1715
|
+
legacyIndices.push(i);
|
|
1716
|
+
preFormatLegacyTokens += agentContext.tokenCounter(
|
|
1717
|
+
messagesToUse[i]
|
|
1718
|
+
);
|
|
1719
|
+
}
|
|
1720
|
+
}
|
|
1721
|
+
}
|
|
1722
|
+
}
|
|
1601
1723
|
if (agentContext.useLegacyContent) {
|
|
1602
1724
|
finalMessages = formatContentStrings(finalMessages);
|
|
1603
1725
|
}
|
|
@@ -1788,6 +1910,79 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1788
1910
|
);
|
|
1789
1911
|
}
|
|
1790
1912
|
|
|
1913
|
+
/** Past the empty-prompt guard — a model call is now guaranteed */
|
|
1914
|
+
if (contextUsage != null) {
|
|
1915
|
+
const usageRatio =
|
|
1916
|
+
contextUsage.calibrationRatio != null &&
|
|
1917
|
+
contextUsage.calibrationRatio > 0
|
|
1918
|
+
? contextUsage.calibrationRatio
|
|
1919
|
+
: 1;
|
|
1920
|
+
if (
|
|
1921
|
+
agentContext.tokenCounter != null &&
|
|
1922
|
+
finalMessages.length !== messagesToUse.length
|
|
1923
|
+
) {
|
|
1924
|
+
/** Post-prune formatting restructured the payload (e.g. thinking
|
|
1925
|
+
* placeholder collapse, orphan drops) — recount so the gauge
|
|
1926
|
+
* reflects what is actually sent */
|
|
1927
|
+
let rawTokens = 0;
|
|
1928
|
+
for (const message of finalMessages) {
|
|
1929
|
+
rawTokens += agentContext.tokenCounter(message);
|
|
1930
|
+
}
|
|
1931
|
+
contextUsage.breakdown.messageCount = finalMessages.length;
|
|
1932
|
+
if (
|
|
1933
|
+
contextUsage.contextBudget != null &&
|
|
1934
|
+
contextUsage.effectiveInstructionTokens != null
|
|
1935
|
+
) {
|
|
1936
|
+
contextUsage.remainingContextTokens = Math.max(
|
|
1937
|
+
0,
|
|
1938
|
+
contextUsage.contextBudget -
|
|
1939
|
+
contextUsage.effectiveInstructionTokens -
|
|
1940
|
+
Math.round(rawTokens * usageRatio)
|
|
1941
|
+
);
|
|
1942
|
+
}
|
|
1943
|
+
} else if (
|
|
1944
|
+
preFormatTailTokens != null &&
|
|
1945
|
+
agentContext.tokenCounter != null &&
|
|
1946
|
+
contextUsage.remainingContextTokens != null
|
|
1947
|
+
) {
|
|
1948
|
+
/** Same-length formatting can still mutate in place — the trailing
|
|
1949
|
+
* tool batch (artifacts, Bedrock rewrites) and any legacy-converted
|
|
1950
|
+
* messages before it — adjust remaining by the calibrated delta */
|
|
1951
|
+
let postFormatTailTokens = 0;
|
|
1952
|
+
for (const message of finalMessages.slice(tailStart)) {
|
|
1953
|
+
postFormatTailTokens += agentContext.tokenCounter(message);
|
|
1954
|
+
}
|
|
1955
|
+
let formatDelta = postFormatTailTokens - preFormatTailTokens;
|
|
1956
|
+
if (legacyIndices != null && legacyIndices.length > 0) {
|
|
1957
|
+
let postFormatLegacyTokens = 0;
|
|
1958
|
+
for (const index of legacyIndices) {
|
|
1959
|
+
postFormatLegacyTokens += agentContext.tokenCounter(
|
|
1960
|
+
finalMessages[index]
|
|
1961
|
+
);
|
|
1962
|
+
}
|
|
1963
|
+
formatDelta += postFormatLegacyTokens - preFormatLegacyTokens;
|
|
1964
|
+
}
|
|
1965
|
+
if (formatDelta !== 0) {
|
|
1966
|
+
contextUsage.remainingContextTokens = Math.max(
|
|
1967
|
+
0,
|
|
1968
|
+
Math.min(
|
|
1969
|
+
contextUsage.contextBudget ?? Number.MAX_SAFE_INTEGER,
|
|
1970
|
+
contextUsage.remainingContextTokens -
|
|
1971
|
+
Math.round(formatDelta * usageRatio)
|
|
1972
|
+
)
|
|
1973
|
+
);
|
|
1974
|
+
}
|
|
1975
|
+
}
|
|
1976
|
+
syncBudgetDerivedFields(contextUsage);
|
|
1977
|
+
/** Awaited so async host handlers receive the pre-invoke snapshot
|
|
1978
|
+
* before any model deltas are emitted */
|
|
1979
|
+
await safeDispatchCustomEvent(
|
|
1980
|
+
GraphEvents.ON_CONTEXT_USAGE,
|
|
1981
|
+
contextUsage,
|
|
1982
|
+
config
|
|
1983
|
+
);
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1791
1986
|
const invokeStart = Date.now();
|
|
1792
1987
|
const invokeMeta = { runId: this.runId, agentId };
|
|
1793
1988
|
emitAgentLog(
|
|
@@ -2063,6 +2258,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
2063
2258
|
parentAgentId: agentContext.agentId,
|
|
2064
2259
|
langfuse: this.langfuse,
|
|
2065
2260
|
tokenCounter: agentContext.tokenCounter,
|
|
2261
|
+
usageSink: this.subagentUsageSink,
|
|
2066
2262
|
maxDepth: effectiveSubagentDepth,
|
|
2067
2263
|
createChildGraph: (input): StandardGraph => {
|
|
2068
2264
|
const childGraph = new StandardGraph(input);
|