@librechat/agents 3.2.33 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +21 -2
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +38 -2
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +6 -0
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +48 -1
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +19 -0
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +20 -2
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +41 -4
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs +30 -1
- package/dist/cjs/tools/streamedToolCallSeals.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +22 -3
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +38 -3
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +6 -0
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +48 -1
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +19 -0
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +21 -3
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +41 -4
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/streamedToolCallSeals.mjs +25 -2
- package/dist/esm/tools/streamedToolCallSeals.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/bedrock/utils/index.d.ts +1 -1
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +9 -0
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/llm/vertexai/index.d.ts +10 -0
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/ToolNode.d.ts +8 -0
- package/dist/types/tools/streamedToolCallSeals.d.ts +5 -1
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/types/tools.d.ts +10 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/__tests__/stream.eagerEventExecution.test.ts +703 -0
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/bedrock/index.ts +40 -0
- package/src/llm/bedrock/streamSealDispatch.test.ts +158 -0
- package/src/llm/bedrock/utils/index.ts +1 -0
- package/src/llm/bedrock/utils/message_outputs.test.ts +85 -0
- package/src/llm/bedrock/utils/message_outputs.ts +43 -0
- package/src/llm/google/utils/common.test.ts +64 -0
- package/src/llm/google/utils/common.ts +18 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/llm/openai/index.ts +95 -1
- package/src/llm/openai/sequentialToolCallSeals.test.ts +199 -0
- package/src/llm/vertexai/index.ts +31 -0
- package/src/llm/vertexai/sealStreamedToolCalls.test.ts +88 -0
- package/src/llm/vertexai/streamSealDispatch.test.ts +148 -0
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/stream.ts +40 -6
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/ToolNode.ts +85 -3
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/__tests__/ToolNode.onResultCompletion.test.ts +368 -0
- package/src/tools/streamedToolCallSeals.ts +37 -9
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/types/tools.ts +10 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
// src/specs/context-usage.live.test.ts
|
|
2
|
+
/**
|
|
3
|
+
* Live ON_CONTEXT_USAGE / usage accounting verification with real Anthropic
|
|
4
|
+
* calls — single agent, multi-agent handoff, and subagent isolation.
|
|
5
|
+
*
|
|
6
|
+
* Run with:
|
|
7
|
+
* RUN_CONTEXT_USAGE_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- context-usage.live.test.ts --runInBand
|
|
8
|
+
*/
|
|
9
|
+
import { config as dotenvConfig } from 'dotenv';
|
|
10
|
+
dotenvConfig();
|
|
11
|
+
|
|
12
|
+
import { HumanMessage } from '@langchain/core/messages';
|
|
13
|
+
import { describe, expect, it, jest } from '@jest/globals';
|
|
14
|
+
import type { ToolCall } from '@langchain/core/messages/tool';
|
|
15
|
+
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
16
|
+
import type * as t from '@/types';
|
|
17
|
+
import { createTokenCounter, TokenEncoderManager } from '@/utils/tokens';
|
|
18
|
+
import { Constants, GraphEvents, Providers } from '@/common';
|
|
19
|
+
import { ModelEndHandler } from '@/events';
|
|
20
|
+
import { Run } from '@/run';
|
|
21
|
+
|
|
22
|
+
const shouldRunLive =
|
|
23
|
+
process.env.RUN_CONTEXT_USAGE_LIVE_TESTS === '1' &&
|
|
24
|
+
process.env.ANTHROPIC_API_KEY != null &&
|
|
25
|
+
process.env.ANTHROPIC_API_KEY !== '';
|
|
26
|
+
|
|
27
|
+
const describeIfLive = shouldRunLive ? describe : describe.skip;
|
|
28
|
+
const modelName =
|
|
29
|
+
process.env.ANTHROPIC_CONTEXT_LIVE_MODEL ?? 'claude-haiku-4-5';
|
|
30
|
+
|
|
31
|
+
const MAX_CONTEXT_TOKENS = 8000;
|
|
32
|
+
|
|
33
|
+
function createAnthropicAgent(
|
|
34
|
+
agentId: string,
|
|
35
|
+
instructions: string,
|
|
36
|
+
extras: Partial<t.AgentInputs> = {}
|
|
37
|
+
): t.AgentInputs {
|
|
38
|
+
return {
|
|
39
|
+
agentId,
|
|
40
|
+
provider: Providers.ANTHROPIC,
|
|
41
|
+
clientOptions: {
|
|
42
|
+
modelName,
|
|
43
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
44
|
+
temperature: 0,
|
|
45
|
+
maxTokens: 128,
|
|
46
|
+
streaming: true,
|
|
47
|
+
streamUsage: true,
|
|
48
|
+
},
|
|
49
|
+
instructions,
|
|
50
|
+
maxContextTokens: MAX_CONTEXT_TOKENS,
|
|
51
|
+
...extras,
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function createStreamConfig(threadId: string): Partial<RunnableConfig> & {
|
|
56
|
+
version: 'v1' | 'v2';
|
|
57
|
+
streamMode: string;
|
|
58
|
+
} {
|
|
59
|
+
return {
|
|
60
|
+
configurable: { thread_id: threadId },
|
|
61
|
+
streamMode: 'values',
|
|
62
|
+
version: 'v2',
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
interface CapturedEvents {
|
|
67
|
+
contextEvents: t.ContextUsageEvent[];
|
|
68
|
+
subagentUpdates: unknown[];
|
|
69
|
+
collectedUsage: Array<Record<string, number | undefined>>;
|
|
70
|
+
handlers: Record<string, t.EventHandler>;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function createCapture(): CapturedEvents {
|
|
74
|
+
const contextEvents: t.ContextUsageEvent[] = [];
|
|
75
|
+
const subagentUpdates: unknown[] = [];
|
|
76
|
+
const collectedUsage: Array<Record<string, number | undefined>> = [];
|
|
77
|
+
const handlers: Record<string, t.EventHandler> = {
|
|
78
|
+
[GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage as never),
|
|
79
|
+
[GraphEvents.ON_CONTEXT_USAGE]: {
|
|
80
|
+
handle: (_event, data): void => {
|
|
81
|
+
contextEvents.push(data as unknown as t.ContextUsageEvent);
|
|
82
|
+
},
|
|
83
|
+
},
|
|
84
|
+
[GraphEvents.ON_SUBAGENT_UPDATE]: {
|
|
85
|
+
handle: (_event, data): void => {
|
|
86
|
+
subagentUpdates.push(data);
|
|
87
|
+
},
|
|
88
|
+
},
|
|
89
|
+
};
|
|
90
|
+
return { contextEvents, subagentUpdates, collectedUsage, handlers };
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
describeIfLive('Context usage live integration', () => {
|
|
94
|
+
jest.setTimeout(180_000);
|
|
95
|
+
|
|
96
|
+
let tokenCounter: t.TokenCounter;
|
|
97
|
+
|
|
98
|
+
beforeAll(async () => {
|
|
99
|
+
tokenCounter = await createTokenCounter();
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
afterAll(() => {
|
|
103
|
+
TokenEncoderManager.reset();
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
it('emits a snapshot whose estimate tracks real provider input tokens', async () => {
|
|
107
|
+
const capture = createCapture();
|
|
108
|
+
const run = await Run.create<t.IState>({
|
|
109
|
+
runId: `ctx-live-single-${Date.now()}`,
|
|
110
|
+
graphConfig: {
|
|
111
|
+
type: 'standard',
|
|
112
|
+
agents: [
|
|
113
|
+
createAnthropicAgent(
|
|
114
|
+
'solo',
|
|
115
|
+
'You are concise. Reply with one short sentence.'
|
|
116
|
+
),
|
|
117
|
+
],
|
|
118
|
+
},
|
|
119
|
+
returnContent: true,
|
|
120
|
+
skipCleanup: true,
|
|
121
|
+
customHandlers: capture.handlers,
|
|
122
|
+
tokenCounter,
|
|
123
|
+
indexTokenCountMap: {},
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
await run.processStream(
|
|
127
|
+
{ messages: [new HumanMessage('Say hello in five words or fewer.')] },
|
|
128
|
+
createStreamConfig(`ctx-live-single-${Date.now()}`)
|
|
129
|
+
);
|
|
130
|
+
|
|
131
|
+
expect(capture.contextEvents).toHaveLength(1);
|
|
132
|
+
const event = capture.contextEvents[0];
|
|
133
|
+
expect(event.agentId).toBe('solo');
|
|
134
|
+
expect(event.breakdown.maxContextTokens).toBe(MAX_CONTEXT_TOKENS);
|
|
135
|
+
expect(event.contextBudget).toBeLessThanOrEqual(MAX_CONTEXT_TOKENS);
|
|
136
|
+
|
|
137
|
+
expect(capture.collectedUsage).toHaveLength(1);
|
|
138
|
+
const usage = capture.collectedUsage[0];
|
|
139
|
+
expect(usage.input_tokens ?? 0).toBeGreaterThan(0);
|
|
140
|
+
expect(usage.output_tokens ?? 0).toBeGreaterThan(0);
|
|
141
|
+
|
|
142
|
+
/** The gauge shows `contextBudget - remaining` as occupancy; with a real
|
|
143
|
+
* tokenizer it should land in the same ballpark as the provider count */
|
|
144
|
+
const estimatedUsed =
|
|
145
|
+
(event.contextBudget ?? 0) - (event.remainingContextTokens ?? 0);
|
|
146
|
+
const providerInput = usage.input_tokens ?? 0;
|
|
147
|
+
expect(estimatedUsed).toBeGreaterThan(0);
|
|
148
|
+
expect(estimatedUsed / providerInput).toBeGreaterThan(0.3);
|
|
149
|
+
expect(estimatedUsed / providerInput).toBeLessThan(3);
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
it('emits per-agent snapshots and usage across a real handoff', async () => {
|
|
153
|
+
const capture = createCapture();
|
|
154
|
+
const nonce = `ctx-live-handoff-${Date.now()}`;
|
|
155
|
+
const expectedReply = `${nonce}-confirmed`;
|
|
156
|
+
const handoffToolName = `${Constants.LC_TRANSFER_TO_}specialist`;
|
|
157
|
+
|
|
158
|
+
const run = await Run.create<t.IState>({
|
|
159
|
+
runId: `${nonce}-run`,
|
|
160
|
+
graphConfig: {
|
|
161
|
+
type: 'multi-agent',
|
|
162
|
+
agents: [
|
|
163
|
+
createAnthropicAgent(
|
|
164
|
+
'router',
|
|
165
|
+
`You are a routing agent. For every user request, your only valid action is to call the handoff tool named ${handoffToolName}. Do not answer directly.
|
|
166
|
+
|
|
167
|
+
When you call the handoff tool, include instructions telling the specialist to reply exactly with this marker and no extra words: ${expectedReply}`
|
|
168
|
+
),
|
|
169
|
+
createAnthropicAgent(
|
|
170
|
+
'specialist',
|
|
171
|
+
'You are the specialist. When you receive handoff instructions with a marker, reply exactly with that marker and no extra words.'
|
|
172
|
+
),
|
|
173
|
+
],
|
|
174
|
+
edges: [
|
|
175
|
+
{
|
|
176
|
+
from: 'router',
|
|
177
|
+
to: 'specialist',
|
|
178
|
+
edgeType: 'handoff',
|
|
179
|
+
description: 'Transfer to the specialist for the final response',
|
|
180
|
+
prompt:
|
|
181
|
+
'Instructions for the specialist. Include any exact marker that must be returned.',
|
|
182
|
+
promptKey: 'instructions',
|
|
183
|
+
},
|
|
184
|
+
],
|
|
185
|
+
},
|
|
186
|
+
returnContent: true,
|
|
187
|
+
skipCleanup: true,
|
|
188
|
+
customHandlers: capture.handlers,
|
|
189
|
+
tokenCounter,
|
|
190
|
+
indexTokenCountMap: {},
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
await run.processStream(
|
|
194
|
+
{
|
|
195
|
+
messages: [
|
|
196
|
+
new HumanMessage(
|
|
197
|
+
`Please delegate this to the specialist. The final answer must be exactly: ${expectedReply}`
|
|
198
|
+
),
|
|
199
|
+
],
|
|
200
|
+
},
|
|
201
|
+
createStreamConfig(`${nonce}-thread`)
|
|
202
|
+
);
|
|
203
|
+
|
|
204
|
+
const agentIds = new Set(
|
|
205
|
+
capture.contextEvents.map((event) => event.agentId)
|
|
206
|
+
);
|
|
207
|
+
expect(agentIds.has('router')).toBe(true);
|
|
208
|
+
expect(agentIds.has('specialist')).toBe(true);
|
|
209
|
+
|
|
210
|
+
for (const event of capture.contextEvents) {
|
|
211
|
+
expect(event.breakdown.maxContextTokens).toBe(MAX_CONTEXT_TOKENS);
|
|
212
|
+
expect(event.contextBudget).toBeLessThanOrEqual(MAX_CONTEXT_TOKENS);
|
|
213
|
+
expect(event.remainingContextTokens).toBeGreaterThan(0);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/** One snapshot per real model call — no ghost snapshots */
|
|
217
|
+
expect(capture.collectedUsage.length).toBe(capture.contextEvents.length);
|
|
218
|
+
expect(capture.collectedUsage.length).toBeGreaterThanOrEqual(2);
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
it('keeps subagent runs isolated from parent context/usage events', async () => {
|
|
222
|
+
const capture = createCapture();
|
|
223
|
+
const parent = createAnthropicAgent(
|
|
224
|
+
'parent',
|
|
225
|
+
'You are a supervisor. Delegate research tasks using the subagent tool.',
|
|
226
|
+
{
|
|
227
|
+
subagentConfigs: [
|
|
228
|
+
{
|
|
229
|
+
type: 'researcher',
|
|
230
|
+
name: 'Research Agent',
|
|
231
|
+
description: 'Researches and summarizes information',
|
|
232
|
+
agentInputs: createAnthropicAgent(
|
|
233
|
+
'researcher',
|
|
234
|
+
'You are a research agent. Answer in one short sentence.'
|
|
235
|
+
),
|
|
236
|
+
},
|
|
237
|
+
],
|
|
238
|
+
}
|
|
239
|
+
);
|
|
240
|
+
|
|
241
|
+
const run = await Run.create<t.IState>({
|
|
242
|
+
runId: `ctx-live-subagent-${Date.now()}`,
|
|
243
|
+
graphConfig: { type: 'standard', agents: [parent] },
|
|
244
|
+
returnContent: true,
|
|
245
|
+
skipCleanup: true,
|
|
246
|
+
customHandlers: capture.handlers,
|
|
247
|
+
tokenCounter,
|
|
248
|
+
indexTokenCountMap: {},
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
/** Parent model is a fake forced to call the subagent tool — the child run
|
|
252
|
+
* executes on the real provider, exercising real isolation */
|
|
253
|
+
const subagentToolCall: ToolCall = {
|
|
254
|
+
id: 'call_subagent_live',
|
|
255
|
+
name: Constants.SUBAGENT,
|
|
256
|
+
args: {
|
|
257
|
+
description: 'What is the capital of France? One short sentence.',
|
|
258
|
+
subagent_type: 'researcher',
|
|
259
|
+
},
|
|
260
|
+
type: 'tool_call',
|
|
261
|
+
};
|
|
262
|
+
run.Graph?.overrideTestModel(
|
|
263
|
+
['Delegating to the researcher.', 'The researcher confirmed the answer.'],
|
|
264
|
+
10,
|
|
265
|
+
[subagentToolCall]
|
|
266
|
+
);
|
|
267
|
+
|
|
268
|
+
await run.processStream(
|
|
269
|
+
{ messages: [new HumanMessage('What is the capital of France?')] },
|
|
270
|
+
createStreamConfig(`ctx-live-subagent-${Date.now()}`)
|
|
271
|
+
);
|
|
272
|
+
|
|
273
|
+
/** Child progress arrives only as wrapped subagent updates */
|
|
274
|
+
expect(capture.subagentUpdates.length).toBeGreaterThan(0);
|
|
275
|
+
|
|
276
|
+
/** No raw child snapshots leak into the parent handler registry */
|
|
277
|
+
const childContextEvents = capture.contextEvents.filter(
|
|
278
|
+
(event) => event.agentId !== 'parent'
|
|
279
|
+
);
|
|
280
|
+
expect(childContextEvents).toHaveLength(0);
|
|
281
|
+
for (const event of capture.contextEvents) {
|
|
282
|
+
expect(event.agentId).toBe('parent');
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/** Documented isolation: child model-call usage does not reach the
|
|
286
|
+
* parent's collected usage (fake parent emits no usage_metadata) */
|
|
287
|
+
expect(capture.collectedUsage).toHaveLength(0);
|
|
288
|
+
|
|
289
|
+
const toolMessage = (run.getRunMessages() ?? []).find(
|
|
290
|
+
(message) =>
|
|
291
|
+
message.getType() === 'tool' &&
|
|
292
|
+
(message as { name?: string }).name === Constants.SUBAGENT
|
|
293
|
+
);
|
|
294
|
+
expect(toolMessage).toBeDefined();
|
|
295
|
+
expect(String(toolMessage?.content ?? '').toLowerCase()).toContain('paris');
|
|
296
|
+
});
|
|
297
|
+
});
|
package/src/specs/prune.test.ts
CHANGED
|
@@ -472,6 +472,53 @@ describe('Prune Messages Tests', () => {
|
|
|
472
472
|
expect(typeof result.remainingContextTokens).toBe('number');
|
|
473
473
|
});
|
|
474
474
|
|
|
475
|
+
it('should return remaining tokens in calibrated units when pruning with calibration', () => {
|
|
476
|
+
const tokenCounter = createTestTokenCounter();
|
|
477
|
+
const messages = [
|
|
478
|
+
new SystemMessage('System instruction'),
|
|
479
|
+
new HumanMessage('Message 1'),
|
|
480
|
+
new AIMessage('Response 1'),
|
|
481
|
+
new HumanMessage('Message 2'),
|
|
482
|
+
new AIMessage('Response 2'),
|
|
483
|
+
];
|
|
484
|
+
|
|
485
|
+
const indexTokenCountMap = {
|
|
486
|
+
0: tokenCounter(messages[0]),
|
|
487
|
+
1: tokenCounter(messages[1]),
|
|
488
|
+
2: tokenCounter(messages[2]),
|
|
489
|
+
3: tokenCounter(messages[3]),
|
|
490
|
+
4: tokenCounter(messages[4]),
|
|
491
|
+
};
|
|
492
|
+
|
|
493
|
+
const calibrationRatio = 2;
|
|
494
|
+
const maxTokens = 80;
|
|
495
|
+
const pruneMessages = createPruneMessages({
|
|
496
|
+
maxTokens,
|
|
497
|
+
startIndex: 0,
|
|
498
|
+
tokenCounter,
|
|
499
|
+
indexTokenCountMap,
|
|
500
|
+
reserveRatio: 0,
|
|
501
|
+
calibrationRatio,
|
|
502
|
+
});
|
|
503
|
+
|
|
504
|
+
const result = pruneMessages({ messages });
|
|
505
|
+
|
|
506
|
+
expect(result.messagesToRefine?.length).toBeGreaterThan(0);
|
|
507
|
+
|
|
508
|
+
/** Pruning selects within rawSpaceBudget = maxTokens / ratio (raw units,
|
|
509
|
+
* minus the 3-token assistant label); the returned remaining must be
|
|
510
|
+
* scaled back so `budget - remaining` reflects provider-space usage */
|
|
511
|
+
const keptRaw = result.context.reduce(
|
|
512
|
+
(sum, msg) => sum + tokenCounter(msg),
|
|
513
|
+
0
|
|
514
|
+
);
|
|
515
|
+
const rawSpaceBudget = Math.round(maxTokens / calibrationRatio);
|
|
516
|
+
const expectedRemaining =
|
|
517
|
+
(rawSpaceBudget - keptRaw - 3) * calibrationRatio;
|
|
518
|
+
expect(result.remainingContextTokens).toBe(expectedRemaining);
|
|
519
|
+
expect(result.contextBudget).toBe(maxTokens);
|
|
520
|
+
});
|
|
521
|
+
|
|
475
522
|
it('should respect startType parameter', () => {
|
|
476
523
|
const tokenCounter = createTestTokenCounter();
|
|
477
524
|
const messages = [
|
|
@@ -1397,7 +1444,10 @@ describe('Prune Messages Tests', () => {
|
|
|
1397
1444
|
expect(result.context).toEqual([]);
|
|
1398
1445
|
expect(result.messagesToRefine).toEqual([]);
|
|
1399
1446
|
expect(result.prePruneContextTokens).toBe(0);
|
|
1400
|
-
|
|
1447
|
+
/** contextBudget is the reserve-adjusted budget (8000 − 5% = 7600); remainingContextTokens is that budget minus the 4000-token instruction overhead (3600) */
|
|
1448
|
+
expect(result.contextBudget).toBe(7600);
|
|
1449
|
+
expect(result.effectiveInstructionTokens).toBe(4000);
|
|
1450
|
+
expect(result.remainingContextTokens).toBe(3600);
|
|
1401
1451
|
});
|
|
1402
1452
|
});
|
|
1403
1453
|
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
+
import { ChatGenerationChunk } from '@langchain/core/outputs';
|
|
1
2
|
import { FakeListChatModel } from '@langchain/core/utils/testing';
|
|
2
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
AIMessage,
|
|
5
|
+
AIMessageChunk,
|
|
6
|
+
HumanMessage,
|
|
7
|
+
} from '@langchain/core/messages';
|
|
3
8
|
import type { RunnableConfig } from '@langchain/core/runnables';
|
|
9
|
+
import type { UsageMetadata } from '@langchain/core/messages';
|
|
4
10
|
import type { ToolCall } from '@langchain/core/messages/tool';
|
|
5
11
|
import type * as t from '@/types';
|
|
6
12
|
import {
|
|
@@ -388,4 +394,121 @@ describe('Subagent Integration', () => {
|
|
|
388
394
|
contextWithout!.toolSchemaTokens
|
|
389
395
|
);
|
|
390
396
|
});
|
|
397
|
+
|
|
398
|
+
it('reports child model usage through subagentUsageSink', async () => {
|
|
399
|
+
const CHILD_USAGE = {
|
|
400
|
+
input_tokens: 11,
|
|
401
|
+
output_tokens: 7,
|
|
402
|
+
total_tokens: 18,
|
|
403
|
+
};
|
|
404
|
+
/**
|
|
405
|
+
* The default mock (FakeListChatModel) reports no usage. Re-mock with a
|
|
406
|
+
* subclass that reports `usage_metadata` the way live providers do:
|
|
407
|
+
* stamped on the generation in the invoke path, and carried on a final
|
|
408
|
+
* zero-content chunk in the stream path (the graph's `attemptInvoke`
|
|
409
|
+
* prefers `model.stream()`, and chunk concatenation folds the usage
|
|
410
|
+
* into the aggregated message that `handleLLMEnd` receives).
|
|
411
|
+
*/
|
|
412
|
+
getChatModelClassSpy.mockImplementation(((provider: Providers) => {
|
|
413
|
+
if (provider === Providers.OPENAI) {
|
|
414
|
+
return class extends FakeListChatModel {
|
|
415
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
416
|
+
constructor(_options: any) {
|
|
417
|
+
super({ responses: [CHILD_RESPONSE] });
|
|
418
|
+
}
|
|
419
|
+
async _generate(
|
|
420
|
+
...args: Parameters<FakeListChatModel['_generate']>
|
|
421
|
+
): ReturnType<FakeListChatModel['_generate']> {
|
|
422
|
+
const result = await super._generate(...args);
|
|
423
|
+
for (const generation of result.generations) {
|
|
424
|
+
(generation.message as AIMessage).usage_metadata = {
|
|
425
|
+
...CHILD_USAGE,
|
|
426
|
+
};
|
|
427
|
+
}
|
|
428
|
+
return result;
|
|
429
|
+
}
|
|
430
|
+
async *_streamResponseChunks(
|
|
431
|
+
...args: Parameters<FakeListChatModel['_streamResponseChunks']>
|
|
432
|
+
): ReturnType<FakeListChatModel['_streamResponseChunks']> {
|
|
433
|
+
yield* super._streamResponseChunks(...args);
|
|
434
|
+
yield new ChatGenerationChunk({
|
|
435
|
+
text: '',
|
|
436
|
+
message: new AIMessageChunk({
|
|
437
|
+
content: '',
|
|
438
|
+
usage_metadata: { ...CHILD_USAGE },
|
|
439
|
+
}),
|
|
440
|
+
});
|
|
441
|
+
}
|
|
442
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
443
|
+
} as any;
|
|
444
|
+
}
|
|
445
|
+
return originalGetChatModelClass(provider);
|
|
446
|
+
}) as typeof providers.getChatModelClass);
|
|
447
|
+
|
|
448
|
+
const collectedUsage: UsageMetadata[] = [];
|
|
449
|
+
const sunkEvents: t.SubagentUsageEvent[] = [];
|
|
450
|
+
const customHandlers: Record<string, t.EventHandler> = {
|
|
451
|
+
[GraphEvents.TOOL_END]: new ToolEndHandler(),
|
|
452
|
+
[GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
const runId = `subagent-usage-${Date.now()}`;
|
|
456
|
+
const run = await Run.create<t.IState>({
|
|
457
|
+
runId,
|
|
458
|
+
graphConfig: {
|
|
459
|
+
type: 'standard',
|
|
460
|
+
agents: [createParentAgent()],
|
|
461
|
+
},
|
|
462
|
+
returnContent: true,
|
|
463
|
+
skipCleanup: true,
|
|
464
|
+
customHandlers,
|
|
465
|
+
subagentUsageSink: (event) => {
|
|
466
|
+
sunkEvents.push(event);
|
|
467
|
+
},
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
const subagentToolCall: ToolCall = {
|
|
471
|
+
id: 'call_subagent_usage',
|
|
472
|
+
name: Constants.SUBAGENT,
|
|
473
|
+
args: {
|
|
474
|
+
description: 'What is the capital of France?',
|
|
475
|
+
subagent_type: 'researcher',
|
|
476
|
+
},
|
|
477
|
+
type: 'tool_call',
|
|
478
|
+
};
|
|
479
|
+
|
|
480
|
+
run.Graph?.overrideTestModel(
|
|
481
|
+
[
|
|
482
|
+
'Let me delegate this research task.',
|
|
483
|
+
`Based on the research: ${CHILD_RESPONSE}`,
|
|
484
|
+
],
|
|
485
|
+
10,
|
|
486
|
+
[subagentToolCall]
|
|
487
|
+
);
|
|
488
|
+
|
|
489
|
+
await run.processStream(
|
|
490
|
+
{ messages: [new HumanMessage('What is the capital of France?')] },
|
|
491
|
+
callerConfig
|
|
492
|
+
);
|
|
493
|
+
|
|
494
|
+
/** Child made exactly one model call; all events are child-tagged. */
|
|
495
|
+
expect(sunkEvents).toHaveLength(1);
|
|
496
|
+
const event = sunkEvents[0];
|
|
497
|
+
/** Chunk concat adds empty `*_token_details` — match on the counts. */
|
|
498
|
+
expect(event.usage).toMatchObject(CHILD_USAGE);
|
|
499
|
+
expect(event.subagentType).toBe('researcher');
|
|
500
|
+
expect(event.subagentAgentId).toBe('researcher');
|
|
501
|
+
expect(event.provider).toBe(Providers.OPENAI);
|
|
502
|
+
/** FakeListChatModel emits no ls_model_name → config fallback. */
|
|
503
|
+
expect(event.model).toBe('gpt-4o-mini');
|
|
504
|
+
expect(event.runId).toBe(runId);
|
|
505
|
+
expect(event.subagentRunId).toContain(`${runId}_sub_`);
|
|
506
|
+
/**
|
|
507
|
+
* The parent's own calls must NOT be routed through the sink — they
|
|
508
|
+
* flow through the registered CHAT_MODEL_END handler. (The fake
|
|
509
|
+
* override model reports no usage, so collectedUsage stays empty;
|
|
510
|
+
* the load-bearing assertion is that the sink saw no parent calls.)
|
|
511
|
+
*/
|
|
512
|
+
expect(sunkEvents.every((e) => e.subagentType === 'researcher')).toBe(true);
|
|
513
|
+
});
|
|
391
514
|
});
|
package/src/stream.ts
CHANGED
|
@@ -5,6 +5,12 @@ import type { AIMessageChunk } from '@langchain/core/messages';
|
|
|
5
5
|
import type { AgentContext } from '@/agents/AgentContext';
|
|
6
6
|
import type { StandardGraph } from '@/graphs';
|
|
7
7
|
import type * as t from '@/types';
|
|
8
|
+
import {
|
|
9
|
+
getStreamedToolCallSeal,
|
|
10
|
+
getStreamedToolCallAdapter,
|
|
11
|
+
streamedToolCallAdapterAllowsSequentialSeal,
|
|
12
|
+
type StreamedToolCallSeal,
|
|
13
|
+
} from '@/tools/streamedToolCallSeals';
|
|
8
14
|
import {
|
|
9
15
|
ToolCallTypes,
|
|
10
16
|
ContentTypes,
|
|
@@ -15,11 +21,6 @@ import {
|
|
|
15
21
|
CODE_EXECUTION_TOOLS,
|
|
16
22
|
LOCAL_CODING_BUNDLE_NAMES,
|
|
17
23
|
} from '@/common';
|
|
18
|
-
import {
|
|
19
|
-
getStreamedToolCallSeal,
|
|
20
|
-
getStreamedToolCallAdapter,
|
|
21
|
-
type StreamedToolCallSeal,
|
|
22
|
-
} from '@/tools/streamedToolCallSeals';
|
|
23
24
|
import {
|
|
24
25
|
buildToolExecutionRequestPlan,
|
|
25
26
|
coerceRecordArgs,
|
|
@@ -265,6 +266,21 @@ function hasExplicitStreamedToolCallSeals(
|
|
|
265
266
|
);
|
|
266
267
|
}
|
|
267
268
|
|
|
269
|
+
/**
|
|
270
|
+
* True when a provider adapter marked every tool call on this chunk as
|
|
271
|
+
* complete on arrival (seal kind `all`), e.g. Google GenAI / Vertex AI, whose
|
|
272
|
+
* protocol delivers function calls as whole objects rather than arg deltas.
|
|
273
|
+
*/
|
|
274
|
+
function hasOnArrivalToolCallSeal(chunk: Partial<AIMessageChunk>): boolean {
|
|
275
|
+
const metadata = chunk.response_metadata as
|
|
276
|
+
| Record<string, unknown>
|
|
277
|
+
| undefined;
|
|
278
|
+
return (
|
|
279
|
+
getStreamedToolCallAdapter(metadata) != null &&
|
|
280
|
+
getStreamedToolCallSeal(metadata)?.kind === 'all'
|
|
281
|
+
);
|
|
282
|
+
}
|
|
283
|
+
|
|
268
284
|
function hasDirectToolCallInBatch(args: {
|
|
269
285
|
graph: StandardGraph;
|
|
270
286
|
agentContext?: AgentContext;
|
|
@@ -1405,6 +1421,21 @@ export class ChatModelStreamHandler implements t.EventHandler {
|
|
|
1405
1421
|
if (!hasToolCallChunks) {
|
|
1406
1422
|
pruneEagerToolCallChunkStates({ graph, stepKey, clearStep: true });
|
|
1407
1423
|
}
|
|
1424
|
+
} else if (
|
|
1425
|
+
hasOnArrivalToolCallSeal(chunk) &&
|
|
1426
|
+
!hasPotentialDirectToolInStreamContext({ graph, agentContext })
|
|
1427
|
+
) {
|
|
1428
|
+
// Providers like Google never signal `tool_calls`/`tool_use` as the
|
|
1429
|
+
// finish reason, but their adapters seal calls on arrival — prestart
|
|
1430
|
+
// these mid-stream under the same direct-tool guard as streamed
|
|
1431
|
+
// chunk sealing.
|
|
1432
|
+
startEagerToolExecutions({
|
|
1433
|
+
graph,
|
|
1434
|
+
metadata,
|
|
1435
|
+
agentContext,
|
|
1436
|
+
toolCalls: chunk.tool_calls,
|
|
1437
|
+
skipExisting: true,
|
|
1438
|
+
});
|
|
1408
1439
|
}
|
|
1409
1440
|
}
|
|
1410
1441
|
|
|
@@ -1435,7 +1466,10 @@ export class ChatModelStreamHandler implements t.EventHandler {
|
|
|
1435
1466
|
chunk.response_metadata as Record<string, unknown> | undefined
|
|
1436
1467
|
);
|
|
1437
1468
|
const allowSequentialSeal =
|
|
1438
|
-
canPrestartSequentialStreamedToolChunks(agentContext)
|
|
1469
|
+
canPrestartSequentialStreamedToolChunks(agentContext) ||
|
|
1470
|
+
streamedToolCallAdapterAllowsSequentialSeal(
|
|
1471
|
+
chunk.response_metadata as Record<string, unknown> | undefined
|
|
1472
|
+
);
|
|
1439
1473
|
const canStreamEager =
|
|
1440
1474
|
(allowSequentialSeal || hasExplicitStreamedToolCallSeals(chunk)) &&
|
|
1441
1475
|
!hasPotentialDirectToolInStreamContext({ graph, agentContext }) &&
|
|
@@ -6,8 +6,8 @@ import {
|
|
|
6
6
|
DEFAULT_SUMMARIZATION_PROMPT,
|
|
7
7
|
DEFAULT_UPDATE_SUMMARIZATION_PROMPT,
|
|
8
8
|
} from '@/summarization/node';
|
|
9
|
+
import { Constants, GraphEvents, Providers } from '@/common';
|
|
9
10
|
import { AgentContext } from '@/agents/AgentContext';
|
|
10
|
-
import { GraphEvents, Providers } from '@/common';
|
|
11
11
|
import * as providers from '@/llm/providers';
|
|
12
12
|
import * as eventUtils from '@/utils/events';
|
|
13
13
|
|
|
@@ -216,6 +216,65 @@ describe('createSummarizeNode', () => {
|
|
|
216
216
|
).toBeUndefined();
|
|
217
217
|
});
|
|
218
218
|
|
|
219
|
+
it('stamps INVOKED_MODEL/INVOKED_PROVIDER metadata for a dedicated summarizer model', async () => {
|
|
220
|
+
captureEvents();
|
|
221
|
+
|
|
222
|
+
const capturedConfigs: unknown[] = [];
|
|
223
|
+
jest.spyOn(providers, 'getChatModelClass').mockReturnValue(
|
|
224
|
+
class {
|
|
225
|
+
constructor() {
|
|
226
|
+
return {
|
|
227
|
+
invoke: jest
|
|
228
|
+
.fn()
|
|
229
|
+
.mockImplementation(
|
|
230
|
+
async (_messages: unknown, config?: unknown) => {
|
|
231
|
+
capturedConfigs.push(config);
|
|
232
|
+
return { content: 'Summary text' };
|
|
233
|
+
}
|
|
234
|
+
),
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
} as never
|
|
238
|
+
);
|
|
239
|
+
|
|
240
|
+
const agentContext = createAgentContext({
|
|
241
|
+
summarizationConfig: {
|
|
242
|
+
retainRecent: { turns: 0 },
|
|
243
|
+
model: 'gpt-4.1-mini',
|
|
244
|
+
},
|
|
245
|
+
});
|
|
246
|
+
const graph = mockGraph();
|
|
247
|
+
const node = createSummarizeNode({
|
|
248
|
+
agentContext,
|
|
249
|
+
graph,
|
|
250
|
+
generateStepId,
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
await node(
|
|
254
|
+
{
|
|
255
|
+
messages: [new HumanMessage('Hello'), new HumanMessage('World')],
|
|
256
|
+
summarizationRequest: {
|
|
257
|
+
remainingContextTokens: 1000,
|
|
258
|
+
agentId: 'agent_0',
|
|
259
|
+
},
|
|
260
|
+
},
|
|
261
|
+
{} as RunnableConfig
|
|
262
|
+
);
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Usage consumers (the subagent usage-capture handler) attribute the
|
|
266
|
+
* call from these keys — without them, a summarizer model that differs
|
|
267
|
+
* from the agent's primary would be billed against the primary config.
|
|
268
|
+
*/
|
|
269
|
+
const config = capturedConfigs[0] as {
|
|
270
|
+
metadata?: Record<string, unknown>;
|
|
271
|
+
};
|
|
272
|
+
expect(config.metadata?.[Constants.INVOKED_MODEL]).toBe('gpt-4.1-mini');
|
|
273
|
+
expect(config.metadata?.[Constants.INVOKED_PROVIDER]).toBe(
|
|
274
|
+
Providers.OPENAI
|
|
275
|
+
);
|
|
276
|
+
});
|
|
277
|
+
|
|
219
278
|
it('collects streamed text when model supports stream()', async () => {
|
|
220
279
|
captureEvents();
|
|
221
280
|
|
|
@@ -10,7 +10,13 @@ import type { AgentContext } from '@/agents/AgentContext';
|
|
|
10
10
|
import type { HookRegistry } from '@/hooks';
|
|
11
11
|
import type { OnChunk } from '@/llm/invoke';
|
|
12
12
|
import type * as t from '@/types';
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
Constants,
|
|
15
|
+
ContentTypes,
|
|
16
|
+
GraphEvents,
|
|
17
|
+
StepTypes,
|
|
18
|
+
Providers,
|
|
19
|
+
} from '@/common';
|
|
14
20
|
import { safeDispatchCustomEvent, emitAgentLog } from '@/utils/events';
|
|
15
21
|
import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
|
|
16
22
|
import { createRemoveAllMessage } from '@/messages/reducer';
|
|
@@ -938,6 +944,19 @@ export function createSummarizeNode({
|
|
|
938
944
|
agent_id: request.agentId,
|
|
939
945
|
summarization_provider: clientConfig.provider,
|
|
940
946
|
summarization_model: clientConfig.modelName,
|
|
947
|
+
/**
|
|
948
|
+
* Per-call model attribution for usage consumers (the subagent
|
|
949
|
+
* usage-capture handler): the summarizer's model can differ from
|
|
950
|
+
* the agent's primary, and providers that emit no `ls_model_name`
|
|
951
|
+
* would otherwise be billed against the primary config's model.
|
|
952
|
+
* Omitted for self-summarize (no explicit model — the primary
|
|
953
|
+
* config fallback is then correct). `tryFallbackProviders`
|
|
954
|
+
* overrides this per fallback attempt; `INVOKED_PROVIDER` is
|
|
955
|
+
* stamped by `attemptInvoke` itself.
|
|
956
|
+
*/
|
|
957
|
+
...(clientConfig.modelName != null && clientConfig.modelName !== ''
|
|
958
|
+
? { [Constants.INVOKED_MODEL]: clientConfig.modelName }
|
|
959
|
+
: {}),
|
|
941
960
|
},
|
|
942
961
|
}
|
|
943
962
|
: undefined;
|