@librechat/agents 3.2.34 → 3.2.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +47 -10
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +121 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/invoke.cjs +49 -8
- package/dist/cjs/llm/invoke.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/messages/content.cjs +12 -14
- package/dist/cjs/messages/content.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +31 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +7 -2
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +12 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +138 -2
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +30 -0
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +47 -10
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +122 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/invoke.mjs +49 -8
- package/dist/esm/llm/invoke.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -3
- package/dist/esm/messages/content.mjs +12 -15
- package/dist/esm/messages/content.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +31 -13
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +7 -2
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +12 -1
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +138 -2
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +30 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +7 -3
- package/dist/types/common/enum.d.ts +13 -0
- package/dist/types/graphs/Graph.d.ts +8 -1
- package/dist/types/llm/invoke.d.ts +1 -1
- package/dist/types/messages/content.d.ts +5 -0
- package/dist/types/messages/prune.d.ts +4 -0
- package/dist/types/run.d.ts +1 -0
- package/dist/types/tools/subagent/SubagentExecutor.d.ts +11 -1
- package/dist/types/types/graph.d.ts +89 -3
- package/dist/types/types/run.d.ts +13 -0
- package/dist/types/utils/tokens.d.ts +7 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +69 -6
- package/src/agents/__tests__/AgentContext.test.ts +6 -2
- package/src/common/enum.ts +13 -0
- package/src/graphs/Graph.ts +196 -0
- package/src/llm/invoke.test.ts +79 -1
- package/src/llm/invoke.ts +58 -4
- package/src/messages/content.ts +24 -32
- package/src/messages/prune.ts +39 -2
- package/src/run.ts +5 -0
- package/src/scripts/subagent-usage-sink.ts +176 -0
- package/src/specs/context-accuracy.live.test.ts +409 -0
- package/src/specs/context-usage-event.test.ts +117 -0
- package/src/specs/context-usage.live.test.ts +297 -0
- package/src/specs/prune.test.ts +51 -1
- package/src/specs/subagent.test.ts +124 -1
- package/src/summarization/__tests__/node.test.ts +60 -1
- package/src/summarization/node.ts +20 -1
- package/src/tools/__tests__/SubagentExecutor.test.ts +443 -1
- package/src/tools/subagent/SubagentExecutor.ts +221 -3
- package/src/types/graph.ts +94 -1
- package/src/types/run.ts +13 -0
- package/src/utils/__tests__/apportion.test.ts +32 -0
- package/src/utils/tokens.ts +33 -0
package/src/llm/invoke.test.ts
CHANGED
|
@@ -12,8 +12,8 @@ import type { BaseMessage } from '@langchain/core/messages';
|
|
|
12
12
|
import type * as t from '@/types';
|
|
13
13
|
import { ToolOutputReferenceRegistry } from '@/tools/toolOutputReferences';
|
|
14
14
|
import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
|
|
15
|
+
import { Constants, Providers } from '@/common';
|
|
15
16
|
import { ToolNode } from '@/tools/ToolNode';
|
|
16
|
-
import { Providers } from '@/common';
|
|
17
17
|
|
|
18
18
|
/**
|
|
19
19
|
* Minimal stub model shape `attemptInvoke` reads. Either `invoke` or
|
|
@@ -341,6 +341,84 @@ describe('tryFallbackProviders applies the same lazy annotation transform', () =
|
|
|
341
341
|
});
|
|
342
342
|
});
|
|
343
343
|
|
|
344
|
+
describe('invocation attribution metadata', () => {
  /** The slice of the config these tests inspect. */
  type CapturedConfig = { metadata?: Record<string, unknown> };

  /**
   * Builds a stub model whose `invoke` records every config it is called
   * with, so a test can inspect the metadata that actually reached the model.
   */
  const createCapturingModel = (): {
    model: StubModel;
    capturedConfigs: unknown[];
  } => {
    const capturedConfigs: unknown[] = [];
    const model: StubModel = {
      invoke: jest.fn(
        async (_m: BaseMessage[], config?: unknown): Promise<AIMessage> => {
          capturedConfigs.push(config);
          return new AIMessage({ content: 'ok' });
        }
      ),
    };
    return { model, capturedConfigs };
  };

  it('stamps INVOKED_PROVIDER on the config passed to the model', async () => {
    const { model, capturedConfigs } = createCapturingModel();

    await attemptInvoke(
      {
        model: model as t.ChatModel,
        messages: [new HumanMessage('hi')],
        /** A ChatOpenAI-derived provider — `ls_provider` would lie here. */
        provider: Providers.DEEPSEEK,
      },
      { configurable: { run_id: 'run-attr' }, metadata: { existing: true } }
    );

    const config = capturedConfigs[0] as CapturedConfig;
    expect(config.metadata?.[Constants.INVOKED_PROVIDER]).toBe(
      Providers.DEEPSEEK
    );
    /** Pre-existing metadata is preserved, not replaced. */
    expect(config.metadata?.existing).toBe(true);
  });

  it('stamps INVOKED_MODEL from the fallback clientOptions in tryFallbackProviders', async () => {
    const { model, capturedConfigs } = createCapturingModel();

    jest.doMock('@/llm/init', () => ({
      initializeModel: (): unknown => model,
    }));
    jest.resetModules();

    /**
     * Everything after the mock registration runs inside `try` so the
     * `finally` below always unregisters it — a failed expectation must not
     * leave `@/llm/init` mocked for the tests that follow.
     */
    try {
      const { tryFallbackProviders: freshTry } = (await import(
        '@/llm/invoke'
      )) as { tryFallbackProviders: typeof tryFallbackProviders };

      await freshTry({
        fallbacks: [
          {
            provider: Providers.ANTHROPIC,
            clientOptions: { model: 'claude-fallback-1' },
          },
        ],
        messages: [new HumanMessage('hi')],
        primaryError: new Error('primary failed'),
        config: { configurable: { run_id: 'run-attr-fb' } },
      });

      const config = capturedConfigs[0] as CapturedConfig;
      expect(config.metadata?.[Constants.INVOKED_MODEL]).toBe(
        'claude-fallback-1'
      );
      expect(config.metadata?.[Constants.INVOKED_PROVIDER]).toBe(
        Providers.ANTHROPIC
      );
    } finally {
      jest.dontMock('@/llm/init');
      jest.resetModules();
    }
  });
});
|
|
421
|
+
|
|
344
422
|
describe('cross-run hydration through ToolNode + attemptInvoke', () => {
|
|
345
423
|
it('annotates run 2 refs but leaves hydrated run 1 ToolMessages untouched', async () => {
|
|
346
424
|
/**
|
package/src/llm/invoke.ts
CHANGED
|
@@ -6,10 +6,10 @@ import type { BaseMessage } from '@langchain/core/messages';
|
|
|
6
6
|
import type { ToolOutputReferenceRegistry } from '@/tools/toolOutputReferences';
|
|
7
7
|
import type * as t from '@/types';
|
|
8
8
|
import { annotateMessagesForLLM } from '@/tools/toolOutputReferences';
|
|
9
|
+
import { Constants, GraphEvents, Providers } from '@/common';
|
|
9
10
|
import { manualToolStreamProviders } from '@/llm/providers';
|
|
10
11
|
import { modifyDeltaProperties } from '@/messages';
|
|
11
12
|
import { ChatModelStreamHandler } from '@/stream';
|
|
12
|
-
import { GraphEvents, Providers } from '@/common';
|
|
13
13
|
import { initializeModel } from '@/llm/init';
|
|
14
14
|
|
|
15
15
|
/**
|
|
@@ -208,6 +208,23 @@ export async function attemptInvoke(
|
|
|
208
208
|
const runId = config?.configurable?.run_id as string | undefined;
|
|
209
209
|
const messagesForProvider = annotateMessagesForLLM(messages, registry, runId);
|
|
210
210
|
|
|
211
|
+
/**
|
|
212
|
+
* Stamp the provider that is ACTUALLY serving this invocation onto the
|
|
213
|
+
* callback metadata. `attemptInvoke` is the single funnel for primary,
|
|
214
|
+
* fallback, and summarization model calls, so consumers that need
|
|
215
|
+
* provider attribution per call (the subagent usage-capture handler)
|
|
216
|
+
* read this key instead of trusting static agent config — which is
|
|
217
|
+
* wrong for fallback-served calls — or `ls_provider` — which derived
|
|
218
|
+
* providers inherit from their base class.
|
|
219
|
+
*/
|
|
220
|
+
config = {
|
|
221
|
+
...config,
|
|
222
|
+
metadata: {
|
|
223
|
+
...(config?.metadata ?? {}),
|
|
224
|
+
[Constants.INVOKED_PROVIDER]: provider,
|
|
225
|
+
},
|
|
226
|
+
};
|
|
227
|
+
|
|
211
228
|
if (model.stream) {
|
|
212
229
|
const stream = await model.stream(messagesForProvider, config);
|
|
213
230
|
let finalChunk: AIMessageChunk | undefined;
|
|
@@ -224,7 +241,7 @@ export async function attemptInvoke(
|
|
|
224
241
|
});
|
|
225
242
|
}
|
|
226
243
|
} else if (registeredStreamHandler == null) {
|
|
227
|
-
const metadata = config
|
|
244
|
+
const metadata = config.metadata as Record<string, unknown> | undefined;
|
|
228
245
|
const streamHandler = new ChatModelStreamHandler();
|
|
229
246
|
for await (const chunk of stream) {
|
|
230
247
|
const handlingChunk = getStreamHandlingChunk({
|
|
@@ -247,7 +264,7 @@ export async function attemptInvoke(
|
|
|
247
264
|
});
|
|
248
265
|
}
|
|
249
266
|
} else {
|
|
250
|
-
const metadata = config
|
|
267
|
+
const metadata = config.metadata as Record<string, unknown> | undefined;
|
|
251
268
|
for await (const chunk of stream) {
|
|
252
269
|
const handlingChunk = getStreamHandlingChunk({
|
|
253
270
|
current: finalChunk,
|
|
@@ -292,6 +309,25 @@ export async function attemptInvoke(
|
|
|
292
309
|
return { messages: [finalMessage] };
|
|
293
310
|
}
|
|
294
311
|
|
|
312
|
+
/**
 * Looks up the configured model name on a set of client options.
 *
 * Providers disagree on the key, so `model` is consulted first and
 * `modelName` second. A key only counts when it holds a non-empty string.
 *
 * @param clientOptions - Provider client options; may be `undefined`.
 * @returns The first non-empty string found, or `undefined` when neither key
 * yields one.
 */
function extractClientOptionsModel(
  clientOptions: t.ClientOptions | undefined
): string | undefined {
  const candidates = clientOptions as
    | Partial<Record<'model' | 'modelName', unknown>>
    | undefined;
  for (const key of ['model', 'modelName'] as const) {
    const value = candidates?.[key];
    if (typeof value === 'string' && value.length > 0) {
      return value;
    }
  }
  return undefined;
}
|
|
330
|
+
|
|
295
331
|
/**
|
|
296
332
|
* Attempts each fallback provider in order until one succeeds.
|
|
297
333
|
* Throws the last error if all fallbacks fail.
|
|
@@ -321,6 +357,24 @@ export async function tryFallbackProviders({
|
|
|
321
357
|
clientOptions: fb.clientOptions,
|
|
322
358
|
tools,
|
|
323
359
|
});
|
|
360
|
+
/**
|
|
361
|
+
* Stamp the fallback's configured model onto callback metadata so
|
|
362
|
+
* per-call attribution (subagent usage capture) doesn't fall back to
|
|
363
|
+
* the PRIMARY config's model when the provider reports no
|
|
364
|
+
* `ls_model_name`. The serving provider is stamped uniformly by
|
|
365
|
+
* `attemptInvoke` (`INVOKED_PROVIDER`).
|
|
366
|
+
*/
|
|
367
|
+
const fbModelName = extractClientOptionsModel(fb.clientOptions);
|
|
368
|
+
const fbConfig: RunnableConfig | undefined =
|
|
369
|
+
fbModelName == null
|
|
370
|
+
? config
|
|
371
|
+
: {
|
|
372
|
+
...config,
|
|
373
|
+
metadata: {
|
|
374
|
+
...(config?.metadata ?? {}),
|
|
375
|
+
[Constants.INVOKED_MODEL]: fbModelName,
|
|
376
|
+
},
|
|
377
|
+
};
|
|
324
378
|
const result = await attemptInvoke(
|
|
325
379
|
{
|
|
326
380
|
model: fbModel as t.ChatModel,
|
|
@@ -329,7 +383,7 @@ export async function tryFallbackProviders({
|
|
|
329
383
|
context,
|
|
330
384
|
onChunk,
|
|
331
385
|
},
|
|
332
|
-
|
|
386
|
+
fbConfig
|
|
333
387
|
);
|
|
334
388
|
return result;
|
|
335
389
|
} catch (e) {
|
package/src/messages/content.ts
CHANGED
|
@@ -1,6 +1,26 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {
|
|
2
|
+
BaseMessage,
|
|
3
|
+
MessageContentComplex,
|
|
4
|
+
} from '@langchain/core/messages';
|
|
2
5
|
import { ContentTypes } from '@/common';
|
|
3
6
|
|
|
7
|
+
/**
 * Predicate used by {@link formatContentStrings} to decide whether a
 * message's content gets flattened.
 *
 * @param message - The message to inspect.
 * @returns `true` only when the message type is `human`, `ai`, or `system`
 * AND its content is an array in which every block has the text content
 * type. An empty content array satisfies that condition; string content and
 * every other message type yield `false`.
 */
export const isLegacyConvertible = (message: BaseMessage): boolean => {
  switch (message.getType()) {
    case 'human':
    case 'ai':
    case 'system': {
      const { content } = message;
      return (
        Array.isArray(content) &&
        content.every((part) => part.type === ContentTypes.TEXT)
      );
    }
    default:
      return false;
  }
};
|
|
23
|
+
|
|
4
24
|
/**
|
|
5
25
|
* Formats an array of messages for LangChain, making sure all content fields are strings
|
|
6
26
|
* @param {Array<HumanMessage | AIMessage | SystemMessage | ToolMessage>} payload - The array of messages to format.
|
|
@@ -13,42 +33,14 @@ export const formatContentStrings = (
|
|
|
13
33
|
const result: Array<BaseMessage> = [];
|
|
14
34
|
|
|
15
35
|
for (const message of payload) {
|
|
16
|
-
|
|
17
|
-
const isValidMessage =
|
|
18
|
-
messageType === 'human' ||
|
|
19
|
-
messageType === 'ai' ||
|
|
20
|
-
messageType === 'system';
|
|
21
|
-
|
|
22
|
-
if (!isValidMessage) {
|
|
23
|
-
result.push(message);
|
|
24
|
-
continue;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
// If content is already a string, add as-is
|
|
28
|
-
if (typeof message.content === 'string') {
|
|
29
|
-
result.push(message);
|
|
30
|
-
continue;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
// If content is not an array, add as-is
|
|
34
|
-
if (!Array.isArray(message.content)) {
|
|
35
|
-
result.push(message);
|
|
36
|
-
continue;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// Check if all content blocks are text type
|
|
40
|
-
const allTextBlocks = message.content.every(
|
|
41
|
-
(block) => block.type === ContentTypes.TEXT
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
// Only convert to string if all blocks are text type
|
|
45
|
-
if (!allTextBlocks) {
|
|
36
|
+
if (!isLegacyConvertible(message)) {
|
|
46
37
|
result.push(message);
|
|
47
38
|
continue;
|
|
48
39
|
}
|
|
49
40
|
|
|
50
41
|
// Reduce text types to a single string
|
|
51
|
-
const
|
|
42
|
+
const blocks = message.content as MessageContentComplex[];
|
|
43
|
+
const content = blocks.reduce((acc, curr) => {
|
|
52
44
|
if (curr.type === ContentTypes.TEXT) {
|
|
53
45
|
return `${acc}${curr[ContentTypes.TEXT] || ''}\n`;
|
|
54
46
|
}
|
package/src/messages/prune.ts
CHANGED
|
@@ -1312,16 +1312,36 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
|
|
|
1312
1312
|
originalToolContent?: Map<number, string>;
|
|
1313
1313
|
calibrationRatio?: number;
|
|
1314
1314
|
resolvedInstructionOverhead?: number;
|
|
1315
|
+
/** Usable budget this call: maxTokens minus output reserve */
|
|
1316
|
+
contextBudget?: number;
|
|
1317
|
+
/** Calibrated instruction overhead actually applied this call */
|
|
1318
|
+
effectiveInstructionTokens?: number;
|
|
1315
1319
|
} {
|
|
1316
1320
|
if (params.messages.length === 0) {
|
|
1321
|
+
/** Post-compaction calls still invoke the model — report the same
|
|
1322
|
+
* reserve-adjusted budget fields as the populated paths */
|
|
1323
|
+
const emptyInstructionTokens =
|
|
1324
|
+
factoryParams.getInstructionTokens?.() ?? 0;
|
|
1325
|
+
const emptyReserveRatio =
|
|
1326
|
+
factoryParams.reserveRatio ?? DEFAULT_RESERVE_RATIO;
|
|
1327
|
+
const emptyBudget =
|
|
1328
|
+
factoryParams.maxTokens -
|
|
1329
|
+
(emptyReserveRatio > 0 && emptyReserveRatio < 1
|
|
1330
|
+
? Math.round(factoryParams.maxTokens * emptyReserveRatio)
|
|
1331
|
+
: 0);
|
|
1317
1332
|
return {
|
|
1318
1333
|
context: [],
|
|
1319
1334
|
indexTokenCountMap,
|
|
1320
1335
|
messagesToRefine: [],
|
|
1321
1336
|
prePruneContextTokens: 0,
|
|
1322
|
-
remainingContextTokens:
|
|
1337
|
+
remainingContextTokens: Math.max(
|
|
1338
|
+
0,
|
|
1339
|
+
emptyBudget - emptyInstructionTokens
|
|
1340
|
+
),
|
|
1323
1341
|
calibrationRatio,
|
|
1324
1342
|
resolvedInstructionOverhead: bestInstructionOverhead,
|
|
1343
|
+
contextBudget: emptyBudget,
|
|
1344
|
+
effectiveInstructionTokens: emptyInstructionTokens,
|
|
1325
1345
|
};
|
|
1326
1346
|
}
|
|
1327
1347
|
|
|
@@ -1549,6 +1569,8 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
|
|
|
1549
1569
|
pruningBudget > 0 ? calibratedTotalTokens / pruningBudget : 0,
|
|
1550
1570
|
calibrationRatio,
|
|
1551
1571
|
resolvedInstructionOverhead: bestInstructionOverhead,
|
|
1572
|
+
contextBudget: pruningBudget,
|
|
1573
|
+
effectiveInstructionTokens: currentInstructionTokens,
|
|
1552
1574
|
};
|
|
1553
1575
|
}
|
|
1554
1576
|
|
|
@@ -1752,6 +1774,8 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
|
|
|
1752
1774
|
originalToolContent.size > 0 ? originalToolContent : undefined,
|
|
1753
1775
|
calibrationRatio,
|
|
1754
1776
|
resolvedInstructionOverhead: bestInstructionOverhead,
|
|
1777
|
+
contextBudget: pruningBudget,
|
|
1778
|
+
effectiveInstructionTokens: currentInstructionTokens,
|
|
1755
1779
|
};
|
|
1756
1780
|
}
|
|
1757
1781
|
|
|
@@ -2099,9 +2123,20 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
|
|
|
2099
2123
|
}
|
|
2100
2124
|
}
|
|
2101
2125
|
|
|
2126
|
+
/** Scale raw-space remaining back to calibrated/provider units so it is
|
|
2127
|
+
* directly comparable with pruningBudget and prePruneContextTokens */
|
|
2128
|
+
const rawRemaining = Math.max(
|
|
2129
|
+
0,
|
|
2130
|
+
initialRemainingContextTokens + reclaimedTokens
|
|
2131
|
+
);
|
|
2102
2132
|
const remainingContextTokens = Math.max(
|
|
2103
2133
|
0,
|
|
2104
|
-
Math.min(
|
|
2134
|
+
Math.min(
|
|
2135
|
+
pruningBudget,
|
|
2136
|
+
calibrationRatio > 0
|
|
2137
|
+
? Math.round(rawRemaining * calibrationRatio)
|
|
2138
|
+
: rawRemaining
|
|
2139
|
+
)
|
|
2105
2140
|
);
|
|
2106
2141
|
|
|
2107
2142
|
runThinkingStartIndex = thinkingStartIndex ?? -1;
|
|
@@ -2123,6 +2158,8 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
|
|
|
2123
2158
|
originalToolContent.size > 0 ? originalToolContent : undefined,
|
|
2124
2159
|
calibrationRatio,
|
|
2125
2160
|
resolvedInstructionOverhead: bestInstructionOverhead,
|
|
2161
|
+
contextBudget: pruningBudget,
|
|
2162
|
+
effectiveInstructionTokens: currentInstructionTokens,
|
|
2126
2163
|
};
|
|
2127
2164
|
};
|
|
2128
2165
|
}
|
package/src/run.ts
CHANGED
|
@@ -78,6 +78,7 @@ const CUSTOM_GRAPH_EVENTS = new Set<string>([
|
|
|
78
78
|
GraphEvents.ON_SUMMARIZE_COMPLETE,
|
|
79
79
|
GraphEvents.ON_SUBAGENT_UPDATE,
|
|
80
80
|
GraphEvents.ON_AGENT_LOG,
|
|
81
|
+
GraphEvents.ON_CONTEXT_USAGE,
|
|
81
82
|
GraphEvents.ON_CUSTOM_EVENT,
|
|
82
83
|
]);
|
|
83
84
|
|
|
@@ -129,6 +130,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
129
130
|
private toolOutputReferences?: t.ToolOutputReferencesConfig;
|
|
130
131
|
private eagerEventToolExecution?: t.EagerEventToolExecutionConfig;
|
|
131
132
|
private toolExecution?: t.ToolExecutionConfig;
|
|
133
|
+
private subagentUsageSink?: t.SubagentUsageSink;
|
|
132
134
|
private indexTokenCountMap?: Record<string, number>;
|
|
133
135
|
calibrationRatio: number = 1;
|
|
134
136
|
graphRunnable?: t.CompiledStateWorkflow;
|
|
@@ -176,6 +178,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
176
178
|
this.toolOutputReferences = config.toolOutputReferences;
|
|
177
179
|
this.eagerEventToolExecution = config.eagerEventToolExecution;
|
|
178
180
|
this.toolExecution = config.toolExecution;
|
|
181
|
+
this.subagentUsageSink = config.subagentUsageSink;
|
|
179
182
|
|
|
180
183
|
if (!config.graphConfig) {
|
|
181
184
|
throw new Error('Graph config not provided');
|
|
@@ -249,6 +252,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
249
252
|
tokenCounter: this.tokenCounter,
|
|
250
253
|
indexTokenCountMap: this.indexTokenCountMap,
|
|
251
254
|
calibrationRatio: this.calibrationRatio,
|
|
255
|
+
subagentUsageSink: this.subagentUsageSink,
|
|
252
256
|
});
|
|
253
257
|
/** Propagate compile options from graph config */
|
|
254
258
|
standardGraph.compileOptions = this.applyHITLCheckpointerFallback(
|
|
@@ -276,6 +280,7 @@ export class Run<_T extends t.BaseGraphState> {
|
|
|
276
280
|
tokenCounter: this.tokenCounter,
|
|
277
281
|
indexTokenCountMap: this.indexTokenCountMap,
|
|
278
282
|
calibrationRatio: this.calibrationRatio,
|
|
283
|
+
subagentUsageSink: this.subagentUsageSink,
|
|
279
284
|
});
|
|
280
285
|
|
|
281
286
|
multiAgentGraph.compileOptions =
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { config } from 'dotenv';
|
|
2
|
+
config();
|
|
3
|
+
|
|
4
|
+
import { HumanMessage } from '@langchain/core/messages';
|
|
5
|
+
import type { UsageMetadata } from '@langchain/core/messages';
|
|
6
|
+
import type * as t from '@/types';
|
|
7
|
+
import { ToolEndHandler, ModelEndHandler } from '@/events';
|
|
8
|
+
import { Providers, GraphEvents } from '@/common';
|
|
9
|
+
import { Run } from '@/run';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Live verification for `subagentUsageSink` (host billing of subagent
|
|
13
|
+
* child-run model usage).
|
|
14
|
+
*
|
|
15
|
+
* Runs a supervisor that MUST delegate to a "researcher" subagent, then
|
|
16
|
+
* asserts:
|
|
17
|
+
* 1. The host's CHAT_MODEL_END handler collected the PARENT's calls only.
|
|
18
|
+
* 2. The sink received one event per CHILD model call, tagged with the
|
|
19
|
+
* subagent type, child run id, and the child's model/provider.
|
|
20
|
+
* 3. Child usage has real token counts (the previously-unbilled tokens).
|
|
21
|
+
*
|
|
22
|
+
* Usage:
|
|
23
|
+
* OPENAI_API_KEY=... npx ts-node -r tsconfig-paths/register src/scripts/subagent-usage-sink.ts
|
|
24
|
+
*
|
|
25
|
+
* Or with Anthropic:
|
|
26
|
+
* ANTHROPIC_API_KEY=... npx ts-node -r tsconfig-paths/register src/scripts/subagent-usage-sink.ts --provider anthropic
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
/** Position of the `--provider` flag in argv, or -1 when it was not passed. */
const providerFlagIndex = process.argv.indexOf('--provider');

/** Only `--provider anthropic` selects Anthropic; anything else means OpenAI. */
const useAnthropic =
  providerFlagIndex !== -1 &&
  process.argv[providerFlagIndex + 1] === 'anthropic';

/** Name of the environment variable holding the selected provider's key. */
const apiKeyEnvVar = useAnthropic ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY';

const provider = useAnthropic ? Providers.ANTHROPIC : Providers.OPENAI;
const apiKey = process.env[apiKeyEnvVar];
const modelName = useAnthropic ? 'claude-sonnet-4-20250514' : 'gpt-4o-mini';

/** Without a key the live run cannot reach the provider, so bail out early. */
if (!apiKey) {
  console.error(`Missing ${apiKeyEnvVar} environment variable`);
  process.exit(1);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Runs one supervisor → "researcher" delegation against the selected
 * provider, prints what the parent CHAT_MODEL_END handler and the
 * `subagentUsageSink` each collected, and exits with status 1 when any check
 * on the collected data fails.
 */
async function main(): Promise<void> {
  /** Coerces a usage field to a number; missing, NaN, and 0 all become 0. */
  const toTokenCount = (value: unknown): number => Number(value) || 0;

  console.log('=== Subagent Usage Sink Live Verification ===\n');
  console.log(`Provider: ${provider}`);
  console.log(`Model: ${modelName}\n`);

  const researcherInputs: t.AgentInputs = {
    agentId: 'researcher',
    provider,
    clientOptions: { modelName, apiKey },
    instructions:
      'You are a research specialist. Answer the task in one or two sentences.',
    maxContextTokens: 8000,
  };

  const supervisor: t.AgentInputs = {
    agentId: 'supervisor',
    provider,
    clientOptions: { modelName, apiKey },
    instructions: `You are a supervisor agent. For ANY user question, you MUST delegate to the "researcher" subagent via the subagent tool — never answer directly. After the subagent returns, give the user a one-sentence final answer.`,
    maxContextTokens: 16000,
    subagentConfigs: [
      {
        type: 'researcher',
        name: 'Research Specialist',
        description: 'Researches questions and returns concise answers.',
        agentInputs: researcherInputs,
      },
    ],
  };

  /** Filled by the host's CHAT_MODEL_END handler. */
  const collectedUsage: UsageMetadata[] = [];
  /** Filled by the subagent usage sink. */
  const sunkEvents: t.SubagentUsageEvent[] = [];

  const runId = `usage-sink-live-${Date.now()}`;
  const run = await Run.create<t.IState>({
    runId,
    graphConfig: {
      type: 'standard',
      agents: [supervisor],
    },
    returnContent: true,
    customHandlers: {
      [GraphEvents.TOOL_END]: new ToolEndHandler(),
      [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
    },
    subagentUsageSink: (event) => {
      sunkEvents.push(event);
    },
  });

  const question = new HumanMessage(
    'In what year was the Eiffel Tower completed? Use the researcher subagent.'
  );
  const callerConfig = {
    configurable: { thread_id: `usage-sink-${Date.now()}` },
    streamMode: 'values' as const,
    version: 'v2' as const,
  };
  await run.processStream({ messages: [question] }, callerConfig);

  console.log('\n--- Parent collectedUsage (CHAT_MODEL_END handler) ---');
  console.dir(collectedUsage, { depth: null });

  console.log('\n--- Subagent usage sink events ---');
  console.dir(sunkEvents, { depth: null });

  /** Lists every check a single sink event fails, in a fixed order. */
  const problemsFor = (event: t.SubagentUsageEvent): string[] => {
    const problems: string[] = [];
    if (event.subagentType !== 'researcher') {
      problems.push(`unexpected subagentType: ${event.subagentType}`);
    }
    if (event.runId !== runId) {
      problems.push(`event.runId mismatch: ${event.runId}`);
    }
    if (!event.subagentRunId.startsWith(`${runId}_sub_`)) {
      problems.push(`event.subagentRunId mismatch: ${event.subagentRunId}`);
    }
    if (event.provider !== provider) {
      problems.push(`event.provider mismatch: ${event.provider}`);
    }
    if (event.model == null || event.model === '') {
      problems.push('event.model missing');
    }
    const input = toTokenCount(event.usage.input_tokens);
    const output = toTokenCount(event.usage.output_tokens);
    if (input <= 0 || output <= 0) {
      problems.push(
        `child usage has non-positive tokens: input=${input} output=${output}`
      );
    }
    return problems;
  };

  const failures: string[] = [];
  if (collectedUsage.length < 2) {
    failures.push(
      `expected >= 2 parent model calls in collectedUsage, got ${collectedUsage.length}`
    );
  }
  if (sunkEvents.length === 0) {
    failures.push('sink received NO child usage events');
  }
  for (const event of sunkEvents) {
    failures.push(...problemsFor(event));
  }

  let childTotal = 0;
  for (const { usage } of sunkEvents) {
    childTotal +=
      toTokenCount(usage.input_tokens) + toTokenCount(usage.output_tokens);
  }
  console.log(
    `\nChild tokens that were previously invisible to billing: ${childTotal}`
  );

  if (failures.length > 0) {
    console.error('\nFAIL:');
    failures.forEach((failure) => {
      console.error(`  - ${failure}`);
    });
    process.exit(1);
  }
  console.log('\nPASS: subagent child usage reported through the sink.');
}

/** Any rejection from the live run is printed and turned into exit status 1. */
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
|