@librechat/agents 3.1.80 → 3.1.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/dist/cjs/agents/AgentContext.cjs +102 -35
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +13 -0
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/llm/openai/index.cjs +50 -13
  6. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  7. package/dist/cjs/llm/openrouter/index.cjs +17 -7
  8. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  9. package/dist/cjs/llm/openrouter/toolCache.cjs +55 -0
  10. package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -0
  11. package/dist/cjs/llm/vertexai/index.cjs +15 -15
  12. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +70 -12
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/esm/agents/AgentContext.mjs +101 -34
  16. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  17. package/dist/esm/graphs/Graph.mjs +13 -0
  18. package/dist/esm/graphs/Graph.mjs.map +1 -1
  19. package/dist/esm/llm/openai/index.mjs +50 -14
  20. package/dist/esm/llm/openai/index.mjs.map +1 -1
  21. package/dist/esm/llm/openrouter/index.mjs +17 -7
  22. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  23. package/dist/esm/llm/openrouter/toolCache.mjs +53 -0
  24. package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -0
  25. package/dist/esm/llm/vertexai/index.mjs +15 -16
  26. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  27. package/dist/esm/tools/ToolNode.mjs +70 -12
  28. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  29. package/dist/types/agents/AgentContext.d.ts +6 -1
  30. package/dist/types/llm/openrouter/index.d.ts +1 -0
  31. package/dist/types/llm/openrouter/toolCache.d.ts +2 -0
  32. package/dist/types/llm/vertexai/index.d.ts +18 -1
  33. package/dist/types/tools/ToolNode.d.ts +5 -0
  34. package/dist/types/types/run.d.ts +2 -0
  35. package/package.json +2 -1
  36. package/src/agents/AgentContext.ts +146 -38
  37. package/src/agents/__tests__/AgentContext.test.ts +198 -0
  38. package/src/graphs/Graph.ts +24 -0
  39. package/src/llm/custom-chat-models.smoke.test.ts +76 -0
  40. package/src/llm/openai/deepseek.test.ts +14 -1
  41. package/src/llm/openai/index.ts +38 -12
  42. package/src/llm/openrouter/index.ts +22 -7
  43. package/src/llm/openrouter/reasoning.test.ts +33 -0
  44. package/src/llm/openrouter/toolCache.test.ts +83 -0
  45. package/src/llm/openrouter/toolCache.ts +89 -0
  46. package/src/llm/vertexai/fixThoughtSignatures.test.ts +154 -0
  47. package/src/llm/vertexai/index.ts +16 -22
  48. package/src/messages/cache.test.ts +127 -0
  49. package/src/scripts/openrouter_prompt_cache_live.ts +310 -0
  50. package/src/specs/agent-handoffs.live.test.ts +140 -0
  51. package/src/specs/agent-handoffs.test.ts +266 -2
  52. package/src/specs/openrouter.simple.test.ts +15 -8
  53. package/src/tools/ToolNode.ts +92 -13
  54. package/src/types/run.ts +2 -0
@@ -65,50 +65,44 @@ type AdditionalKwargs =
65
65
  * - The signature for a functionCall part is an empty string
66
66
  *
67
67
  * This function correlates each "model" content block in the formatted request
68
- * back to its originating AI message, then re-attaches non-empty signatures
69
- * that the library failed to apply.
68
+ * back to its originating AI message by *position*, then re-attaches non-empty
69
+ * signatures that the library failed to apply. AI messages without signatures
70
+ * still consume their slot — filtering them out shifted later messages onto
71
+ * the wrong content block and dropped real signatures on the floor.
70
72
  */
71
- function fixThoughtSignatures(
73
+ export function fixThoughtSignatures(
72
74
  contents: GeminiContent[],
73
75
  input: BaseMessage[]
74
76
  ): void {
75
- // Collect AI messages that have signatures, in order
76
- const aiMessages = input.filter(
77
- (msg) =>
78
- isAIMessage(msg) &&
79
- Array.isArray((msg.additional_kwargs as AdditionalKwargs)?.signatures) &&
80
- (msg.additional_kwargs.signatures as string[]).length > 0
81
- );
82
-
83
- // Collect "model" content blocks from the formatted request, in order
77
+ // All AI messages, in order — non-signature ones still consume positional
78
+ // slots so later messages line up with their model content blocks.
79
+ const aiMessages = input.filter(isAIMessage);
84
80
  const modelContents = contents.filter((c) => c.role === 'model');
85
81
 
86
- // They should correspond 1:1 in order (both derived from the same input sequence)
87
82
  const count = Math.min(aiMessages.length, modelContents.length);
88
83
  for (let i = 0; i < count; i++) {
89
- const msg = aiMessages[i];
90
- const content = modelContents[i];
91
- const signatures = (msg.additional_kwargs as AdditionalKwargs)?.signatures;
84
+ const signatures = (aiMessages[i].additional_kwargs as AdditionalKwargs)
85
+ ?.signatures;
86
+ if (!Array.isArray(signatures) || signatures.length === 0) continue;
92
87
 
93
- // Collect non-empty signatures that aren't already attached to any part
88
+ const content = modelContents[i];
94
89
  const attachedSignatures = new Set(
95
90
  content.parts
96
91
  .map((p) => p.thoughtSignature)
97
92
  .filter((s): s is string => s != null && s !== '')
98
93
  );
99
- const availableSignatures = signatures?.filter(
100
- (s) => s != null && s !== '' && !attachedSignatures.has(s)
94
+ const availableSignatures = signatures.filter(
95
+ (s): s is string => s != null && s !== '' && !attachedSignatures.has(s)
101
96
  );
102
97
 
103
- // Assign available signatures to functionCall parts missing one, in order
104
98
  let sigIdx = 0;
105
99
  for (const part of content.parts) {
106
100
  if (
107
101
  'functionCall' in part &&
108
102
  (part.thoughtSignature == null || part.thoughtSignature === '') &&
109
- sigIdx < (availableSignatures?.length ?? 0)
103
+ sigIdx < availableSignatures.length
110
104
  ) {
111
- part.thoughtSignature = availableSignatures?.[sigIdx];
105
+ part.thoughtSignature = availableSignatures[sigIdx];
112
106
  sigIdx++;
113
107
  }
114
108
  }
@@ -14,9 +14,14 @@ import {
14
14
  addBedrockCacheControl,
15
15
  addCacheControl,
16
16
  } from './cache';
17
+ import { _convertMessagesToOpenAIParams } from '@/llm/openai/utils';
17
18
  import { toLangChainContent } from './langchain';
18
19
  import { ContentTypes } from '@/common/enum';
19
20
 
21
+ type CacheControlBlock = MessageContentComplex & {
22
+ cache_control?: { type: 'ephemeral'; ttl?: '1h' };
23
+ };
24
+
20
25
  describe('addCacheControl', () => {
21
26
  test('should add cache control to the last two user messages with array content', () => {
22
27
  const messages: AnthropicMessages = [
@@ -1483,3 +1488,125 @@ describe('LangChain message type preservation', () => {
1483
1488
  expect((result[1] as AIMessage).tool_calls![0].name).toBe('navigate');
1484
1489
  });
1485
1490
  });
1491
+
1492
+ describe('OpenRouter prompt caching (reuses addCacheControl)', () => {
1493
+ it('adds cache_control to LangChain messages for OpenRouter (same format as Anthropic)', () => {
1494
+ const messages: BaseMessage[] = [
1495
+ new HumanMessage({ content: [{ type: 'text', text: 'System context' }] }),
1496
+ new AIMessage({ content: [{ type: 'text', text: 'Acknowledged' }] }),
1497
+ new HumanMessage({ content: [{ type: 'text', text: 'User query' }] }),
1498
+ ];
1499
+
1500
+ const result = addCacheControl(messages);
1501
+
1502
+ const firstContent = result[0].content as MessageContentComplex[];
1503
+ const lastContent = result[2].content as MessageContentComplex[];
1504
+
1505
+ expect((firstContent[0] as CacheControlBlock).cache_control).toEqual({
1506
+ type: 'ephemeral',
1507
+ });
1508
+ expect((lastContent[0] as CacheControlBlock).cache_control).toEqual({
1509
+ type: 'ephemeral',
1510
+ });
1511
+ });
1512
+
1513
+ it('preserves cache_control through OpenAI message conversion used by OpenRouter', () => {
1514
+ const messages: BaseMessage[] = [
1515
+ new HumanMessage({
1516
+ content: [
1517
+ {
1518
+ type: 'text',
1519
+ text: 'Hello',
1520
+ cache_control: { type: 'ephemeral' },
1521
+ },
1522
+ ],
1523
+ }),
1524
+ new AIMessage({ content: 'Hi there' }),
1525
+ new HumanMessage({
1526
+ content: [
1527
+ {
1528
+ type: 'text',
1529
+ text: 'Follow-up',
1530
+ cache_control: { type: 'ephemeral' },
1531
+ },
1532
+ ],
1533
+ }),
1534
+ ];
1535
+
1536
+ const converted = _convertMessagesToOpenAIParams(messages);
1537
+
1538
+ const firstUserContent = converted[0].content as CacheControlBlock[];
1539
+ const lastUserContent = converted[2].content as CacheControlBlock[];
1540
+
1541
+ expect(firstUserContent[0]).toHaveProperty('cache_control');
1542
+ expect(firstUserContent[0].cache_control).toEqual({ type: 'ephemeral' });
1543
+ expect(lastUserContent[0]).toHaveProperty('cache_control');
1544
+ expect(lastUserContent[0].cache_control).toEqual({ type: 'ephemeral' });
1545
+ });
1546
+
1547
+ it('end-to-end: addCacheControl then convert preserves breakpoints for OpenRouter', () => {
1548
+ const messages: BaseMessage[] = [
1549
+ new HumanMessage({ content: 'First message with context' }),
1550
+ new AIMessage({ content: 'Response' }),
1551
+ new HumanMessage({ content: 'Second question' }),
1552
+ ];
1553
+
1554
+ const cached = addCacheControl(messages);
1555
+ const converted = _convertMessagesToOpenAIParams(
1556
+ cached,
1557
+ 'anthropic/claude-sonnet-4-20250514'
1558
+ );
1559
+
1560
+ const firstUser = converted[0];
1561
+ const lastUser = converted[2];
1562
+
1563
+ expect(Array.isArray(firstUser.content)).toBe(true);
1564
+ expect(
1565
+ (firstUser.content as CacheControlBlock[])[0]
1566
+ ).toHaveProperty('cache_control');
1567
+
1568
+ expect(Array.isArray(lastUser.content)).toBe(true);
1569
+ expect(
1570
+ (lastUser.content as CacheControlBlock[])[0]
1571
+ ).toHaveProperty('cache_control');
1572
+ });
1573
+
1574
+ it('strips Bedrock cache before applying OpenRouter/Anthropic cache', () => {
1575
+ const messages: TestMsg[] = [
1576
+ {
1577
+ role: 'user',
1578
+ content: [
1579
+ { type: ContentTypes.TEXT, text: 'First message' },
1580
+ { cachePoint: { type: 'default' } },
1581
+ ],
1582
+ },
1583
+ {
1584
+ role: 'assistant',
1585
+ content: [
1586
+ { type: ContentTypes.TEXT, text: 'Response' },
1587
+ { cachePoint: { type: 'default' } },
1588
+ ],
1589
+ },
1590
+ {
1591
+ role: 'user',
1592
+ content: [{ type: ContentTypes.TEXT, text: 'Follow-up' }],
1593
+ },
1594
+ ];
1595
+
1596
+ /** @ts-expect-error - Testing cross-provider compatibility */
1597
+ const result = addCacheControl(messages);
1598
+
1599
+ for (const msg of result) {
1600
+ if (Array.isArray(msg.content)) {
1601
+ expect(
1602
+ (msg.content as MessageContentComplex[]).some(
1603
+ (b) => 'cachePoint' in b
1604
+ )
1605
+ ).toBe(false);
1606
+ }
1607
+ }
1608
+
1609
+ const lastContent = result[2].content as MessageContentComplex[];
1610
+ expect('cache_control' in lastContent[0]).toBe(true);
1611
+ });
1612
+ });
@@ -0,0 +1,310 @@
1
+ import { config as loadEnv } from 'dotenv';
2
+ import { HumanMessage, SystemMessage } from '@langchain/core/messages';
3
+ import type { AIMessage, BaseMessage } from '@langchain/core/messages';
4
+ import type { ClientOptions } from '@langchain/openai';
5
+ import type { GraphTools } from '@/types';
6
+ import type { ChatOpenRouterInput } from '@/llm/openrouter';
7
+ import { addCacheControl } from '@/messages/cache';
8
+ import { ChatOpenRouter } from '@/llm/openrouter';
9
+ import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
10
+
11
+ loadEnv({ path: process.env.DOTENV_CONFIG_PATH ?? '.env' });
12
+
13
+ type ModelCase = {
14
+ label: string;
15
+ model: string;
16
+ };
17
+
18
+ type CacheUsage = {
19
+ cacheCreation: number;
20
+ cacheRead: number;
21
+ inputTokens: number;
22
+ outputTokens: number;
23
+ totalTokens: number;
24
+ };
25
+
26
+ type OpenRouterTool = {
27
+ type: 'function';
28
+ function: {
29
+ name: string;
30
+ };
31
+ cache_control?: { type: 'ephemeral' };
32
+ };
33
+
34
+ const DEFAULT_MODEL_CASES: ModelCase[] = [
35
+ { label: 'Anthropic Claude', model: 'anthropic/claude-haiku-4.5' },
36
+ { label: 'Google Gemini', model: 'google/gemini-2.5-flash' },
37
+ { label: 'Alibaba Qwen', model: 'qwen/qwen3-coder-flash' },
38
+ ];
39
+
40
+ const apiKey = process.env.OPENROUTER_API_KEY;
41
+ const baseURL =
42
+ process.env.OPENROUTER_BASE_URL ?? 'https://openrouter.ai/api/v1';
43
+ const attempts = Number(process.env.OPENROUTER_PROMPT_CACHE_ATTEMPTS ?? '3');
44
+ const modelCases = (
45
+ process.env.OPENROUTER_PROMPT_CACHE_MODELS?.split(',').map((model) => ({
46
+ label: 'Custom',
47
+ model: model.trim(),
48
+ })) ?? DEFAULT_MODEL_CASES
49
+ ).filter(({ model }) => model.length > 0);
50
+
51
+ if (apiKey == null || apiKey.length === 0) {
52
+ throw new Error('OPENROUTER_API_KEY is required');
53
+ }
54
+
55
+ function buildStableReference(): string {
56
+ const paragraph =
57
+ 'LibreChat OpenRouter prompt caching live validation reference. This paragraph is deliberately stable across repeated requests so OpenRouter can route the conversation to the same provider endpoint and reuse cached prompt tokens. It describes cache breakpoints, provider sticky routing, cache write metrics, cache read metrics, model-specific minimum prompt sizes, and the expected behavior of explicit per-message cache_control markers for supported OpenRouter providers.';
58
+
59
+ return Array.from({ length: 90 }, (_, index) => {
60
+ const section = index + 1;
61
+ return `Section ${section}. ${paragraph} Verification key ${section}: OPENROUTER_PROMPT_CACHE_LIVE_REFERENCE_${section}.`;
62
+ }).join('\n');
63
+ }
64
+
65
+ function buildStableToolDescription(): string {
66
+ const paragraph =
67
+ 'Static OpenRouter tool contract for prompt cache validation. This tool description is stable across requests and intentionally verbose so provider-side prompt caching can write and then read a meaningful static tool-schema prefix while dynamic tools vary after the cache breakpoint.';
68
+
69
+ return Array.from({ length: 90 }, (_, index) => {
70
+ const section = index + 1;
71
+ return `Tool section ${section}. ${paragraph} Stable tool key ${section}: OPENROUTER_STATIC_TOOL_CACHE_REFERENCE_${section}.`;
72
+ }).join('\n');
73
+ }
74
+
75
+ function buildToolSet(attempt: number): GraphTools {
76
+ return [
77
+ {
78
+ type: 'function',
79
+ function: {
80
+ name: 'stable_reference_lookup',
81
+ description: buildStableToolDescription(),
82
+ parameters: {
83
+ type: 'object',
84
+ properties: {
85
+ query: {
86
+ type: 'string',
87
+ description: 'Stable lookup query.',
88
+ },
89
+ },
90
+ required: ['query'],
91
+ additionalProperties: false,
92
+ },
93
+ },
94
+ },
95
+ {
96
+ type: 'function',
97
+ function: {
98
+ name: `dynamic_runtime_tool_${attempt}`,
99
+ description: `Dynamic runtime tool ${attempt}; this varies between attempts and should sit after the cached static tool prefix.`,
100
+ parameters: {
101
+ type: 'object',
102
+ properties: {
103
+ value: {
104
+ type: 'string',
105
+ },
106
+ },
107
+ required: ['value'],
108
+ additionalProperties: false,
109
+ },
110
+ },
111
+ },
112
+ ] as GraphTools;
113
+ }
114
+
115
+ function buildMessages(model: string): BaseMessage[] {
116
+ const reference = buildStableReference();
117
+ const messages: BaseMessage[] = [
118
+ new SystemMessage(
119
+ 'You are validating prompt caching. Answer with one concise sentence.'
120
+ ),
121
+ new HumanMessage(
122
+ [
123
+ `For model ${model}, reply with exactly this format: cache live check ok.`,
124
+ 'Use the stable reference below only to make this request large enough to cache.',
125
+ reference,
126
+ ].join('\n\n')
127
+ ),
128
+ ];
129
+
130
+ return addCacheControl<BaseMessage>(messages);
131
+ }
132
+
133
+ function getCacheUsage(message: AIMessage): CacheUsage {
134
+ const usage = message.usage_metadata;
135
+ const inputDetails = usage?.input_token_details;
136
+
137
+ return {
138
+ inputTokens: usage?.input_tokens ?? 0,
139
+ outputTokens: usage?.output_tokens ?? 0,
140
+ totalTokens: usage?.total_tokens ?? 0,
141
+ cacheRead: inputDetails?.cache_read ?? 0,
142
+ cacheCreation: inputDetails?.cache_creation ?? 0,
143
+ };
144
+ }
145
+
146
+ function hasCacheHit(usages: CacheUsage[]): boolean {
147
+ return usages.some(({ cacheRead }) => cacheRead > 0);
148
+ }
149
+
150
+ function hasCacheActivity(usages: CacheUsage[]): boolean {
151
+ return usages.some(
152
+ ({ cacheCreation, cacheRead }) => cacheCreation > 0 || cacheRead > 0
153
+ );
154
+ }
155
+
156
+ function log(message = ''): void {
157
+ process.stdout.write(`${message}\n`);
158
+ }
159
+
160
+ function logError(message: string): void {
161
+ process.stderr.write(`${message}\n`);
162
+ }
163
+
164
+ async function runCase({ label, model }: ModelCase): Promise<CacheUsage[]> {
165
+ const llmInput: ChatOpenRouterInput & { configuration: ClientOptions } = {
166
+ model,
167
+ apiKey,
168
+ maxTokens: 12,
169
+ temperature: 0,
170
+ promptCache: true,
171
+ streamUsage: true,
172
+ configuration: {
173
+ baseURL,
174
+ defaultHeaders: {
175
+ 'HTTP-Referer': 'https://librechat.ai',
176
+ 'X-Title': 'LibreChat OpenRouter Prompt Cache Live Test',
177
+ },
178
+ },
179
+ };
180
+ const llm = new ChatOpenRouter(llmInput);
181
+ const messages = buildMessages(model);
182
+ const usages: CacheUsage[] = [];
183
+
184
+ log(`\n${label}: ${model}`);
185
+
186
+ for (let attempt = 1; attempt <= attempts; attempt++) {
187
+ const started = Date.now();
188
+ const response = (await llm.invoke(messages)) as AIMessage;
189
+ const usage = getCacheUsage(response);
190
+ usages.push(usage);
191
+
192
+ log(
193
+ [
194
+ `attempt=${attempt}`,
195
+ `ms=${Date.now() - started}`,
196
+ `input=${usage.inputTokens}`,
197
+ `output=${usage.outputTokens}`,
198
+ `write=${usage.cacheCreation}`,
199
+ `read=${usage.cacheRead}`,
200
+ `total=${usage.totalTokens}`,
201
+ ].join(' ')
202
+ );
203
+
204
+ if (hasCacheHit(usages)) {
205
+ return usages;
206
+ }
207
+ }
208
+
209
+ return usages;
210
+ }
211
+
212
+ async function runStaticToolCase(): Promise<CacheUsage[]> {
213
+ const model = 'anthropic/claude-haiku-4.5';
214
+ const usages: CacheUsage[] = [];
215
+
216
+ log(`\nStatic tools through OpenRouter: ${model}`);
217
+
218
+ for (let attempt = 1; attempt <= attempts; attempt++) {
219
+ const llmInput: ChatOpenRouterInput & { configuration: ClientOptions } = {
220
+ model,
221
+ apiKey,
222
+ maxTokens: 12,
223
+ temperature: 0,
224
+ promptCache: true,
225
+ streamUsage: true,
226
+ configuration: {
227
+ baseURL,
228
+ defaultHeaders: {
229
+ 'HTTP-Referer': 'https://librechat.ai',
230
+ 'X-Title': 'LibreChat OpenRouter Prompt Cache Live Test',
231
+ },
232
+ },
233
+ };
234
+ const llm = new ChatOpenRouter(llmInput);
235
+ const tools = partitionAndMarkOpenRouterToolCache(
236
+ buildToolSet(attempt),
237
+ (name) => name.startsWith('dynamic_runtime_tool_')
238
+ ) as OpenRouterTool[];
239
+ const markedTool = tools.find((entry) => entry.cache_control != null);
240
+ if (markedTool?.function.name !== 'stable_reference_lookup') {
241
+ throw new Error('Static tool cache marker was not applied as expected');
242
+ }
243
+
244
+ const modelWithTools = llm.bindTools(tools);
245
+ const started = Date.now();
246
+ const response = (await modelWithTools.invoke([
247
+ new SystemMessage('Reply with exactly: cache live check ok.'),
248
+ new HumanMessage(
249
+ `Attempt ${attempt}. Do not call tools; only answer with the requested text.`
250
+ ),
251
+ ])) as AIMessage;
252
+ const usage = getCacheUsage(response);
253
+ usages.push(usage);
254
+
255
+ log(
256
+ [
257
+ `attempt=${attempt}`,
258
+ `ms=${Date.now() - started}`,
259
+ `input=${usage.inputTokens}`,
260
+ `output=${usage.outputTokens}`,
261
+ `write=${usage.cacheCreation}`,
262
+ `read=${usage.cacheRead}`,
263
+ `total=${usage.totalTokens}`,
264
+ ].join(' ')
265
+ );
266
+
267
+ if (hasCacheHit(usages)) {
268
+ return usages;
269
+ }
270
+ }
271
+
272
+ return usages;
273
+ }
274
+
275
+ async function main(): Promise<void> {
276
+ const results: Array<ModelCase & { usages: CacheUsage[] }> = [];
277
+
278
+ for (const modelCase of modelCases) {
279
+ const usages = await runCase(modelCase);
280
+ results.push({ ...modelCase, usages });
281
+ }
282
+
283
+ const staticToolUsages = await runStaticToolCase();
284
+ results.push({
285
+ label: 'Static tools',
286
+ model: 'anthropic/claude-haiku-4.5',
287
+ usages: staticToolUsages,
288
+ });
289
+
290
+ const failures = results.filter(({ usages }) => {
291
+ return !hasCacheActivity(usages) || !hasCacheHit(usages);
292
+ });
293
+
294
+ log('\nSummary');
295
+ for (const { label, model, usages } of results) {
296
+ const writes = usages.map(({ cacheCreation }) => cacheCreation).join(',');
297
+ const reads = usages.map(({ cacheRead }) => cacheRead).join(',');
298
+ log(`${label} ${model}: writes=[${writes}] reads=[${reads}]`);
299
+ }
300
+
301
+ if (failures.length > 0) {
302
+ const failedModels = failures.map(({ model }) => model).join(', ');
303
+ throw new Error(`Prompt caching was not confirmed for: ${failedModels}`);
304
+ }
305
+ }
306
+
307
+ main().catch((error: Error) => {
308
+ logError(error.message);
309
+ process.exit(1);
310
+ });
@@ -0,0 +1,140 @@
1
+ // src/specs/agent-handoffs.live.test.ts
2
+ /**
3
+ * Live handoff integration verification.
4
+ *
5
+ * Run with:
6
+ * RUN_HANDOFF_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- agent-handoffs.live.test.ts --runInBand
7
+ */
8
+ import { config as dotenvConfig } from 'dotenv';
9
+ dotenvConfig();
10
+
11
+ import { HumanMessage } from '@langchain/core/messages';
12
+ import { describe, expect, it, jest } from '@jest/globals';
13
+ import type { BaseMessage, ToolMessage } from '@langchain/core/messages';
14
+ import type { RunnableConfig } from '@langchain/core/runnables';
15
+ import type * as t from '@/types';
16
+ import { Constants, Providers } from '@/common';
17
+ import { Run } from '@/run';
18
+
19
+ const shouldRunLive =
20
+ process.env.RUN_HANDOFF_LIVE_TESTS === '1' &&
21
+ process.env.ANTHROPIC_API_KEY != null &&
22
+ process.env.ANTHROPIC_API_KEY !== '';
23
+
24
+ const describeIfLive = shouldRunLive ? describe : describe.skip;
25
+ const modelName =
26
+ process.env.ANTHROPIC_HANDOFF_LIVE_MODEL ?? 'claude-sonnet-4-6';
27
+
28
+ function createAnthropicAgent(
29
+ agentId: string,
30
+ instructions: string
31
+ ): t.AgentInputs {
32
+ return {
33
+ agentId,
34
+ provider: Providers.ANTHROPIC,
35
+ clientOptions: {
36
+ modelName,
37
+ apiKey: process.env.ANTHROPIC_API_KEY,
38
+ temperature: 0,
39
+ maxTokens: 256,
40
+ streaming: true,
41
+ },
42
+ instructions,
43
+ maxContextTokens: 8000,
44
+ };
45
+ }
46
+
47
+ function createStreamConfig(threadId: string): Partial<RunnableConfig> & {
48
+ version: 'v1' | 'v2';
49
+ streamMode: string;
50
+ } {
51
+ return {
52
+ configurable: { thread_id: threadId },
53
+ streamMode: 'values',
54
+ version: 'v2',
55
+ };
56
+ }
57
+
58
+ function contentToText(message: BaseMessage): string {
59
+ if (typeof message.content === 'string') {
60
+ return message.content;
61
+ }
62
+ if (!Array.isArray(message.content)) {
63
+ return '';
64
+ }
65
+ return message.content
66
+ .map((part) => {
67
+ if (typeof part === 'string') {
68
+ return part;
69
+ }
70
+ if ('text' in part && typeof part.text === 'string') {
71
+ return part.text;
72
+ }
73
+ return '';
74
+ })
75
+ .join('');
76
+ }
77
+
78
+ describeIfLive('Agent handoffs live integration', () => {
79
+ jest.setTimeout(120_000);
80
+
81
+ it('routes through a real Anthropic handoff and preserves instructions', async () => {
82
+ const nonce = `live-handoff-${Date.now()}`;
83
+ const expectedReply = `${nonce}-specialist-confirmed`;
84
+ const handoffToolName = `${Constants.LC_TRANSFER_TO_}specialist`;
85
+ const agents: t.AgentInputs[] = [
86
+ createAnthropicAgent(
87
+ 'router',
88
+ `You are a routing agent. For every user request, your only valid action is to call the handoff tool named ${handoffToolName}. Do not answer directly.
89
+
90
+ When you call the handoff tool, include instructions telling the specialist to reply exactly with this marker and no extra words: ${expectedReply}`
91
+ ),
92
+ createAnthropicAgent(
93
+ 'specialist',
94
+ 'You are the specialist. When you receive handoff instructions with a marker, reply exactly with that marker and no extra words.'
95
+ ),
96
+ ];
97
+ const edges: t.GraphEdge[] = [
98
+ {
99
+ from: 'router',
100
+ to: 'specialist',
101
+ edgeType: 'handoff',
102
+ description: 'Transfer to the specialist for the final response',
103
+ prompt:
104
+ 'Instructions for the specialist. Include any exact marker that must be returned.',
105
+ promptKey: 'instructions',
106
+ },
107
+ ];
108
+ const run = await Run.create({
109
+ runId: `${nonce}-run`,
110
+ graphConfig: { type: 'multi-agent', agents, edges },
111
+ returnContent: true,
112
+ skipCleanup: true,
113
+ });
114
+
115
+ await run.processStream(
116
+ {
117
+ messages: [
118
+ new HumanMessage(
119
+ `Please delegate this to the specialist. The final answer must be exactly: ${expectedReply}`
120
+ ),
121
+ ],
122
+ },
123
+ createStreamConfig(`${nonce}-thread`)
124
+ );
125
+
126
+ const messages = run.getRunMessages() ?? [];
127
+ const handoffMessage = messages.find(
128
+ (message): message is ToolMessage =>
129
+ message.getType() === 'tool' &&
130
+ (message as ToolMessage).name === handoffToolName
131
+ );
132
+ const finalText = messages
133
+ .filter((message) => message.getType() === 'ai')
134
+ .map(contentToText)
135
+ .join('\n');
136
+
137
+ expect(handoffMessage).toBeDefined();
138
+ expect(finalText).toContain(expectedReply);
139
+ });
140
+ });