@librechat/agents 3.1.81 → 3.1.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +125 -36
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +13 -0
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +50 -13
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +17 -7
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/toolCache.cjs +55 -0
- package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -0
- package/dist/cjs/main.cjs +1 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +96 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +70 -12
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +125 -36
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +13 -0
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +50 -14
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +17 -7
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/toolCache.mjs +53 -0
- package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -0
- package/dist/esm/main.mjs +1 -1
- package/dist/esm/messages/cache.mjs +96 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +70 -12
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +8 -1
- package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +6 -2
- package/dist/types/llm/openrouter/index.d.ts +1 -0
- package/dist/types/llm/openrouter/toolCache.d.ts +2 -0
- package/dist/types/messages/cache.d.ts +1 -0
- package/dist/types/tools/ToolNode.d.ts +5 -0
- package/dist/types/types/run.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +191 -40
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +0 -4
- package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +128 -0
- package/src/agents/__tests__/AgentContext.test.ts +355 -18
- package/src/agents/__tests__/promptCacheLiveHelpers.ts +8 -2
- package/src/graphs/Graph.ts +24 -0
- package/src/llm/custom-chat-models.smoke.test.ts +76 -0
- package/src/llm/openai/deepseek.test.ts +14 -1
- package/src/llm/openai/index.ts +38 -12
- package/src/llm/openrouter/index.ts +22 -7
- package/src/llm/openrouter/reasoning.test.ts +33 -0
- package/src/llm/openrouter/toolCache.test.ts +83 -0
- package/src/llm/openrouter/toolCache.ts +89 -0
- package/src/messages/cache.test.ts +127 -0
- package/src/messages/cache.ts +143 -0
- package/src/scripts/openrouter_prompt_cache_live.ts +310 -0
- package/src/specs/agent-handoffs.live.test.ts +140 -0
- package/src/specs/agent-handoffs.test.ts +266 -2
- package/src/specs/openrouter.simple.test.ts +15 -8
- package/src/tools/ToolNode.ts +92 -13
- package/src/types/run.ts +2 -0
|
package/src/agents/__tests__/AgentContext.test.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/agents/__tests__/AgentContext.test.ts
|
|
2
|
-
import { HumanMessage } from '@langchain/core/messages';
|
|
2
|
+
import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages';
|
|
3
3
|
import { AgentContext } from '../AgentContext';
|
|
4
4
|
import { Providers } from '@/common';
|
|
5
5
|
import { addBedrockCacheControl } from '@/messages/cache';
|
|
@@ -79,7 +79,7 @@ describe('AgentContext', () => {
|
|
|
79
79
|
);
|
|
80
80
|
});
|
|
81
81
|
|
|
82
|
-
it('
|
|
82
|
+
it('moves Anthropic dynamic instructions behind stable history', async () => {
|
|
83
83
|
const ctx = createBasicContext({
|
|
84
84
|
agentConfig: {
|
|
85
85
|
provider: Providers.ANTHROPIC,
|
|
@@ -89,18 +89,39 @@ describe('AgentContext', () => {
|
|
|
89
89
|
},
|
|
90
90
|
});
|
|
91
91
|
|
|
92
|
-
const result = await ctx.systemRunnable!.invoke([
|
|
92
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
93
|
+
new HumanMessage('Hello'),
|
|
94
|
+
new HumanMessage('Second'),
|
|
95
|
+
]);
|
|
93
96
|
const content = result[0].content as TestSystemContentBlock[];
|
|
94
|
-
expect(content).
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
97
|
+
expect(content).toEqual([
|
|
98
|
+
{
|
|
99
|
+
type: 'text',
|
|
100
|
+
text: 'Stable instructions',
|
|
101
|
+
cache_control: { type: 'ephemeral' },
|
|
102
|
+
},
|
|
103
|
+
]);
|
|
104
|
+
expect(result[1].content).toBe('Hello');
|
|
105
|
+
expect(result[2].content).toBe('Dynamic instructions');
|
|
106
|
+
expect(result[3].content).toBe('Second');
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it('places Anthropic dynamic instructions before a single latest user prompt', async () => {
|
|
110
|
+
const ctx = createBasicContext({
|
|
111
|
+
agentConfig: {
|
|
112
|
+
provider: Providers.ANTHROPIC,
|
|
113
|
+
clientOptions: { model: 'claude-3-5-sonnet', promptCache: true },
|
|
114
|
+
instructions: 'Stable instructions',
|
|
115
|
+
additional_instructions: 'Dynamic instructions',
|
|
116
|
+
},
|
|
103
117
|
});
|
|
118
|
+
|
|
119
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
120
|
+
new HumanMessage('Latest'),
|
|
121
|
+
]);
|
|
122
|
+
|
|
123
|
+
expect(result[1].content).toBe('Dynamic instructions');
|
|
124
|
+
expect(result[2].content).toBe('Latest');
|
|
104
125
|
});
|
|
105
126
|
|
|
106
127
|
it('omits Anthropic cache control when only dynamic system text exists', async () => {
|
|
@@ -119,7 +140,7 @@ describe('AgentContext', () => {
|
|
|
119
140
|
expect(content[0]).not.toHaveProperty('cache_control');
|
|
120
141
|
});
|
|
121
142
|
|
|
122
|
-
it('keeps cross-run summaries in the dynamic Anthropic
|
|
143
|
+
it('keeps cross-run summaries in the dynamic Anthropic tail', async () => {
|
|
123
144
|
const ctx = createBasicContext({
|
|
124
145
|
agentConfig: {
|
|
125
146
|
provider: Providers.ANTHROPIC,
|
|
@@ -131,12 +152,11 @@ describe('AgentContext', () => {
|
|
|
131
152
|
|
|
132
153
|
const result = await ctx.systemRunnable!.invoke([]);
|
|
133
154
|
const content = result[0].content as TestSystemContentBlock[];
|
|
134
|
-
expect(content).toHaveLength(
|
|
155
|
+
expect(content).toHaveLength(1);
|
|
135
156
|
expect(content[0]).toHaveProperty('cache_control');
|
|
136
|
-
expect(
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
});
|
|
157
|
+
expect(result[1].content).toBe(
|
|
158
|
+
'## Conversation Summary\n\nPrior summary'
|
|
159
|
+
);
|
|
140
160
|
});
|
|
141
161
|
|
|
142
162
|
it('places the Bedrock cache point before dynamic system text', async () => {
|
|
@@ -198,6 +218,270 @@ describe('AgentContext', () => {
|
|
|
198
218
|
);
|
|
199
219
|
});
|
|
200
220
|
|
|
221
|
+
it('moves OpenRouter dynamic instructions behind stable history', async () => {
|
|
222
|
+
const ctx = createBasicContext({
|
|
223
|
+
agentConfig: {
|
|
224
|
+
provider: Providers.OPENROUTER,
|
|
225
|
+
clientOptions: {
|
|
226
|
+
model: 'anthropic/claude-haiku-4.5',
|
|
227
|
+
promptCache: true,
|
|
228
|
+
},
|
|
229
|
+
instructions: 'Stable instructions',
|
|
230
|
+
additional_instructions: 'Dynamic instructions',
|
|
231
|
+
},
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
235
|
+
new HumanMessage('Hello'),
|
|
236
|
+
new HumanMessage('Second'),
|
|
237
|
+
]);
|
|
238
|
+
const content = result[0].content as TestSystemContentBlock[];
|
|
239
|
+
expect(content).toEqual([
|
|
240
|
+
{
|
|
241
|
+
type: 'text',
|
|
242
|
+
text: 'Stable instructions',
|
|
243
|
+
cache_control: { type: 'ephemeral' },
|
|
244
|
+
},
|
|
245
|
+
]);
|
|
246
|
+
expect(result[1].content).toBe('Hello');
|
|
247
|
+
expect(result[2].content).toBe('Dynamic instructions');
|
|
248
|
+
expect(result[3].content).toBe('Second');
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
it('keeps dynamic-only OpenRouter instructions as system text', async () => {
|
|
252
|
+
const tokenCounter = (msg: { content: unknown }): number => {
|
|
253
|
+
const content =
|
|
254
|
+
typeof msg.content === 'string'
|
|
255
|
+
? msg.content
|
|
256
|
+
: JSON.stringify(msg.content);
|
|
257
|
+
return content.length;
|
|
258
|
+
};
|
|
259
|
+
const ctx = createBasicContext({
|
|
260
|
+
agentConfig: {
|
|
261
|
+
provider: Providers.OPENROUTER,
|
|
262
|
+
clientOptions: {
|
|
263
|
+
model: 'anthropic/claude-haiku-4.5',
|
|
264
|
+
promptCache: true,
|
|
265
|
+
},
|
|
266
|
+
instructions: undefined,
|
|
267
|
+
additional_instructions: 'Dynamic only',
|
|
268
|
+
},
|
|
269
|
+
tokenCounter,
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
ctx.initializeSystemRunnable();
|
|
273
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
274
|
+
new HumanMessage('First'),
|
|
275
|
+
new HumanMessage('Second'),
|
|
276
|
+
]);
|
|
277
|
+
const firstContent = result[1].content as TestSystemContentBlock[];
|
|
278
|
+
const secondContent = result[2].content as TestSystemContentBlock[];
|
|
279
|
+
|
|
280
|
+
expect(result).toHaveLength(3);
|
|
281
|
+
expect(result[0].content).toBe('Dynamic only');
|
|
282
|
+
expect(firstContent[0]).toMatchObject({
|
|
283
|
+
type: 'text',
|
|
284
|
+
text: 'First',
|
|
285
|
+
cache_control: { type: 'ephemeral' },
|
|
286
|
+
});
|
|
287
|
+
expect(secondContent[0]).toMatchObject({
|
|
288
|
+
type: 'text',
|
|
289
|
+
text: 'Second',
|
|
290
|
+
cache_control: { type: 'ephemeral' },
|
|
291
|
+
});
|
|
292
|
+
expect(ctx.systemMessageTokens).toBeGreaterThan(0);
|
|
293
|
+
expect(ctx.dynamicInstructionTokens).toBe(0);
|
|
294
|
+
expect(ctx.instructionTokens).toBe(ctx.systemMessageTokens);
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
it('does not cache OpenRouter body messages after dynamic instructions', async () => {
|
|
298
|
+
const ctx = createBasicContext({
|
|
299
|
+
agentConfig: {
|
|
300
|
+
provider: Providers.OPENROUTER,
|
|
301
|
+
clientOptions: {
|
|
302
|
+
model: 'google/gemini-2.5-flash',
|
|
303
|
+
promptCache: true,
|
|
304
|
+
},
|
|
305
|
+
instructions: 'Stable instructions',
|
|
306
|
+
additional_instructions: 'Dynamic instructions',
|
|
307
|
+
},
|
|
308
|
+
});
|
|
309
|
+
|
|
310
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
311
|
+
new HumanMessage('First'),
|
|
312
|
+
new HumanMessage('Second'),
|
|
313
|
+
]);
|
|
314
|
+
|
|
315
|
+
expect(result[1].content).toBe('First');
|
|
316
|
+
expect(result[2].content).toBe('Dynamic instructions');
|
|
317
|
+
expect(result[3].content).toBe('Second');
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
it('keeps the first OpenRouter user message before single-turn dynamic instructions', async () => {
|
|
321
|
+
const ctx = createBasicContext({
|
|
322
|
+
agentConfig: {
|
|
323
|
+
provider: Providers.OPENROUTER,
|
|
324
|
+
clientOptions: {
|
|
325
|
+
model: 'anthropic/claude-haiku-4.5',
|
|
326
|
+
promptCache: true,
|
|
327
|
+
},
|
|
328
|
+
instructions: 'Stable instructions',
|
|
329
|
+
additional_instructions: 'Dynamic instructions',
|
|
330
|
+
},
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
334
|
+
new HumanMessage('Latest'),
|
|
335
|
+
]);
|
|
336
|
+
|
|
337
|
+
expect(result[1].content).toBe('Latest');
|
|
338
|
+
expect(result[2].content).toBe('Dynamic instructions');
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
it('caches stable Anthropic history before dynamic instructions', async () => {
|
|
342
|
+
const ctx = createBasicContext({
|
|
343
|
+
agentConfig: {
|
|
344
|
+
provider: Providers.ANTHROPIC,
|
|
345
|
+
clientOptions: {
|
|
346
|
+
model: 'claude-3-5-sonnet',
|
|
347
|
+
promptCache: true,
|
|
348
|
+
},
|
|
349
|
+
instructions: 'Stable instructions',
|
|
350
|
+
additional_instructions: 'Dynamic instructions',
|
|
351
|
+
},
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
355
|
+
new HumanMessage('First'),
|
|
356
|
+
new AIMessage('Stable assistant history'),
|
|
357
|
+
new HumanMessage('Latest'),
|
|
358
|
+
]);
|
|
359
|
+
const stableHistory = result[2].content as TestSystemContentBlock[];
|
|
360
|
+
|
|
361
|
+
expect(result[1].content).toBe('First');
|
|
362
|
+
expect(stableHistory[0]).toMatchObject({
|
|
363
|
+
type: 'text',
|
|
364
|
+
text: 'Stable assistant history',
|
|
365
|
+
cache_control: { type: 'ephemeral' },
|
|
366
|
+
});
|
|
367
|
+
expect(result[3].content).toBe('Dynamic instructions');
|
|
368
|
+
expect(result[4].content).toBe('Latest');
|
|
369
|
+
});
|
|
370
|
+
|
|
371
|
+
it('does not place Anthropic dynamic instructions between tool calls and results', async () => {
|
|
372
|
+
const ctx = createBasicContext({
|
|
373
|
+
agentConfig: {
|
|
374
|
+
provider: Providers.ANTHROPIC,
|
|
375
|
+
clientOptions: {
|
|
376
|
+
model: 'claude-3-5-sonnet',
|
|
377
|
+
promptCache: true,
|
|
378
|
+
},
|
|
379
|
+
instructions: 'Stable instructions',
|
|
380
|
+
additional_instructions: 'Dynamic instructions',
|
|
381
|
+
},
|
|
382
|
+
});
|
|
383
|
+
|
|
384
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
385
|
+
new HumanMessage('Use the tool'),
|
|
386
|
+
new AIMessage({
|
|
387
|
+
content: '',
|
|
388
|
+
tool_calls: [
|
|
389
|
+
{
|
|
390
|
+
id: 'call_1',
|
|
391
|
+
name: 'calculator',
|
|
392
|
+
args: { expression: '2+2' },
|
|
393
|
+
type: 'tool_call',
|
|
394
|
+
},
|
|
395
|
+
],
|
|
396
|
+
}),
|
|
397
|
+
new ToolMessage({
|
|
398
|
+
content: '4',
|
|
399
|
+
name: 'calculator',
|
|
400
|
+
tool_call_id: 'call_1',
|
|
401
|
+
}),
|
|
402
|
+
]);
|
|
403
|
+
|
|
404
|
+
expect(result[1].content).toBe('Use the tool');
|
|
405
|
+
expect((result[2] as AIMessage).tool_calls?.[0]?.id).toBe('call_1');
|
|
406
|
+
expect(result[3].getType()).toBe('tool');
|
|
407
|
+
expect(result[4].content).toBe('Dynamic instructions');
|
|
408
|
+
});
|
|
409
|
+
|
|
410
|
+
it('caches stable OpenRouter history before dynamic instructions', async () => {
|
|
411
|
+
const ctx = createBasicContext({
|
|
412
|
+
agentConfig: {
|
|
413
|
+
provider: Providers.OPENROUTER,
|
|
414
|
+
clientOptions: {
|
|
415
|
+
model: 'anthropic/claude-haiku-4.5',
|
|
416
|
+
promptCache: true,
|
|
417
|
+
},
|
|
418
|
+
instructions: 'Stable instructions',
|
|
419
|
+
additional_instructions: 'Dynamic instructions',
|
|
420
|
+
},
|
|
421
|
+
});
|
|
422
|
+
|
|
423
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
424
|
+
new HumanMessage('First'),
|
|
425
|
+
new AIMessage('Stable assistant history'),
|
|
426
|
+
new HumanMessage('Latest'),
|
|
427
|
+
]);
|
|
428
|
+
const stableHistory = result[2].content as TestSystemContentBlock[];
|
|
429
|
+
|
|
430
|
+
expect(result[1].content).toBe('First');
|
|
431
|
+
expect(stableHistory[0]).toMatchObject({
|
|
432
|
+
type: 'text',
|
|
433
|
+
text: 'Stable assistant history',
|
|
434
|
+
cache_control: { type: 'ephemeral' },
|
|
435
|
+
});
|
|
436
|
+
expect(result[3].content).toBe('Dynamic instructions');
|
|
437
|
+
expect(result[4].content).toBe('Latest');
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
it('adds OpenRouter body cache points when there is no dynamic tail', async () => {
|
|
441
|
+
const ctx = createBasicContext({
|
|
442
|
+
agentConfig: {
|
|
443
|
+
provider: Providers.OPENROUTER,
|
|
444
|
+
clientOptions: {
|
|
445
|
+
model: 'google/gemini-3.1-pro-preview',
|
|
446
|
+
promptCache: true,
|
|
447
|
+
},
|
|
448
|
+
instructions: 'Stable instructions',
|
|
449
|
+
},
|
|
450
|
+
});
|
|
451
|
+
|
|
452
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
453
|
+
new HumanMessage('First'),
|
|
454
|
+
new HumanMessage('Second'),
|
|
455
|
+
]);
|
|
456
|
+
const firstContent = result[1].content as TestSystemContentBlock[];
|
|
457
|
+
const secondContent = result[2].content as TestSystemContentBlock[];
|
|
458
|
+
expect(firstContent[0]).toHaveProperty('cache_control');
|
|
459
|
+
expect(secondContent[0]).toHaveProperty('cache_control');
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
it('places OpenRouter user-message summaries after the first stable message', async () => {
|
|
463
|
+
const ctx = createBasicContext({
|
|
464
|
+
agentConfig: {
|
|
465
|
+
provider: Providers.OPENROUTER,
|
|
466
|
+
clientOptions: {
|
|
467
|
+
model: 'google/gemini-3.1-pro-preview',
|
|
468
|
+
promptCache: true,
|
|
469
|
+
},
|
|
470
|
+
instructions: 'Stable instructions',
|
|
471
|
+
},
|
|
472
|
+
});
|
|
473
|
+
ctx.setSummary('Rotating summary', 7);
|
|
474
|
+
|
|
475
|
+
const result = await ctx.systemRunnable!.invoke([
|
|
476
|
+
new HumanMessage('First'),
|
|
477
|
+
new HumanMessage('Second'),
|
|
478
|
+
]);
|
|
479
|
+
|
|
480
|
+
expect(result[1].content).toBe('First');
|
|
481
|
+
expect(result[2].content).toContain('Rotating summary');
|
|
482
|
+
expect(result[3].content).toBe('Second');
|
|
483
|
+
});
|
|
484
|
+
|
|
201
485
|
it('preserves the Bedrock system cache point through message cache-control pass', async () => {
|
|
202
486
|
const ctx = createBasicContext({
|
|
203
487
|
agentConfig: {
|
|
@@ -557,6 +841,59 @@ describe('AgentContext', () => {
|
|
|
557
841
|
expect(ctxWithDeferred.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
|
|
558
842
|
});
|
|
559
843
|
|
|
844
|
+
it('counts OpenRouter dynamic instructions outside the system message', () => {
|
|
845
|
+
const ctx = createBasicContext({
|
|
846
|
+
agentConfig: {
|
|
847
|
+
provider: Providers.OPENROUTER,
|
|
848
|
+
clientOptions: {
|
|
849
|
+
model: 'google/gemini-3.1-pro-preview',
|
|
850
|
+
promptCache: true,
|
|
851
|
+
},
|
|
852
|
+
instructions: 'Stable',
|
|
853
|
+
additional_instructions: 'Dynamic tail',
|
|
854
|
+
},
|
|
855
|
+
tokenCounter: mockTokenCounter,
|
|
856
|
+
});
|
|
857
|
+
|
|
858
|
+
ctx.initializeSystemRunnable();
|
|
859
|
+
|
|
860
|
+
expect(ctx.systemMessageTokens).toBeGreaterThan(0);
|
|
861
|
+
expect(ctx.dynamicInstructionTokens).toBeGreaterThan(0);
|
|
862
|
+
expect(ctx.instructionTokens).toBe(
|
|
863
|
+
ctx.systemMessageTokens + ctx.dynamicInstructionTokens
|
|
864
|
+
);
|
|
865
|
+
expect(ctx.getTokenBudgetBreakdown().dynamicInstructionTokens).toBe(
|
|
866
|
+
ctx.dynamicInstructionTokens
|
|
867
|
+
);
|
|
868
|
+
});
|
|
869
|
+
|
|
870
|
+
it('clears OpenRouter dynamic instruction tokens when no prompt remains', () => {
|
|
871
|
+
const ctx = createBasicContext({
|
|
872
|
+
agentConfig: {
|
|
873
|
+
provider: Providers.OPENROUTER,
|
|
874
|
+
clientOptions: {
|
|
875
|
+
model: 'google/gemini-3.1-pro-preview',
|
|
876
|
+
promptCache: true,
|
|
877
|
+
},
|
|
878
|
+
instructions: 'Stable instructions',
|
|
879
|
+
},
|
|
880
|
+
tokenCounter: mockTokenCounter,
|
|
881
|
+
});
|
|
882
|
+
|
|
883
|
+
ctx.setInitialSummary('Volatile summary', 8);
|
|
884
|
+
ctx.initializeSystemRunnable();
|
|
885
|
+
expect(ctx.dynamicInstructionTokens).toBeGreaterThan(0);
|
|
886
|
+
|
|
887
|
+
ctx.instructions = undefined;
|
|
888
|
+
ctx.clearSummary();
|
|
889
|
+
ctx.initializeSystemRunnable();
|
|
890
|
+
|
|
891
|
+
expect(ctx.systemRunnable).toBeUndefined();
|
|
892
|
+
expect(ctx.systemMessageTokens).toBe(0);
|
|
893
|
+
expect(ctx.dynamicInstructionTokens).toBe(0);
|
|
894
|
+
expect(ctx.instructionTokens).toBe(0);
|
|
895
|
+
});
|
|
896
|
+
|
|
560
897
|
it('excludes programmatic-only toolDefinitions from toolSchemaTokens', async () => {
|
|
561
898
|
// getEventDrivenToolsForBinding excludes definitions whose
|
|
562
899
|
// allowed_callers omit 'direct'. Accounting must mirror that — a
|
|
package/src/agents/__tests__/promptCacheLiveHelpers.ts
CHANGED
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import { expect } from '@jest/globals';
|
|
2
2
|
import { HumanMessage } from '@langchain/core/messages';
|
|
3
3
|
import type { UsageMetadata } from '@langchain/core/messages';
|
|
4
|
+
import type { ClientOptions } from '@langchain/openai';
|
|
4
5
|
import type * as t from '@/types';
|
|
5
6
|
import { GraphEvents, Providers } from '@/common';
|
|
6
7
|
import { AgentContext } from '../AgentContext';
|
|
7
8
|
import { ModelEndHandler } from '@/events';
|
|
8
9
|
import { Run } from '@/run';
|
|
10
|
+
import type { ChatOpenRouterInput } from '@/llm/openrouter';
|
|
9
11
|
|
|
10
|
-
type LivePromptCacheProvider =
|
|
12
|
+
type LivePromptCacheProvider =
|
|
13
|
+
| Providers.ANTHROPIC
|
|
14
|
+
| Providers.BEDROCK
|
|
15
|
+
| Providers.OPENROUTER;
|
|
11
16
|
|
|
12
17
|
type PromptCacheExpectedSystemBlock =
|
|
13
18
|
| { type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }
|
|
@@ -15,7 +20,8 @@ type PromptCacheExpectedSystemBlock =
|
|
|
15
20
|
|
|
16
21
|
type LivePromptCacheClientOptions =
|
|
17
22
|
| t.ClientOptions
|
|
18
|
-
| t.BedrockAnthropicClientOptions
|
|
23
|
+
| t.BedrockAnthropicClientOptions
|
|
24
|
+
| (ChatOpenRouterInput & { configuration?: ClientOptions });
|
|
19
25
|
|
|
20
26
|
export function buildStableInstructions({
|
|
21
27
|
nonce,
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -62,6 +62,7 @@ import { isThinkingEnabled } from '@/llm/request';
|
|
|
62
62
|
import { initializeModel } from '@/llm/init';
|
|
63
63
|
import { HandlerRegistry } from '@/events';
|
|
64
64
|
import { ChatOpenAI } from '@/llm/openai';
|
|
65
|
+
import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
|
|
65
66
|
import type { HookRegistry } from '@/hooks';
|
|
66
67
|
|
|
67
68
|
const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
|
|
@@ -817,6 +818,19 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
817
818
|
rawToolsForBinding,
|
|
818
819
|
makeIsDeferred(agentContext.toolDefinitions)
|
|
819
820
|
) ?? rawToolsForBinding;
|
|
821
|
+
} else if (
|
|
822
|
+
agentContext.provider === Providers.OPENROUTER &&
|
|
823
|
+
(
|
|
824
|
+
agentContext.clientOptions as
|
|
825
|
+
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
826
|
+
| undefined
|
|
827
|
+
)?.promptCache === true
|
|
828
|
+
) {
|
|
829
|
+
toolsForBinding =
|
|
830
|
+
partitionAndMarkOpenRouterToolCache(
|
|
831
|
+
rawToolsForBinding,
|
|
832
|
+
makeIsDeferred(agentContext.toolDefinitions)
|
|
833
|
+
) ?? rawToolsForBinding;
|
|
820
834
|
}
|
|
821
835
|
|
|
822
836
|
let model =
|
|
@@ -1073,6 +1087,16 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1073
1087
|
if (bedrockOptions?.promptCache === true) {
|
|
1074
1088
|
finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
|
|
1075
1089
|
}
|
|
1090
|
+
} else if (agentContext.provider === Providers.OPENROUTER) {
|
|
1091
|
+
const openRouterOptions = agentContext.clientOptions as
|
|
1092
|
+
| t.ProviderOptionsMap[Providers.OPENROUTER]
|
|
1093
|
+
| undefined;
|
|
1094
|
+
if (
|
|
1095
|
+
openRouterOptions?.promptCache === true &&
|
|
1096
|
+
!agentContext.systemRunnable
|
|
1097
|
+
) {
|
|
1098
|
+
finalMessages = addCacheControl<BaseMessage>(finalMessages);
|
|
1099
|
+
}
|
|
1076
1100
|
}
|
|
1077
1101
|
|
|
1078
1102
|
if (
|
|
package/src/llm/custom-chat-models.smoke.test.ts
CHANGED
|
@@ -120,6 +120,17 @@ type OpenRouterReasoningStreamChoice = Omit<
|
|
|
120
120
|
> & {
|
|
121
121
|
delta: OpenRouterReasoningStreamDelta;
|
|
122
122
|
};
|
|
123
|
+
type PromptTokensDetailsWithCacheWrite = NonNullable<
|
|
124
|
+
OpenAIClient.Completions.CompletionUsage['prompt_tokens_details']
|
|
125
|
+
> & {
|
|
126
|
+
cache_write_tokens?: number;
|
|
127
|
+
};
|
|
128
|
+
type CompletionUsageWithCacheWrite = Omit<
|
|
129
|
+
OpenAIClient.Completions.CompletionUsage,
|
|
130
|
+
'prompt_tokens_details'
|
|
131
|
+
> & {
|
|
132
|
+
prompt_tokens_details?: PromptTokensDetailsWithCacheWrite;
|
|
133
|
+
};
|
|
123
134
|
type OpenAIStreamModel = ChatOpenAI | AzureChatOpenAI;
|
|
124
135
|
|
|
125
136
|
const baseAzureFields = {
|
|
@@ -654,6 +665,71 @@ describe('custom chat model class smoke tests', () => {
|
|
|
654
665
|
]);
|
|
655
666
|
});
|
|
656
667
|
|
|
668
|
+
it('maps OpenRouter cache write usage to cache_creation in streaming responses', async () => {
|
|
669
|
+
const model = new ChatOpenRouter({
|
|
670
|
+
model: 'anthropic/claude-sonnet-test',
|
|
671
|
+
apiKey: 'test-key',
|
|
672
|
+
streamUsage: true,
|
|
673
|
+
});
|
|
674
|
+
const completions = (model as unknown as StreamingCompletionBackedModel)
|
|
675
|
+
.completions;
|
|
676
|
+
const usage: CompletionUsageWithCacheWrite = {
|
|
677
|
+
prompt_tokens: 11,
|
|
678
|
+
completion_tokens: 7,
|
|
679
|
+
total_tokens: 18,
|
|
680
|
+
prompt_tokens_details: {
|
|
681
|
+
audio_tokens: 2,
|
|
682
|
+
cached_tokens: 3,
|
|
683
|
+
cache_write_tokens: 5,
|
|
684
|
+
},
|
|
685
|
+
completion_tokens_details: {
|
|
686
|
+
audio_tokens: 4,
|
|
687
|
+
reasoning_tokens: 6,
|
|
688
|
+
},
|
|
689
|
+
};
|
|
690
|
+
|
|
691
|
+
async function* streamChunks(): AsyncGenerator<OpenAIClient.Chat.Completions.ChatCompletionChunk> {
|
|
692
|
+
yield createOpenAIStreamChunk('answer', 'stop');
|
|
693
|
+
yield {
|
|
694
|
+
id: 'chatcmpl-openrouter-usage',
|
|
695
|
+
object: 'chat.completion.chunk',
|
|
696
|
+
created: 0,
|
|
697
|
+
model: 'anthropic/claude-sonnet-test',
|
|
698
|
+
choices: [],
|
|
699
|
+
usage,
|
|
700
|
+
} as OpenAIClient.Chat.Completions.ChatCompletionChunk;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
completions.completionWithRetry = async (): Promise<
|
|
704
|
+
AsyncIterable<OpenAIClient.Chat.Completions.ChatCompletionChunk>
|
|
705
|
+
> => streamChunks();
|
|
706
|
+
|
|
707
|
+
const chunks: AIMessageChunk[] = [];
|
|
708
|
+
const stream = await model.stream([new HumanMessage('hi')]);
|
|
709
|
+
for await (const chunk of stream) {
|
|
710
|
+
chunks.push(chunk);
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
const usageChunk = chunks.find(
|
|
714
|
+
(chunk) =>
|
|
715
|
+
chunk.usage_metadata?.input_token_details?.cache_creation === 5
|
|
716
|
+
);
|
|
717
|
+
expect(usageChunk?.usage_metadata).toEqual({
|
|
718
|
+
input_tokens: 11,
|
|
719
|
+
output_tokens: 7,
|
|
720
|
+
total_tokens: 18,
|
|
721
|
+
input_token_details: {
|
|
722
|
+
audio: 2,
|
|
723
|
+
cache_read: 3,
|
|
724
|
+
cache_creation: 5,
|
|
725
|
+
},
|
|
726
|
+
output_token_details: {
|
|
727
|
+
audio: 4,
|
|
728
|
+
reasoning: 6,
|
|
729
|
+
},
|
|
730
|
+
});
|
|
731
|
+
});
|
|
732
|
+
|
|
657
733
|
it('keeps Anthropic output, residency, compaction, and stream-delay options', () => {
|
|
658
734
|
const contextManagement = {
|
|
659
735
|
edits: [
|
|
package/src/llm/openai/deepseek.test.ts
CHANGED
|
@@ -11,6 +11,17 @@ type DeepSeekRequest =
|
|
|
11
11
|
type OpenAIChatCompletion = OpenAIClient.Chat.Completions.ChatCompletion;
|
|
12
12
|
type OpenAIChatCompletionChunk =
|
|
13
13
|
OpenAIClient.Chat.Completions.ChatCompletionChunk;
|
|
14
|
+
type PromptTokensDetailsWithCacheWrite = NonNullable<
|
|
15
|
+
OpenAIClient.Completions.CompletionUsage['prompt_tokens_details']
|
|
16
|
+
> & {
|
|
17
|
+
cache_write_tokens?: number;
|
|
18
|
+
};
|
|
19
|
+
type CompletionUsageWithCacheWrite = Omit<
|
|
20
|
+
OpenAIClient.Completions.CompletionUsage,
|
|
21
|
+
'prompt_tokens_details'
|
|
22
|
+
> & {
|
|
23
|
+
prompt_tokens_details?: PromptTokensDetailsWithCacheWrite;
|
|
24
|
+
};
|
|
14
25
|
type ReasoningAssistantMessageParam =
|
|
15
26
|
OpenAIClient.Chat.Completions.ChatCompletionAssistantMessageParam & {
|
|
16
27
|
reasoning_content?: string;
|
|
@@ -129,7 +140,7 @@ async function* createCompletionStream(
|
|
|
129
140
|
}
|
|
130
141
|
|
|
131
142
|
function createCompletion(
|
|
132
|
-
usage:
|
|
143
|
+
usage: CompletionUsageWithCacheWrite = {
|
|
133
144
|
prompt_tokens: 1,
|
|
134
145
|
completion_tokens: 1,
|
|
135
146
|
total_tokens: 2,
|
|
@@ -392,6 +403,7 @@ describe('ChatDeepSeek', () => {
|
|
|
392
403
|
prompt_tokens_details: {
|
|
393
404
|
audio_tokens: 2,
|
|
394
405
|
cached_tokens: 3,
|
|
406
|
+
cache_write_tokens: 6,
|
|
395
407
|
},
|
|
396
408
|
completion_tokens_details: {
|
|
397
409
|
audio_tokens: 4,
|
|
@@ -409,6 +421,7 @@ describe('ChatDeepSeek', () => {
|
|
|
409
421
|
input_token_details: {
|
|
410
422
|
audio: 2,
|
|
411
423
|
cache_read: 3,
|
|
424
|
+
cache_creation: 6,
|
|
412
425
|
},
|
|
413
426
|
output_token_details: {
|
|
414
427
|
audio: 4,
|