@librechat/agents 3.1.81 → 3.1.83

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/cjs/agents/AgentContext.cjs +125 -36
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +13 -0
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/llm/openai/index.cjs +50 -13
  6. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  7. package/dist/cjs/llm/openrouter/index.cjs +17 -7
  8. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  9. package/dist/cjs/llm/openrouter/toolCache.cjs +55 -0
  10. package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -0
  11. package/dist/cjs/main.cjs +1 -0
  12. package/dist/cjs/main.cjs.map +1 -1
  13. package/dist/cjs/messages/cache.cjs +96 -0
  14. package/dist/cjs/messages/cache.cjs.map +1 -1
  15. package/dist/cjs/tools/ToolNode.cjs +70 -12
  16. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  17. package/dist/esm/agents/AgentContext.mjs +125 -36
  18. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  19. package/dist/esm/graphs/Graph.mjs +13 -0
  20. package/dist/esm/graphs/Graph.mjs.map +1 -1
  21. package/dist/esm/llm/openai/index.mjs +50 -14
  22. package/dist/esm/llm/openai/index.mjs.map +1 -1
  23. package/dist/esm/llm/openrouter/index.mjs +17 -7
  24. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  25. package/dist/esm/llm/openrouter/toolCache.mjs +53 -0
  26. package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -0
  27. package/dist/esm/main.mjs +1 -1
  28. package/dist/esm/messages/cache.mjs +96 -1
  29. package/dist/esm/messages/cache.mjs.map +1 -1
  30. package/dist/esm/tools/ToolNode.mjs +70 -12
  31. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  32. package/dist/types/agents/AgentContext.d.ts +8 -1
  33. package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +6 -2
  34. package/dist/types/llm/openrouter/index.d.ts +1 -0
  35. package/dist/types/llm/openrouter/toolCache.d.ts +2 -0
  36. package/dist/types/messages/cache.d.ts +1 -0
  37. package/dist/types/tools/ToolNode.d.ts +5 -0
  38. package/dist/types/types/run.d.ts +2 -0
  39. package/package.json +2 -1
  40. package/src/agents/AgentContext.ts +191 -40
  41. package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +0 -4
  42. package/src/agents/__tests__/AgentContext.openrouter.live.test.ts +128 -0
  43. package/src/agents/__tests__/AgentContext.test.ts +355 -18
  44. package/src/agents/__tests__/promptCacheLiveHelpers.ts +8 -2
  45. package/src/graphs/Graph.ts +24 -0
  46. package/src/llm/custom-chat-models.smoke.test.ts +76 -0
  47. package/src/llm/openai/deepseek.test.ts +14 -1
  48. package/src/llm/openai/index.ts +38 -12
  49. package/src/llm/openrouter/index.ts +22 -7
  50. package/src/llm/openrouter/reasoning.test.ts +33 -0
  51. package/src/llm/openrouter/toolCache.test.ts +83 -0
  52. package/src/llm/openrouter/toolCache.ts +89 -0
  53. package/src/messages/cache.test.ts +127 -0
  54. package/src/messages/cache.ts +143 -0
  55. package/src/scripts/openrouter_prompt_cache_live.ts +310 -0
  56. package/src/specs/agent-handoffs.live.test.ts +140 -0
  57. package/src/specs/agent-handoffs.test.ts +266 -2
  58. package/src/specs/openrouter.simple.test.ts +15 -8
  59. package/src/tools/ToolNode.ts +92 -13
  60. package/src/types/run.ts +2 -0
@@ -1,5 +1,5 @@
1
1
  // src/agents/__tests__/AgentContext.test.ts
2
- import { HumanMessage } from '@langchain/core/messages';
2
+ import { AIMessage, HumanMessage, ToolMessage } from '@langchain/core/messages';
3
3
  import { AgentContext } from '../AgentContext';
4
4
  import { Providers } from '@/common';
5
5
  import { addBedrockCacheControl } from '@/messages/cache';
@@ -79,7 +79,7 @@ describe('AgentContext', () => {
79
79
  );
80
80
  });
81
81
 
82
- it('marks only stable system text for Anthropic prompt caching', async () => {
82
+ it('moves Anthropic dynamic instructions behind stable history', async () => {
83
83
  const ctx = createBasicContext({
84
84
  agentConfig: {
85
85
  provider: Providers.ANTHROPIC,
@@ -89,18 +89,39 @@ describe('AgentContext', () => {
89
89
  },
90
90
  });
91
91
 
92
- const result = await ctx.systemRunnable!.invoke([]);
92
+ const result = await ctx.systemRunnable!.invoke([
93
+ new HumanMessage('Hello'),
94
+ new HumanMessage('Second'),
95
+ ]);
93
96
  const content = result[0].content as TestSystemContentBlock[];
94
- expect(content).toHaveLength(2);
95
- expect(content[0]).toMatchObject({
96
- type: 'text',
97
- text: 'Stable instructions',
98
- cache_control: { type: 'ephemeral' },
99
- });
100
- expect(content[1]).toEqual({
101
- type: 'text',
102
- text: 'Dynamic instructions',
97
+ expect(content).toEqual([
98
+ {
99
+ type: 'text',
100
+ text: 'Stable instructions',
101
+ cache_control: { type: 'ephemeral' },
102
+ },
103
+ ]);
104
+ expect(result[1].content).toBe('Hello');
105
+ expect(result[2].content).toBe('Dynamic instructions');
106
+ expect(result[3].content).toBe('Second');
107
+ });
108
+
109
+ it('places Anthropic dynamic instructions before a single latest user prompt', async () => {
110
+ const ctx = createBasicContext({
111
+ agentConfig: {
112
+ provider: Providers.ANTHROPIC,
113
+ clientOptions: { model: 'claude-3-5-sonnet', promptCache: true },
114
+ instructions: 'Stable instructions',
115
+ additional_instructions: 'Dynamic instructions',
116
+ },
103
117
  });
118
+
119
+ const result = await ctx.systemRunnable!.invoke([
120
+ new HumanMessage('Latest'),
121
+ ]);
122
+
123
+ expect(result[1].content).toBe('Dynamic instructions');
124
+ expect(result[2].content).toBe('Latest');
104
125
  });
105
126
 
106
127
  it('omits Anthropic cache control when only dynamic system text exists', async () => {
@@ -119,7 +140,7 @@ describe('AgentContext', () => {
119
140
  expect(content[0]).not.toHaveProperty('cache_control');
120
141
  });
121
142
 
122
- it('keeps cross-run summaries in the dynamic Anthropic system tail', async () => {
143
+ it('keeps cross-run summaries in the dynamic Anthropic tail', async () => {
123
144
  const ctx = createBasicContext({
124
145
  agentConfig: {
125
146
  provider: Providers.ANTHROPIC,
@@ -131,12 +152,11 @@ describe('AgentContext', () => {
131
152
 
132
153
  const result = await ctx.systemRunnable!.invoke([]);
133
154
  const content = result[0].content as TestSystemContentBlock[];
134
- expect(content).toHaveLength(2);
155
+ expect(content).toHaveLength(1);
135
156
  expect(content[0]).toHaveProperty('cache_control');
136
- expect(content[1]).toEqual({
137
- type: 'text',
138
- text: '## Conversation Summary\n\nPrior summary',
139
- });
157
+ expect(result[1].content).toBe(
158
+ '## Conversation Summary\n\nPrior summary'
159
+ );
140
160
  });
141
161
 
142
162
  it('places the Bedrock cache point before dynamic system text', async () => {
@@ -198,6 +218,270 @@ describe('AgentContext', () => {
198
218
  );
199
219
  });
200
220
 
221
+ it('moves OpenRouter dynamic instructions behind stable history', async () => {
222
+ const ctx = createBasicContext({
223
+ agentConfig: {
224
+ provider: Providers.OPENROUTER,
225
+ clientOptions: {
226
+ model: 'anthropic/claude-haiku-4.5',
227
+ promptCache: true,
228
+ },
229
+ instructions: 'Stable instructions',
230
+ additional_instructions: 'Dynamic instructions',
231
+ },
232
+ });
233
+
234
+ const result = await ctx.systemRunnable!.invoke([
235
+ new HumanMessage('Hello'),
236
+ new HumanMessage('Second'),
237
+ ]);
238
+ const content = result[0].content as TestSystemContentBlock[];
239
+ expect(content).toEqual([
240
+ {
241
+ type: 'text',
242
+ text: 'Stable instructions',
243
+ cache_control: { type: 'ephemeral' },
244
+ },
245
+ ]);
246
+ expect(result[1].content).toBe('Hello');
247
+ expect(result[2].content).toBe('Dynamic instructions');
248
+ expect(result[3].content).toBe('Second');
249
+ });
250
+
251
+ it('keeps dynamic-only OpenRouter instructions as system text', async () => {
252
+ const tokenCounter = (msg: { content: unknown }): number => {
253
+ const content =
254
+ typeof msg.content === 'string'
255
+ ? msg.content
256
+ : JSON.stringify(msg.content);
257
+ return content.length;
258
+ };
259
+ const ctx = createBasicContext({
260
+ agentConfig: {
261
+ provider: Providers.OPENROUTER,
262
+ clientOptions: {
263
+ model: 'anthropic/claude-haiku-4.5',
264
+ promptCache: true,
265
+ },
266
+ instructions: undefined,
267
+ additional_instructions: 'Dynamic only',
268
+ },
269
+ tokenCounter,
270
+ });
271
+
272
+ ctx.initializeSystemRunnable();
273
+ const result = await ctx.systemRunnable!.invoke([
274
+ new HumanMessage('First'),
275
+ new HumanMessage('Second'),
276
+ ]);
277
+ const firstContent = result[1].content as TestSystemContentBlock[];
278
+ const secondContent = result[2].content as TestSystemContentBlock[];
279
+
280
+ expect(result).toHaveLength(3);
281
+ expect(result[0].content).toBe('Dynamic only');
282
+ expect(firstContent[0]).toMatchObject({
283
+ type: 'text',
284
+ text: 'First',
285
+ cache_control: { type: 'ephemeral' },
286
+ });
287
+ expect(secondContent[0]).toMatchObject({
288
+ type: 'text',
289
+ text: 'Second',
290
+ cache_control: { type: 'ephemeral' },
291
+ });
292
+ expect(ctx.systemMessageTokens).toBeGreaterThan(0);
293
+ expect(ctx.dynamicInstructionTokens).toBe(0);
294
+ expect(ctx.instructionTokens).toBe(ctx.systemMessageTokens);
295
+ });
296
+
297
+ it('does not cache OpenRouter body messages after dynamic instructions', async () => {
298
+ const ctx = createBasicContext({
299
+ agentConfig: {
300
+ provider: Providers.OPENROUTER,
301
+ clientOptions: {
302
+ model: 'google/gemini-2.5-flash',
303
+ promptCache: true,
304
+ },
305
+ instructions: 'Stable instructions',
306
+ additional_instructions: 'Dynamic instructions',
307
+ },
308
+ });
309
+
310
+ const result = await ctx.systemRunnable!.invoke([
311
+ new HumanMessage('First'),
312
+ new HumanMessage('Second'),
313
+ ]);
314
+
315
+ expect(result[1].content).toBe('First');
316
+ expect(result[2].content).toBe('Dynamic instructions');
317
+ expect(result[3].content).toBe('Second');
318
+ });
319
+
320
+ it('keeps the first OpenRouter user message before single-turn dynamic instructions', async () => {
321
+ const ctx = createBasicContext({
322
+ agentConfig: {
323
+ provider: Providers.OPENROUTER,
324
+ clientOptions: {
325
+ model: 'anthropic/claude-haiku-4.5',
326
+ promptCache: true,
327
+ },
328
+ instructions: 'Stable instructions',
329
+ additional_instructions: 'Dynamic instructions',
330
+ },
331
+ });
332
+
333
+ const result = await ctx.systemRunnable!.invoke([
334
+ new HumanMessage('Latest'),
335
+ ]);
336
+
337
+ expect(result[1].content).toBe('Latest');
338
+ expect(result[2].content).toBe('Dynamic instructions');
339
+ });
340
+
341
+ it('caches stable Anthropic history before dynamic instructions', async () => {
342
+ const ctx = createBasicContext({
343
+ agentConfig: {
344
+ provider: Providers.ANTHROPIC,
345
+ clientOptions: {
346
+ model: 'claude-3-5-sonnet',
347
+ promptCache: true,
348
+ },
349
+ instructions: 'Stable instructions',
350
+ additional_instructions: 'Dynamic instructions',
351
+ },
352
+ });
353
+
354
+ const result = await ctx.systemRunnable!.invoke([
355
+ new HumanMessage('First'),
356
+ new AIMessage('Stable assistant history'),
357
+ new HumanMessage('Latest'),
358
+ ]);
359
+ const stableHistory = result[2].content as TestSystemContentBlock[];
360
+
361
+ expect(result[1].content).toBe('First');
362
+ expect(stableHistory[0]).toMatchObject({
363
+ type: 'text',
364
+ text: 'Stable assistant history',
365
+ cache_control: { type: 'ephemeral' },
366
+ });
367
+ expect(result[3].content).toBe('Dynamic instructions');
368
+ expect(result[4].content).toBe('Latest');
369
+ });
370
+
371
+ it('does not place Anthropic dynamic instructions between tool calls and results', async () => {
372
+ const ctx = createBasicContext({
373
+ agentConfig: {
374
+ provider: Providers.ANTHROPIC,
375
+ clientOptions: {
376
+ model: 'claude-3-5-sonnet',
377
+ promptCache: true,
378
+ },
379
+ instructions: 'Stable instructions',
380
+ additional_instructions: 'Dynamic instructions',
381
+ },
382
+ });
383
+
384
+ const result = await ctx.systemRunnable!.invoke([
385
+ new HumanMessage('Use the tool'),
386
+ new AIMessage({
387
+ content: '',
388
+ tool_calls: [
389
+ {
390
+ id: 'call_1',
391
+ name: 'calculator',
392
+ args: { expression: '2+2' },
393
+ type: 'tool_call',
394
+ },
395
+ ],
396
+ }),
397
+ new ToolMessage({
398
+ content: '4',
399
+ name: 'calculator',
400
+ tool_call_id: 'call_1',
401
+ }),
402
+ ]);
403
+
404
+ expect(result[1].content).toBe('Use the tool');
405
+ expect((result[2] as AIMessage).tool_calls?.[0]?.id).toBe('call_1');
406
+ expect(result[3].getType()).toBe('tool');
407
+ expect(result[4].content).toBe('Dynamic instructions');
408
+ });
409
+
410
+ it('caches stable OpenRouter history before dynamic instructions', async () => {
411
+ const ctx = createBasicContext({
412
+ agentConfig: {
413
+ provider: Providers.OPENROUTER,
414
+ clientOptions: {
415
+ model: 'anthropic/claude-haiku-4.5',
416
+ promptCache: true,
417
+ },
418
+ instructions: 'Stable instructions',
419
+ additional_instructions: 'Dynamic instructions',
420
+ },
421
+ });
422
+
423
+ const result = await ctx.systemRunnable!.invoke([
424
+ new HumanMessage('First'),
425
+ new AIMessage('Stable assistant history'),
426
+ new HumanMessage('Latest'),
427
+ ]);
428
+ const stableHistory = result[2].content as TestSystemContentBlock[];
429
+
430
+ expect(result[1].content).toBe('First');
431
+ expect(stableHistory[0]).toMatchObject({
432
+ type: 'text',
433
+ text: 'Stable assistant history',
434
+ cache_control: { type: 'ephemeral' },
435
+ });
436
+ expect(result[3].content).toBe('Dynamic instructions');
437
+ expect(result[4].content).toBe('Latest');
438
+ });
439
+
440
+ it('adds OpenRouter body cache points when there is no dynamic tail', async () => {
441
+ const ctx = createBasicContext({
442
+ agentConfig: {
443
+ provider: Providers.OPENROUTER,
444
+ clientOptions: {
445
+ model: 'google/gemini-3.1-pro-preview',
446
+ promptCache: true,
447
+ },
448
+ instructions: 'Stable instructions',
449
+ },
450
+ });
451
+
452
+ const result = await ctx.systemRunnable!.invoke([
453
+ new HumanMessage('First'),
454
+ new HumanMessage('Second'),
455
+ ]);
456
+ const firstContent = result[1].content as TestSystemContentBlock[];
457
+ const secondContent = result[2].content as TestSystemContentBlock[];
458
+ expect(firstContent[0]).toHaveProperty('cache_control');
459
+ expect(secondContent[0]).toHaveProperty('cache_control');
460
+ });
461
+
462
+ it('places OpenRouter user-message summaries after the first stable message', async () => {
463
+ const ctx = createBasicContext({
464
+ agentConfig: {
465
+ provider: Providers.OPENROUTER,
466
+ clientOptions: {
467
+ model: 'google/gemini-3.1-pro-preview',
468
+ promptCache: true,
469
+ },
470
+ instructions: 'Stable instructions',
471
+ },
472
+ });
473
+ ctx.setSummary('Rotating summary', 7);
474
+
475
+ const result = await ctx.systemRunnable!.invoke([
476
+ new HumanMessage('First'),
477
+ new HumanMessage('Second'),
478
+ ]);
479
+
480
+ expect(result[1].content).toBe('First');
481
+ expect(result[2].content).toContain('Rotating summary');
482
+ expect(result[3].content).toBe('Second');
483
+ });
484
+
201
485
  it('preserves the Bedrock system cache point through message cache-control pass', async () => {
202
486
  const ctx = createBasicContext({
203
487
  agentConfig: {
@@ -557,6 +841,59 @@ describe('AgentContext', () => {
557
841
  expect(ctxWithDeferred.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
558
842
  });
559
843
 
844
+ it('counts OpenRouter dynamic instructions outside the system message', () => {
845
+ const ctx = createBasicContext({
846
+ agentConfig: {
847
+ provider: Providers.OPENROUTER,
848
+ clientOptions: {
849
+ model: 'google/gemini-3.1-pro-preview',
850
+ promptCache: true,
851
+ },
852
+ instructions: 'Stable',
853
+ additional_instructions: 'Dynamic tail',
854
+ },
855
+ tokenCounter: mockTokenCounter,
856
+ });
857
+
858
+ ctx.initializeSystemRunnable();
859
+
860
+ expect(ctx.systemMessageTokens).toBeGreaterThan(0);
861
+ expect(ctx.dynamicInstructionTokens).toBeGreaterThan(0);
862
+ expect(ctx.instructionTokens).toBe(
863
+ ctx.systemMessageTokens + ctx.dynamicInstructionTokens
864
+ );
865
+ expect(ctx.getTokenBudgetBreakdown().dynamicInstructionTokens).toBe(
866
+ ctx.dynamicInstructionTokens
867
+ );
868
+ });
869
+
870
+ it('clears OpenRouter dynamic instruction tokens when no prompt remains', () => {
871
+ const ctx = createBasicContext({
872
+ agentConfig: {
873
+ provider: Providers.OPENROUTER,
874
+ clientOptions: {
875
+ model: 'google/gemini-3.1-pro-preview',
876
+ promptCache: true,
877
+ },
878
+ instructions: 'Stable instructions',
879
+ },
880
+ tokenCounter: mockTokenCounter,
881
+ });
882
+
883
+ ctx.setInitialSummary('Volatile summary', 8);
884
+ ctx.initializeSystemRunnable();
885
+ expect(ctx.dynamicInstructionTokens).toBeGreaterThan(0);
886
+
887
+ ctx.instructions = undefined;
888
+ ctx.clearSummary();
889
+ ctx.initializeSystemRunnable();
890
+
891
+ expect(ctx.systemRunnable).toBeUndefined();
892
+ expect(ctx.systemMessageTokens).toBe(0);
893
+ expect(ctx.dynamicInstructionTokens).toBe(0);
894
+ expect(ctx.instructionTokens).toBe(0);
895
+ });
896
+
560
897
  it('excludes programmatic-only toolDefinitions from toolSchemaTokens', async () => {
561
898
  // getEventDrivenToolsForBinding excludes definitions whose
562
899
  // allowed_callers omit 'direct'. Accounting must mirror that — a
@@ -1,13 +1,18 @@
1
1
  import { expect } from '@jest/globals';
2
2
  import { HumanMessage } from '@langchain/core/messages';
3
3
  import type { UsageMetadata } from '@langchain/core/messages';
4
+ import type { ClientOptions } from '@langchain/openai';
4
5
  import type * as t from '@/types';
5
6
  import { GraphEvents, Providers } from '@/common';
6
7
  import { AgentContext } from '../AgentContext';
7
8
  import { ModelEndHandler } from '@/events';
8
9
  import { Run } from '@/run';
10
+ import type { ChatOpenRouterInput } from '@/llm/openrouter';
9
11
 
10
- type LivePromptCacheProvider = Providers.ANTHROPIC | Providers.BEDROCK;
12
+ type LivePromptCacheProvider =
13
+ | Providers.ANTHROPIC
14
+ | Providers.BEDROCK
15
+ | Providers.OPENROUTER;
11
16
 
12
17
  type PromptCacheExpectedSystemBlock =
13
18
  | { type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }
@@ -15,7 +20,8 @@ type PromptCacheExpectedSystemBlock =
15
20
 
16
21
  type LivePromptCacheClientOptions =
17
22
  | t.ClientOptions
18
- | t.BedrockAnthropicClientOptions;
23
+ | t.BedrockAnthropicClientOptions
24
+ | (ChatOpenRouterInput & { configuration?: ClientOptions });
19
25
 
20
26
  export function buildStableInstructions({
21
27
  nonce,
@@ -62,6 +62,7 @@ import { isThinkingEnabled } from '@/llm/request';
62
62
  import { initializeModel } from '@/llm/init';
63
63
  import { HandlerRegistry } from '@/events';
64
64
  import { ChatOpenAI } from '@/llm/openai';
65
+ import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
65
66
  import type { HookRegistry } from '@/hooks';
66
67
 
67
68
  const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
@@ -817,6 +818,19 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
817
818
  rawToolsForBinding,
818
819
  makeIsDeferred(agentContext.toolDefinitions)
819
820
  ) ?? rawToolsForBinding;
821
+ } else if (
822
+ agentContext.provider === Providers.OPENROUTER &&
823
+ (
824
+ agentContext.clientOptions as
825
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
826
+ | undefined
827
+ )?.promptCache === true
828
+ ) {
829
+ toolsForBinding =
830
+ partitionAndMarkOpenRouterToolCache(
831
+ rawToolsForBinding,
832
+ makeIsDeferred(agentContext.toolDefinitions)
833
+ ) ?? rawToolsForBinding;
820
834
  }
821
835
 
822
836
  let model =
@@ -1073,6 +1087,16 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1073
1087
  if (bedrockOptions?.promptCache === true) {
1074
1088
  finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
1075
1089
  }
1090
+ } else if (agentContext.provider === Providers.OPENROUTER) {
1091
+ const openRouterOptions = agentContext.clientOptions as
1092
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
1093
+ | undefined;
1094
+ if (
1095
+ openRouterOptions?.promptCache === true &&
1096
+ !agentContext.systemRunnable
1097
+ ) {
1098
+ finalMessages = addCacheControl<BaseMessage>(finalMessages);
1099
+ }
1076
1100
  }
1077
1101
 
1078
1102
  if (
@@ -120,6 +120,17 @@ type OpenRouterReasoningStreamChoice = Omit<
120
120
  > & {
121
121
  delta: OpenRouterReasoningStreamDelta;
122
122
  };
123
+ type PromptTokensDetailsWithCacheWrite = NonNullable<
124
+ OpenAIClient.Completions.CompletionUsage['prompt_tokens_details']
125
+ > & {
126
+ cache_write_tokens?: number;
127
+ };
128
+ type CompletionUsageWithCacheWrite = Omit<
129
+ OpenAIClient.Completions.CompletionUsage,
130
+ 'prompt_tokens_details'
131
+ > & {
132
+ prompt_tokens_details?: PromptTokensDetailsWithCacheWrite;
133
+ };
123
134
  type OpenAIStreamModel = ChatOpenAI | AzureChatOpenAI;
124
135
 
125
136
  const baseAzureFields = {
@@ -654,6 +665,71 @@ describe('custom chat model class smoke tests', () => {
654
665
  ]);
655
666
  });
656
667
 
668
+ it('maps OpenRouter cache write usage to cache_creation in streaming responses', async () => {
669
+ const model = new ChatOpenRouter({
670
+ model: 'anthropic/claude-sonnet-test',
671
+ apiKey: 'test-key',
672
+ streamUsage: true,
673
+ });
674
+ const completions = (model as unknown as StreamingCompletionBackedModel)
675
+ .completions;
676
+ const usage: CompletionUsageWithCacheWrite = {
677
+ prompt_tokens: 11,
678
+ completion_tokens: 7,
679
+ total_tokens: 18,
680
+ prompt_tokens_details: {
681
+ audio_tokens: 2,
682
+ cached_tokens: 3,
683
+ cache_write_tokens: 5,
684
+ },
685
+ completion_tokens_details: {
686
+ audio_tokens: 4,
687
+ reasoning_tokens: 6,
688
+ },
689
+ };
690
+
691
+ async function* streamChunks(): AsyncGenerator<OpenAIClient.Chat.Completions.ChatCompletionChunk> {
692
+ yield createOpenAIStreamChunk('answer', 'stop');
693
+ yield {
694
+ id: 'chatcmpl-openrouter-usage',
695
+ object: 'chat.completion.chunk',
696
+ created: 0,
697
+ model: 'anthropic/claude-sonnet-test',
698
+ choices: [],
699
+ usage,
700
+ } as OpenAIClient.Chat.Completions.ChatCompletionChunk;
701
+ }
702
+
703
+ completions.completionWithRetry = async (): Promise<
704
+ AsyncIterable<OpenAIClient.Chat.Completions.ChatCompletionChunk>
705
+ > => streamChunks();
706
+
707
+ const chunks: AIMessageChunk[] = [];
708
+ const stream = await model.stream([new HumanMessage('hi')]);
709
+ for await (const chunk of stream) {
710
+ chunks.push(chunk);
711
+ }
712
+
713
+ const usageChunk = chunks.find(
714
+ (chunk) =>
715
+ chunk.usage_metadata?.input_token_details?.cache_creation === 5
716
+ );
717
+ expect(usageChunk?.usage_metadata).toEqual({
718
+ input_tokens: 11,
719
+ output_tokens: 7,
720
+ total_tokens: 18,
721
+ input_token_details: {
722
+ audio: 2,
723
+ cache_read: 3,
724
+ cache_creation: 5,
725
+ },
726
+ output_token_details: {
727
+ audio: 4,
728
+ reasoning: 6,
729
+ },
730
+ });
731
+ });
732
+
657
733
  it('keeps Anthropic output, residency, compaction, and stream-delay options', () => {
658
734
  const contextManagement = {
659
735
  edits: [
@@ -11,6 +11,17 @@ type DeepSeekRequest =
11
11
  type OpenAIChatCompletion = OpenAIClient.Chat.Completions.ChatCompletion;
12
12
  type OpenAIChatCompletionChunk =
13
13
  OpenAIClient.Chat.Completions.ChatCompletionChunk;
14
+ type PromptTokensDetailsWithCacheWrite = NonNullable<
15
+ OpenAIClient.Completions.CompletionUsage['prompt_tokens_details']
16
+ > & {
17
+ cache_write_tokens?: number;
18
+ };
19
+ type CompletionUsageWithCacheWrite = Omit<
20
+ OpenAIClient.Completions.CompletionUsage,
21
+ 'prompt_tokens_details'
22
+ > & {
23
+ prompt_tokens_details?: PromptTokensDetailsWithCacheWrite;
24
+ };
14
25
  type ReasoningAssistantMessageParam =
15
26
  OpenAIClient.Chat.Completions.ChatCompletionAssistantMessageParam & {
16
27
  reasoning_content?: string;
@@ -129,7 +140,7 @@ async function* createCompletionStream(
129
140
  }
130
141
 
131
142
  function createCompletion(
132
- usage: OpenAIClient.Completions.CompletionUsage = {
143
+ usage: CompletionUsageWithCacheWrite = {
133
144
  prompt_tokens: 1,
134
145
  completion_tokens: 1,
135
146
  total_tokens: 2,
@@ -392,6 +403,7 @@ describe('ChatDeepSeek', () => {
392
403
  prompt_tokens_details: {
393
404
  audio_tokens: 2,
394
405
  cached_tokens: 3,
406
+ cache_write_tokens: 6,
395
407
  },
396
408
  completion_tokens_details: {
397
409
  audio_tokens: 4,
@@ -409,6 +421,7 @@ describe('ChatDeepSeek', () => {
409
421
  input_token_details: {
410
422
  audio: 2,
411
423
  cache_read: 3,
424
+ cache_creation: 6,
412
425
  },
413
426
  output_token_details: {
414
427
  audio: 4,