@librechat/agents 3.1.81 → 3.1.82

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/cjs/agents/AgentContext.cjs +102 -35
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +13 -0
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/llm/openai/index.cjs +50 -13
  6. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  7. package/dist/cjs/llm/openrouter/index.cjs +17 -7
  8. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  9. package/dist/cjs/llm/openrouter/toolCache.cjs +55 -0
  10. package/dist/cjs/llm/openrouter/toolCache.cjs.map +1 -0
  11. package/dist/cjs/tools/ToolNode.cjs +70 -12
  12. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  13. package/dist/esm/agents/AgentContext.mjs +101 -34
  14. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  15. package/dist/esm/graphs/Graph.mjs +13 -0
  16. package/dist/esm/graphs/Graph.mjs.map +1 -1
  17. package/dist/esm/llm/openai/index.mjs +50 -14
  18. package/dist/esm/llm/openai/index.mjs.map +1 -1
  19. package/dist/esm/llm/openrouter/index.mjs +17 -7
  20. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  21. package/dist/esm/llm/openrouter/toolCache.mjs +53 -0
  22. package/dist/esm/llm/openrouter/toolCache.mjs.map +1 -0
  23. package/dist/esm/tools/ToolNode.mjs +70 -12
  24. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  25. package/dist/types/agents/AgentContext.d.ts +6 -1
  26. package/dist/types/llm/openrouter/index.d.ts +1 -0
  27. package/dist/types/llm/openrouter/toolCache.d.ts +2 -0
  28. package/dist/types/tools/ToolNode.d.ts +5 -0
  29. package/dist/types/types/run.d.ts +2 -0
  30. package/package.json +2 -1
  31. package/src/agents/AgentContext.ts +146 -38
  32. package/src/agents/__tests__/AgentContext.test.ts +198 -0
  33. package/src/graphs/Graph.ts +24 -0
  34. package/src/llm/custom-chat-models.smoke.test.ts +76 -0
  35. package/src/llm/openai/deepseek.test.ts +14 -1
  36. package/src/llm/openai/index.ts +38 -12
  37. package/src/llm/openrouter/index.ts +22 -7
  38. package/src/llm/openrouter/reasoning.test.ts +33 -0
  39. package/src/llm/openrouter/toolCache.test.ts +83 -0
  40. package/src/llm/openrouter/toolCache.ts +89 -0
  41. package/src/messages/cache.test.ts +127 -0
  42. package/src/scripts/openrouter_prompt_cache_live.ts +310 -0
  43. package/src/specs/agent-handoffs.live.test.ts +140 -0
  44. package/src/specs/agent-handoffs.test.ts +266 -2
  45. package/src/specs/openrouter.simple.test.ts +15 -8
  46. package/src/tools/ToolNode.ts +92 -13
  47. package/src/types/run.ts +2 -0
@@ -54,6 +54,8 @@ export declare class AgentContext {
54
54
  tokenCounter?: t.TokenCounter;
55
55
  /** Token count for the system message (instructions text). */
56
56
  systemMessageTokens: number;
57
+ /** Token count for instruction text emitted outside the system message. */
58
+ dynamicInstructionTokens: number;
57
59
  /** Token count for tool schemas only. */
58
60
  toolSchemaTokens: number;
59
61
  /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
@@ -224,7 +226,10 @@ export declare class AgentContext {
224
226
  * Only called when content has actually changed.
225
227
  */
226
228
  private buildSystemRunnable;
227
- private hasAnthropicPromptCache;
229
+ private buildSummaryHumanMessage;
230
+ private buildOpenRouterDynamicTail;
231
+ private insertAfterFirstMessage;
232
+ private getPromptCacheProvider;
228
233
  private hasBedrockPromptCache;
229
234
  private buildSystemMessage;
230
235
  /**
@@ -15,6 +15,7 @@ export interface ChatOpenRouterCallOptions extends Omit<ChatOpenAICallOptions, '
15
15
  include_reasoning?: boolean;
16
16
  reasoning?: OpenRouterReasoning;
17
17
  modelKwargs?: OpenAIChatInput['modelKwargs'];
18
+ promptCache?: boolean;
18
19
  }
19
20
  export type ChatOpenRouterInput = Partial<ChatOpenRouterCallOptions & OpenAIChatInput>;
20
21
  /** invocationParams return type extended with OpenRouter reasoning */
@@ -0,0 +1,2 @@
1
+ import type { GraphTools } from '@/types';
2
+ export declare function partitionAndMarkOpenRouterToolCache(tools: GraphTools | undefined, isDeferred: (toolName: string) => boolean): GraphTools | undefined;
@@ -152,6 +152,11 @@ export declare class ToolNode<T = any> extends RunnableCallable<T, T> {
152
152
  * `createLocalCodingToolBundle()` use.
153
153
  */
154
154
  getFileCheckpointer(): t.LocalFileCheckpointer | undefined;
155
+ private getRegisteredHandoffNames;
156
+ private hasRegisteredHandoffTool;
157
+ private getHandoffToolNameSuggestion;
158
+ private shouldHandleUnknownHandoffLocally;
159
+ private getUnknownToolErrorMessage;
155
160
  /**
156
161
  * Flush the per-Run direct-path turn cache. Called by the Graph at
157
162
  * end-of-Run via `clearHeavyState`. The map intentionally survives
@@ -195,6 +195,8 @@ export type TokenBudgetBreakdown = {
195
195
  instructionTokens: number;
196
196
  /** Tokens from the system message text alone. */
197
197
  systemMessageTokens: number;
198
+ /** Tokens from instruction text emitted outside the system message. */
199
+ dynamicInstructionTokens: number;
198
200
  /** Tokens from tool schema definitions. */
199
201
  toolSchemaTokens: number;
200
202
  /** Tokens from the conversation summary. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@librechat/agents",
3
- "version": "3.1.81",
3
+ "version": "3.1.82",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -171,6 +171,7 @@
171
171
  "start:dev": "node --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/main.ts",
172
172
  "supervised": "node -r dotenv/config --loader ./tsconfig-paths-bootstrap.mjs --experimental-specifier-resolution=node ./src/scripts/supervised.ts --provider anthropic --name Jo --location \"New York, NY\"",
173
173
  "test": "NODE_OPTIONS='--experimental-vm-modules' jest",
174
+ "test:live:handoffs": "RUN_HANDOFF_LIVE_TESTS=1 NODE_OPTIONS='--experimental-vm-modules' jest src/specs/agent-handoffs.live.test.ts --runInBand",
174
175
  "test:memory": "NODE_OPTIONS='--expose-gc' npx jest src/specs/title.memory-leak.test.ts",
175
176
  "test:all": "npm test -- --testPathIgnorePatterns=title.memory-leak.test.ts && npm run test:memory",
176
177
  "reinstall": "npm run clean && npm ci && rm -rf ./dist && npm run build",
@@ -30,6 +30,8 @@ type AgentSystemContentBlock =
30
30
  | AgentSystemTextBlock
31
31
  | { cachePoint: { type: 'default' } };
32
32
 
33
+ type PromptCacheProvider = Providers.ANTHROPIC | Providers.OPENROUTER;
34
+
33
35
  /**
34
36
  * Encapsulates agent-specific state that can vary between agents in a multi-agent system
35
37
  */
@@ -177,6 +179,8 @@ export class AgentContext {
177
179
  tokenCounter?: t.TokenCounter;
178
180
  /** Token count for the system message (instructions text). */
179
181
  systemMessageTokens: number = 0;
182
+ /** Token count for instruction text emitted outside the system message. */
183
+ dynamicInstructionTokens: number = 0;
180
184
  /** Token count for tool schemas only. */
181
185
  toolSchemaTokens: number = 0;
182
186
  /** Running calibration ratio from the pruner — persisted across runs via contextMeta. */
@@ -190,7 +194,12 @@ export class AgentContext {
190
194
  get instructionTokens(): number {
191
195
  const summaryOverhead =
192
196
  this._summaryLocation === 'user_message' ? this.summaryTokenCount : 0;
193
- return this.systemMessageTokens + this.toolSchemaTokens + summaryOverhead;
197
+ return (
198
+ this.systemMessageTokens +
199
+ this.dynamicInstructionTokens +
200
+ this.toolSchemaTokens +
201
+ summaryOverhead
202
+ );
194
203
  }
195
204
  /** The amount of time that should pass before another consecutive API call */
196
205
  streamBuffer?: number;
@@ -570,20 +579,29 @@ export class AgentContext {
570
579
 
571
580
  if (!stableInstructions && !dynamicInstructions && !hasMidRunSummary) {
572
581
  this.systemMessageTokens = 0;
582
+ this.dynamicInstructionTokens = 0;
573
583
  return undefined;
574
584
  }
575
585
 
576
- const usePromptCache = this.hasAnthropicPromptCache();
586
+ const promptCacheProvider = this.getPromptCacheProvider();
587
+ const shouldMoveOpenRouterDynamicInstructions =
588
+ promptCacheProvider === Providers.OPENROUTER &&
589
+ stableInstructions !== '' &&
590
+ dynamicInstructions !== '';
577
591
  const systemMessage = this.buildSystemMessage({
578
592
  stableInstructions,
579
593
  dynamicInstructions,
580
- usePromptCache,
594
+ promptCacheProvider,
581
595
  });
582
596
 
583
597
  if (this.tokenCounter) {
584
598
  this.systemMessageTokens = systemMessage
585
599
  ? this.tokenCounter(systemMessage)
586
600
  : 0;
601
+ this.dynamicInstructionTokens =
602
+ shouldMoveOpenRouterDynamicInstructions
603
+ ? this.tokenCounter(new HumanMessage(dynamicInstructions))
604
+ : 0;
587
605
  }
588
606
 
589
607
  return RunnableLambda.from((messages: BaseMessage[]) => {
@@ -597,45 +615,114 @@ export class AgentContext {
597
615
  this.summaryText != null &&
598
616
  this.summaryText !== '';
599
617
 
600
- let body: BaseMessage[];
601
- if (hasSummaryBody) {
602
- const wrappedSummary =
603
- '<summary>\n' +
604
- (this.summaryText as string) +
605
- '\n</summary>\n\n' +
606
- 'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
607
-
608
- const summaryMsg = usePromptCache
609
- ? new HumanMessage({
610
- content: [
611
- {
612
- type: 'text',
613
- text: wrappedSummary,
614
- cache_control: { type: 'ephemeral' },
615
- },
616
- ],
617
- })
618
- : new HumanMessage(wrappedSummary);
619
- body = [summaryMsg, ...messages];
620
- } else {
621
- body = messages;
622
- }
618
+ const bodyWithSummary =
619
+ hasSummaryBody && promptCacheProvider !== Providers.OPENROUTER
620
+ ? [this.buildSummaryHumanMessage(promptCacheProvider), ...messages]
621
+ : messages;
622
+ const dynamicTail = this.buildOpenRouterDynamicTail({
623
+ dynamicInstructions,
624
+ hasSummaryBody,
625
+ promptCacheProvider,
626
+ shouldMoveOpenRouterDynamicInstructions,
627
+ });
628
+ let body = this.insertAfterFirstMessage(bodyWithSummary, dynamicTail);
623
629
 
624
- if (usePromptCache && body.length >= 2) {
630
+ if (
631
+ promptCacheProvider != null &&
632
+ dynamicTail.length === 0 &&
633
+ body.length >= 2
634
+ ) {
625
635
  body = addCacheControl(body);
626
636
  }
627
637
  return [...prefix, ...body];
628
638
  }).withConfig({ runName: 'prompt' });
629
639
  }
630
640
 
631
- private hasAnthropicPromptCache(): boolean {
632
- if (this.provider !== Providers.ANTHROPIC) {
633
- return false;
641
+ private buildSummaryHumanMessage(
642
+ promptCacheProvider: PromptCacheProvider | undefined
643
+ ): HumanMessage {
644
+ const wrappedSummary =
645
+ '<summary>\n' +
646
+ (this.summaryText as string) +
647
+ '\n</summary>\n\n' +
648
+ 'This is your own checkpoint: you wrote it to preserve context after compaction. Pick up where you left off based on the summary above. Do not repeat prior tasks, information or acknowledge this checkpoint message directly.';
649
+
650
+ if (promptCacheProvider !== Providers.ANTHROPIC) {
651
+ return new HumanMessage(wrappedSummary);
634
652
  }
635
- const anthropicOptions = this.clientOptions as
636
- | t.AnthropicClientOptions
637
- | undefined;
638
- return anthropicOptions?.promptCache === true;
653
+
654
+ return new HumanMessage({
655
+ content: [
656
+ {
657
+ type: 'text',
658
+ text: wrappedSummary,
659
+ cache_control: { type: 'ephemeral' },
660
+ },
661
+ ],
662
+ });
663
+ }
664
+
665
+ private buildOpenRouterDynamicTail({
666
+ dynamicInstructions,
667
+ hasSummaryBody,
668
+ promptCacheProvider,
669
+ shouldMoveOpenRouterDynamicInstructions,
670
+ }: {
671
+ dynamicInstructions: string;
672
+ hasSummaryBody: boolean;
673
+ promptCacheProvider: PromptCacheProvider | undefined;
674
+ shouldMoveOpenRouterDynamicInstructions: boolean;
675
+ }): BaseMessage[] {
676
+ if (promptCacheProvider !== Providers.OPENROUTER) {
677
+ return [];
678
+ }
679
+
680
+ const dynamicTail = shouldMoveOpenRouterDynamicInstructions
681
+ ? [new HumanMessage(dynamicInstructions)]
682
+ : [];
683
+
684
+ if (!hasSummaryBody) {
685
+ return dynamicTail;
686
+ }
687
+
688
+ return [...dynamicTail, this.buildSummaryHumanMessage(promptCacheProvider)];
689
+ }
690
+
691
+ private insertAfterFirstMessage(
692
+ messages: BaseMessage[],
693
+ tail: BaseMessage[]
694
+ ): BaseMessage[] {
695
+ if (tail.length === 0) {
696
+ return messages;
697
+ }
698
+
699
+ if (messages.length === 0) {
700
+ return tail;
701
+ }
702
+
703
+ return [messages[0], ...tail, ...messages.slice(1)];
704
+ }
705
+
706
+ private getPromptCacheProvider(): PromptCacheProvider | undefined {
707
+ if (this.provider === Providers.ANTHROPIC) {
708
+ const anthropicOptions = this.clientOptions as
709
+ | t.AnthropicClientOptions
710
+ | undefined;
711
+ return anthropicOptions?.promptCache === true
712
+ ? Providers.ANTHROPIC
713
+ : undefined;
714
+ }
715
+
716
+ if (this.provider === Providers.OPENROUTER) {
717
+ const openRouterOptions = this.clientOptions as
718
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
719
+ | undefined;
720
+ return openRouterOptions?.promptCache === true
721
+ ? Providers.OPENROUTER
722
+ : undefined;
723
+ }
724
+
725
+ return undefined;
639
726
  }
640
727
 
641
728
  private hasBedrockPromptCache(): boolean {
@@ -651,17 +738,17 @@ export class AgentContext {
651
738
  private buildSystemMessage({
652
739
  stableInstructions,
653
740
  dynamicInstructions,
654
- usePromptCache,
741
+ promptCacheProvider,
655
742
  }: {
656
743
  stableInstructions: string;
657
744
  dynamicInstructions: string;
658
- usePromptCache: boolean;
745
+ promptCacheProvider: PromptCacheProvider | undefined;
659
746
  }): SystemMessage | undefined {
660
747
  if (!stableInstructions && !dynamicInstructions) {
661
748
  return undefined;
662
749
  }
663
750
 
664
- if (usePromptCache) {
751
+ if (promptCacheProvider === Providers.ANTHROPIC) {
665
752
  const content: AgentSystemContentBlock[] = [];
666
753
  if (stableInstructions) {
667
754
  content.push({
@@ -676,6 +763,25 @@ export class AgentContext {
676
763
  return new SystemMessage({ content } as BaseMessageFields);
677
764
  }
678
765
 
766
+ if (
767
+ promptCacheProvider === Providers.OPENROUTER &&
768
+ !stableInstructions
769
+ ) {
770
+ return new SystemMessage(dynamicInstructions);
771
+ }
772
+
773
+ if (promptCacheProvider === Providers.OPENROUTER) {
774
+ return new SystemMessage({
775
+ content: [
776
+ {
777
+ type: 'text',
778
+ text: stableInstructions,
779
+ cache_control: { type: 'ephemeral' },
780
+ },
781
+ ],
782
+ } as BaseMessageFields);
783
+ }
784
+
679
785
  if (this.hasBedrockPromptCache() && stableInstructions) {
680
786
  const content: AgentSystemContentBlock[] = [
681
787
  { type: 'text', text: stableInstructions },
@@ -699,6 +805,7 @@ export class AgentContext {
699
805
  */
700
806
  reset(): void {
701
807
  this.systemMessageTokens = 0;
808
+ this.dynamicInstructionTokens = 0;
702
809
  this.toolSchemaTokens = 0;
703
810
  this.cachedSystemRunnable = undefined;
704
811
  this.systemRunnableStale = true;
@@ -1054,6 +1161,7 @@ export class AgentContext {
1054
1161
  maxContextTokens,
1055
1162
  instructionTokens: this.instructionTokens,
1056
1163
  systemMessageTokens: this.systemMessageTokens,
1164
+ dynamicInstructionTokens: this.dynamicInstructionTokens,
1057
1165
  toolSchemaTokens: this.toolSchemaTokens,
1058
1166
  summaryTokens: this.summaryTokenCount,
1059
1167
  toolCount,
@@ -1072,7 +1180,7 @@ export class AgentContext {
1072
1180
  const lines = [
1073
1181
  'Token budget breakdown:',
1074
1182
  ` maxContextTokens: ${b.maxContextTokens}`,
1075
- ` instructionTokens: ${b.instructionTokens} (system: ${b.systemMessageTokens}, tools: ${b.toolSchemaTokens} [${b.toolCount} tools])`,
1183
+ ` instructionTokens: ${b.instructionTokens} (system: ${b.systemMessageTokens}, dynamic: ${b.dynamicInstructionTokens}, tools: ${b.toolSchemaTokens} [${b.toolCount} tools])`,
1076
1184
  ` summaryTokens: ${b.summaryTokens}`,
1077
1185
  ` messageTokens: ${b.messageTokens} (${b.messageCount} messages)`,
1078
1186
  ` availableForMessages: ${b.availableForMessages}`,
@@ -198,6 +198,151 @@ describe('AgentContext', () => {
198
198
  );
199
199
  });
200
200
 
201
+ it('marks stable OpenRouter system text and keeps first user message stable', async () => {
202
+ const ctx = createBasicContext({
203
+ agentConfig: {
204
+ provider: Providers.OPENROUTER,
205
+ clientOptions: {
206
+ model: 'anthropic/claude-haiku-4.5',
207
+ promptCache: true,
208
+ },
209
+ instructions: 'Stable instructions',
210
+ additional_instructions: 'Dynamic instructions',
211
+ },
212
+ });
213
+
214
+ const result = await ctx.systemRunnable!.invoke([
215
+ new HumanMessage('Hello'),
216
+ new HumanMessage('Second'),
217
+ ]);
218
+ const content = result[0].content as TestSystemContentBlock[];
219
+ expect(content).toEqual([
220
+ {
221
+ type: 'text',
222
+ text: 'Stable instructions',
223
+ cache_control: { type: 'ephemeral' },
224
+ },
225
+ ]);
226
+ expect(result[1]).toBeInstanceOf(HumanMessage);
227
+ expect(result[1].content).toBe('Hello');
228
+ expect(result[2].content).toBe('Dynamic instructions');
229
+ expect(result[3].content).toBe('Second');
230
+ });
231
+
232
+ it('keeps dynamic-only OpenRouter instructions as system text', async () => {
233
+ const tokenCounter = (msg: { content: unknown }): number => {
234
+ const content =
235
+ typeof msg.content === 'string'
236
+ ? msg.content
237
+ : JSON.stringify(msg.content);
238
+ return content.length;
239
+ };
240
+ const ctx = createBasicContext({
241
+ agentConfig: {
242
+ provider: Providers.OPENROUTER,
243
+ clientOptions: {
244
+ model: 'anthropic/claude-haiku-4.5',
245
+ promptCache: true,
246
+ },
247
+ instructions: undefined,
248
+ additional_instructions: 'Dynamic only',
249
+ },
250
+ tokenCounter,
251
+ });
252
+
253
+ ctx.initializeSystemRunnable();
254
+ const result = await ctx.systemRunnable!.invoke([
255
+ new HumanMessage('First'),
256
+ new HumanMessage('Second'),
257
+ ]);
258
+ const firstContent = result[1].content as TestSystemContentBlock[];
259
+ const secondContent = result[2].content as TestSystemContentBlock[];
260
+
261
+ expect(result).toHaveLength(3);
262
+ expect(result[0].content).toBe('Dynamic only');
263
+ expect(firstContent[0]).toMatchObject({
264
+ type: 'text',
265
+ text: 'First',
266
+ cache_control: { type: 'ephemeral' },
267
+ });
268
+ expect(secondContent[0]).toMatchObject({
269
+ type: 'text',
270
+ text: 'Second',
271
+ cache_control: { type: 'ephemeral' },
272
+ });
273
+ expect(ctx.systemMessageTokens).toBeGreaterThan(0);
274
+ expect(ctx.dynamicInstructionTokens).toBe(0);
275
+ expect(ctx.instructionTokens).toBe(ctx.systemMessageTokens);
276
+ });
277
+
278
+ it('does not cache OpenRouter body messages after dynamic instructions', async () => {
279
+ const ctx = createBasicContext({
280
+ agentConfig: {
281
+ provider: Providers.OPENROUTER,
282
+ clientOptions: {
283
+ model: 'google/gemini-2.5-flash',
284
+ promptCache: true,
285
+ },
286
+ instructions: 'Stable instructions',
287
+ additional_instructions: 'Dynamic instructions',
288
+ },
289
+ });
290
+
291
+ const result = await ctx.systemRunnable!.invoke([
292
+ new HumanMessage('First'),
293
+ new HumanMessage('Second'),
294
+ ]);
295
+
296
+ expect(result[1].content).toBe('First');
297
+ expect(result[2].content).toBe('Dynamic instructions');
298
+ expect(result[3].content).toBe('Second');
299
+ });
300
+
301
+ it('adds OpenRouter body cache points when there is no dynamic tail', async () => {
302
+ const ctx = createBasicContext({
303
+ agentConfig: {
304
+ provider: Providers.OPENROUTER,
305
+ clientOptions: {
306
+ model: 'anthropic/claude-haiku-4.5',
307
+ promptCache: true,
308
+ },
309
+ instructions: 'Stable instructions',
310
+ },
311
+ });
312
+
313
+ const result = await ctx.systemRunnable!.invoke([
314
+ new HumanMessage('First'),
315
+ new HumanMessage('Second'),
316
+ ]);
317
+ const firstContent = result[1].content as TestSystemContentBlock[];
318
+ const secondContent = result[2].content as TestSystemContentBlock[];
319
+ expect(firstContent[0]).toHaveProperty('cache_control');
320
+ expect(secondContent[0]).toHaveProperty('cache_control');
321
+ });
322
+
323
+ it('places OpenRouter user-message summaries after the first stable message', async () => {
324
+ const ctx = createBasicContext({
325
+ agentConfig: {
326
+ provider: Providers.OPENROUTER,
327
+ clientOptions: {
328
+ model: 'anthropic/claude-haiku-4.5',
329
+ promptCache: true,
330
+ },
331
+ instructions: 'Stable instructions',
332
+ },
333
+ });
334
+ ctx.setSummary('Rotating summary', 7);
335
+
336
+ const result = await ctx.systemRunnable!.invoke([
337
+ new HumanMessage('First'),
338
+ new HumanMessage('Second'),
339
+ ]);
340
+
341
+ expect(result[1].content).toBe('First');
342
+ expect(result[2].content).toContain('Rotating summary');
343
+ expect(result[3].content).toBe('Second');
344
+ });
345
+
201
346
  it('preserves the Bedrock system cache point through message cache-control pass', async () => {
202
347
  const ctx = createBasicContext({
203
348
  agentConfig: {
@@ -557,6 +702,59 @@ describe('AgentContext', () => {
557
702
  expect(ctxWithDeferred.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
558
703
  });
559
704
 
705
+ it('counts OpenRouter dynamic instructions outside the system message', () => {
706
+ const ctx = createBasicContext({
707
+ agentConfig: {
708
+ provider: Providers.OPENROUTER,
709
+ clientOptions: {
710
+ model: 'anthropic/claude-haiku-4.5',
711
+ promptCache: true,
712
+ },
713
+ instructions: 'Stable',
714
+ additional_instructions: 'Dynamic tail',
715
+ },
716
+ tokenCounter: mockTokenCounter,
717
+ });
718
+
719
+ ctx.initializeSystemRunnable();
720
+
721
+ expect(ctx.systemMessageTokens).toBeGreaterThan(0);
722
+ expect(ctx.dynamicInstructionTokens).toBeGreaterThan(0);
723
+ expect(ctx.instructionTokens).toBe(
724
+ ctx.systemMessageTokens + ctx.dynamicInstructionTokens
725
+ );
726
+ expect(ctx.getTokenBudgetBreakdown().dynamicInstructionTokens).toBe(
727
+ ctx.dynamicInstructionTokens
728
+ );
729
+ });
730
+
731
+ it('clears OpenRouter dynamic instruction tokens when no prompt remains', () => {
732
+ const ctx = createBasicContext({
733
+ agentConfig: {
734
+ provider: Providers.OPENROUTER,
735
+ clientOptions: {
736
+ model: 'anthropic/claude-haiku-4.5',
737
+ promptCache: true,
738
+ },
739
+ instructions: 'Stable instructions',
740
+ },
741
+ tokenCounter: mockTokenCounter,
742
+ });
743
+
744
+ ctx.setInitialSummary('Volatile summary', 8);
745
+ ctx.initializeSystemRunnable();
746
+ expect(ctx.dynamicInstructionTokens).toBeGreaterThan(0);
747
+
748
+ ctx.instructions = undefined;
749
+ ctx.clearSummary();
750
+ ctx.initializeSystemRunnable();
751
+
752
+ expect(ctx.systemRunnable).toBeUndefined();
753
+ expect(ctx.systemMessageTokens).toBe(0);
754
+ expect(ctx.dynamicInstructionTokens).toBe(0);
755
+ expect(ctx.instructionTokens).toBe(0);
756
+ });
757
+
560
758
  it('excludes programmatic-only toolDefinitions from toolSchemaTokens', async () => {
561
759
  // getEventDrivenToolsForBinding excludes definitions whose
562
760
  // allowed_callers omit 'direct'. Accounting must mirror that — a
@@ -62,6 +62,7 @@ import { isThinkingEnabled } from '@/llm/request';
62
62
  import { initializeModel } from '@/llm/init';
63
63
  import { HandlerRegistry } from '@/events';
64
64
  import { ChatOpenAI } from '@/llm/openai';
65
+ import { partitionAndMarkOpenRouterToolCache } from '@/llm/openrouter/toolCache';
65
66
  import type { HookRegistry } from '@/hooks';
66
67
 
67
68
  const { AGENT, TOOLS, SUMMARIZE } = GraphNodeKeys;
@@ -817,6 +818,19 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
817
818
  rawToolsForBinding,
818
819
  makeIsDeferred(agentContext.toolDefinitions)
819
820
  ) ?? rawToolsForBinding;
821
+ } else if (
822
+ agentContext.provider === Providers.OPENROUTER &&
823
+ (
824
+ agentContext.clientOptions as
825
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
826
+ | undefined
827
+ )?.promptCache === true
828
+ ) {
829
+ toolsForBinding =
830
+ partitionAndMarkOpenRouterToolCache(
831
+ rawToolsForBinding,
832
+ makeIsDeferred(agentContext.toolDefinitions)
833
+ ) ?? rawToolsForBinding;
820
834
  }
821
835
 
822
836
  let model =
@@ -1073,6 +1087,16 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1073
1087
  if (bedrockOptions?.promptCache === true) {
1074
1088
  finalMessages = addBedrockCacheControl<BaseMessage>(finalMessages);
1075
1089
  }
1090
+ } else if (agentContext.provider === Providers.OPENROUTER) {
1091
+ const openRouterOptions = agentContext.clientOptions as
1092
+ | t.ProviderOptionsMap[Providers.OPENROUTER]
1093
+ | undefined;
1094
+ if (
1095
+ openRouterOptions?.promptCache === true &&
1096
+ !agentContext.systemRunnable
1097
+ ) {
1098
+ finalMessages = addCacheControl<BaseMessage>(finalMessages);
1099
+ }
1076
1100
  }
1077
1101
 
1078
1102
  if (