@librechat/agents 3.1.52 → 3.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +16 -5
  2. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  3. package/dist/cjs/llm/google/index.cjs.map +1 -1
  4. package/dist/cjs/llm/openrouter/index.cjs +59 -5
  5. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  6. package/dist/cjs/llm/vertexai/index.cjs +16 -2
  7. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  8. package/dist/cjs/main.cjs +2 -0
  9. package/dist/cjs/main.cjs.map +1 -1
  10. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +16 -5
  11. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  12. package/dist/esm/llm/google/index.mjs.map +1 -1
  13. package/dist/esm/llm/openrouter/index.mjs +59 -5
  14. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  15. package/dist/esm/llm/vertexai/index.mjs +16 -2
  16. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  17. package/dist/esm/main.mjs +1 -0
  18. package/dist/esm/main.mjs.map +1 -1
  19. package/dist/types/index.d.ts +2 -0
  20. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +1 -1
  21. package/dist/types/llm/google/index.d.ts +2 -3
  22. package/dist/types/llm/openrouter/index.d.ts +21 -1
  23. package/dist/types/llm/vertexai/index.d.ts +2 -1
  24. package/dist/types/types/llm.d.ts +7 -2
  25. package/package.json +1 -1
  26. package/src/index.ts +6 -0
  27. package/src/llm/bedrock/llm.spec.ts +233 -4
  28. package/src/llm/bedrock/utils/message_outputs.ts +51 -11
  29. package/src/llm/google/index.ts +2 -3
  30. package/src/llm/openrouter/index.ts +117 -6
  31. package/src/llm/openrouter/reasoning.test.ts +207 -0
  32. package/src/llm/vertexai/index.ts +20 -3
  33. package/src/scripts/bedrock-cache-debug.ts +250 -0
  34. package/src/specs/openrouter.simple.test.ts +163 -2
  35. package/src/types/llm.ts +7 -2
  36. package/src/utils/llmConfig.ts +3 -4
@@ -5,16 +5,24 @@ config();
5
5
  import { expect, test, describe, jest } from '@jest/globals';
6
6
  import {
7
7
  AIMessage,
8
- AIMessageChunk,
8
+ ToolMessage,
9
9
  HumanMessage,
10
10
  SystemMessage,
11
- ToolMessage,
11
+ AIMessageChunk,
12
12
  } from '@langchain/core/messages';
13
13
  import { concat } from '@langchain/core/utils/stream';
14
14
  import { ChatGenerationChunk } from '@langchain/core/outputs';
15
- import { BedrockRuntimeClient } from '@aws-sdk/client-bedrock-runtime';
15
+ import {
16
+ BedrockRuntimeClient,
17
+ ConverseCommand,
18
+ } from '@aws-sdk/client-bedrock-runtime';
19
+ import type { ConverseResponse } from '@aws-sdk/client-bedrock-runtime';
20
+ import {
21
+ convertConverseMessageToLangChainMessage,
22
+ handleConverseStreamMetadata,
23
+ convertToConverseMessages,
24
+ } from './utils';
16
25
  import { CustomChatBedrockConverse, ServiceTierType } from './index';
17
- import { convertToConverseMessages } from './utils';
18
26
 
19
27
  jest.setTimeout(120000);
20
28
 
@@ -429,6 +437,164 @@ describe('CustomChatBedrockConverse', () => {
429
437
  });
430
438
  });
431
439
 
440
+ describe('handleConverseStreamMetadata - cache token extraction', () => {
441
+ test('should extract cacheReadInputTokens and cacheWriteInputTokens into input_token_details', () => {
442
+ const metadata = {
443
+ usage: {
444
+ inputTokens: 13,
445
+ outputTokens: 5,
446
+ totalTokens: 10849,
447
+ cacheReadInputTokens: 10831,
448
+ cacheWriteInputTokens: 0,
449
+ },
450
+ metrics: { latencyMs: 1000 },
451
+ };
452
+
453
+ const chunk = handleConverseStreamMetadata(metadata, {
454
+ streamUsage: true,
455
+ });
456
+ const msg = chunk.message as AIMessageChunk;
457
+
458
+ expect(msg.usage_metadata).toEqual({
459
+ input_tokens: 13,
460
+ output_tokens: 5,
461
+ total_tokens: 10849,
462
+ input_token_details: {
463
+ cache_read: 10831,
464
+ cache_creation: 0,
465
+ },
466
+ });
467
+ });
468
+
469
+ test('should not include input_token_details when no cache tokens present', () => {
470
+ const metadata = {
471
+ usage: {
472
+ inputTokens: 100,
473
+ outputTokens: 50,
474
+ totalTokens: 150,
475
+ },
476
+ metrics: { latencyMs: 500 },
477
+ };
478
+
479
+ const chunk = handleConverseStreamMetadata(metadata, {
480
+ streamUsage: true,
481
+ });
482
+ const msg = chunk.message as AIMessageChunk;
483
+
484
+ expect(msg.usage_metadata).toEqual({
485
+ input_tokens: 100,
486
+ output_tokens: 50,
487
+ total_tokens: 150,
488
+ });
489
+ expect(msg.usage_metadata?.input_token_details).toBeUndefined();
490
+ });
491
+
492
+ test('should include input_token_details when only cacheWriteInputTokens is present', () => {
493
+ const metadata = {
494
+ usage: {
495
+ inputTokens: 50,
496
+ outputTokens: 10,
497
+ totalTokens: 10060,
498
+ cacheWriteInputTokens: 10000,
499
+ },
500
+ metrics: { latencyMs: 800 },
501
+ };
502
+
503
+ const chunk = handleConverseStreamMetadata(metadata, {
504
+ streamUsage: true,
505
+ });
506
+ const msg = chunk.message as AIMessageChunk;
507
+
508
+ expect(msg.usage_metadata?.input_token_details).toEqual({
509
+ cache_read: 0,
510
+ cache_creation: 10000,
511
+ });
512
+ });
513
+
514
+ test('should return undefined usage_metadata when streamUsage is false', () => {
515
+ const metadata = {
516
+ usage: {
517
+ inputTokens: 13,
518
+ outputTokens: 5,
519
+ totalTokens: 10849,
520
+ cacheReadInputTokens: 10831,
521
+ cacheWriteInputTokens: 0,
522
+ },
523
+ metrics: { latencyMs: 1000 },
524
+ };
525
+
526
+ const chunk = handleConverseStreamMetadata(metadata, {
527
+ streamUsage: false,
528
+ });
529
+ const msg = chunk.message as AIMessageChunk;
530
+
531
+ expect(msg.usage_metadata).toBeUndefined();
532
+ });
533
+ });
534
+
535
+ describe('convertConverseMessageToLangChainMessage - cache token extraction', () => {
536
+ const makeResponseMetadata = (
537
+ usage: Record<string, number>
538
+ ): Omit<ConverseResponse, 'output'> =>
539
+ ({
540
+ usage,
541
+ stopReason: 'end_turn',
542
+ metrics: undefined,
543
+ $metadata: { requestId: 'test-id' },
544
+ }) as unknown as Omit<ConverseResponse, 'output'>;
545
+
546
+ test('should extract cache tokens in non-streaming response', () => {
547
+ const message = {
548
+ role: 'assistant' as const,
549
+ content: [{ text: 'Hello!' }],
550
+ };
551
+
552
+ const result = convertConverseMessageToLangChainMessage(
553
+ message,
554
+ makeResponseMetadata({
555
+ inputTokens: 20,
556
+ outputTokens: 5,
557
+ totalTokens: 10856,
558
+ cacheReadInputTokens: 10831,
559
+ cacheWriteInputTokens: 0,
560
+ })
561
+ );
562
+
563
+ expect(result.usage_metadata).toEqual({
564
+ input_tokens: 20,
565
+ output_tokens: 5,
566
+ total_tokens: 10856,
567
+ input_token_details: {
568
+ cache_read: 10831,
569
+ cache_creation: 0,
570
+ },
571
+ });
572
+ });
573
+
574
+ test('should not include input_token_details when no cache tokens in non-streaming response', () => {
575
+ const message = {
576
+ role: 'assistant' as const,
577
+ content: [{ text: 'Hello!' }],
578
+ };
579
+
580
+ const result = convertConverseMessageToLangChainMessage(
581
+ message,
582
+ makeResponseMetadata({
583
+ inputTokens: 100,
584
+ outputTokens: 50,
585
+ totalTokens: 150,
586
+ })
587
+ );
588
+
589
+ expect(result.usage_metadata).toEqual({
590
+ input_tokens: 100,
591
+ output_tokens: 50,
592
+ total_tokens: 150,
593
+ });
594
+ expect(result.usage_metadata?.input_token_details).toBeUndefined();
595
+ });
596
+ });
597
+
432
598
  describe('convertToConverseMessages', () => {
433
599
  test('should convert basic messages', () => {
434
600
  const { converseMessages, converseSystem } = convertToConverseMessages([
@@ -647,4 +813,67 @@ describe.skip('Integration tests', () => {
647
813
  expect(reasoningBlocks.length).toBeGreaterThanOrEqual(0);
648
814
  }
649
815
  });
816
+
817
+ test('cache tokens should populate input_token_details', async () => {
818
+ const client = new BedrockRuntimeClient({
819
+ region: integrationArgs.region,
820
+ credentials: integrationArgs.credentials,
821
+ });
822
+
823
+ // Large system prompt (>1024 tokens) to meet Bedrock's minimum cache threshold
824
+ const largeSystemPrompt = [
825
+ 'You are an expert assistant.',
826
+ ...Array(200).fill(
827
+ 'This is padding content to exceed the minimum token threshold for Bedrock prompt caching. '
828
+ ),
829
+ 'When answering, be brief and direct.',
830
+ ].join(' ');
831
+
832
+ const systemBlocks = [
833
+ { text: largeSystemPrompt },
834
+ { cachePoint: { type: 'default' as const } },
835
+ ];
836
+
837
+ const converseArgs = {
838
+ modelId: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
839
+ system: systemBlocks,
840
+ inferenceConfig: { maxTokens: 50 },
841
+ };
842
+
843
+ // Call 1: populate the cache (may be a write or read if already warm)
844
+ await client.send(
845
+ new ConverseCommand({
846
+ ...converseArgs,
847
+ messages: [{ role: 'user', content: [{ text: 'Say hello.' }] }],
848
+ })
849
+ );
850
+
851
+ // Call 2: should read from cache — this is the one we assert on
852
+ const response = await client.send(
853
+ new ConverseCommand({
854
+ ...converseArgs,
855
+ messages: [
856
+ { role: 'user', content: [{ text: 'Say hello.' }] },
857
+ { role: 'assistant', content: [{ text: 'Hello!' }] },
858
+ { role: 'user', content: [{ text: 'Say goodbye.' }] },
859
+ ],
860
+ })
861
+ );
862
+
863
+ // Feed raw response through convertConverseMessageToLangChainMessage
864
+ const result = convertConverseMessageToLangChainMessage(
865
+ response.output!.message!,
866
+ response
867
+ );
868
+
869
+ expect(result.usage_metadata).toBeDefined();
870
+ expect(result.usage_metadata!.input_tokens).toBeGreaterThan(0);
871
+ expect(result.usage_metadata!.output_tokens).toBeGreaterThan(0);
872
+
873
+ // Cache should have been populated by call 1, so call 2 should show cache reads
874
+ expect(result.usage_metadata!.input_token_details).toBeDefined();
875
+ expect(
876
+ result.usage_metadata!.input_token_details!.cache_read
877
+ ).toBeGreaterThan(0);
878
+ });
650
879
  });
@@ -2,8 +2,9 @@
2
2
  * Utility functions for converting Bedrock Converse responses to LangChain messages.
3
3
  * Ported from @langchain/aws common.js
4
4
  */
5
- import { AIMessage, AIMessageChunk } from '@langchain/core/messages';
6
5
  import { ChatGenerationChunk } from '@langchain/core/outputs';
6
+ import { AIMessage, AIMessageChunk } from '@langchain/core/messages';
7
+ import type { UsageMetadata } from '@langchain/core/messages';
7
8
  import type {
8
9
  BedrockMessage,
9
10
  ConverseResponse,
@@ -107,17 +108,38 @@ export function convertConverseMessageToLangChainMessage(
107
108
  }
108
109
 
109
110
  let tokenUsage:
110
- | { input_tokens: number; output_tokens: number; total_tokens: number }
111
+ | {
112
+ input_tokens: number;
113
+ output_tokens: number;
114
+ total_tokens: number;
115
+ input_token_details?: {
116
+ cache_read: number;
117
+ cache_creation: number;
118
+ };
119
+ }
111
120
  | undefined;
112
121
  if (responseMetadata.usage != null) {
113
- const input_tokens = responseMetadata.usage.inputTokens ?? 0;
114
- const output_tokens = responseMetadata.usage.outputTokens ?? 0;
122
+ const usage = responseMetadata.usage as NonNullable<
123
+ typeof responseMetadata.usage
124
+ > & {
125
+ cacheReadInputTokens?: number;
126
+ cacheWriteInputTokens?: number;
127
+ };
128
+ const input_tokens = usage.inputTokens ?? 0;
129
+ const output_tokens = usage.outputTokens ?? 0;
130
+ const cacheRead = usage.cacheReadInputTokens;
131
+ const cacheWrite = usage.cacheWriteInputTokens;
115
132
  tokenUsage = {
116
133
  input_tokens,
117
134
  output_tokens,
118
- total_tokens:
119
- responseMetadata.usage.totalTokens ?? input_tokens + output_tokens,
135
+ total_tokens: usage.totalTokens ?? input_tokens + output_tokens,
120
136
  };
137
+ if (cacheRead != null || cacheWrite != null) {
138
+ tokenUsage.input_token_details = {
139
+ cache_read: cacheRead ?? 0,
140
+ cache_creation: cacheWrite ?? 0,
141
+ };
142
+ }
121
143
  }
122
144
 
123
145
  if (
@@ -285,19 +307,37 @@ export function handleConverseStreamMetadata(
285
307
  metadata: ConverseStreamMetadataEvent,
286
308
  extra: { streamUsage: boolean }
287
309
  ): ChatGenerationChunk {
288
- const inputTokens = metadata.usage?.inputTokens ?? 0;
289
- const outputTokens = metadata.usage?.outputTokens ?? 0;
290
- const usage_metadata = {
310
+ const usage = metadata.usage as
311
+ | (NonNullable<ConverseStreamMetadataEvent['usage']> & {
312
+ cacheReadInputTokens?: number;
313
+ cacheWriteInputTokens?: number;
314
+ })
315
+ | undefined;
316
+ const inputTokens = usage?.inputTokens ?? 0;
317
+ const outputTokens = usage?.outputTokens ?? 0;
318
+ const cacheRead = usage?.cacheReadInputTokens;
319
+ const cacheWrite = usage?.cacheWriteInputTokens;
320
+
321
+ const usage_metadata: Record<string, unknown> = {
291
322
  input_tokens: inputTokens,
292
323
  output_tokens: outputTokens,
293
- total_tokens: metadata.usage?.totalTokens ?? inputTokens + outputTokens,
324
+ total_tokens: usage?.totalTokens ?? inputTokens + outputTokens,
294
325
  };
295
326
 
327
+ if (cacheRead != null || cacheWrite != null) {
328
+ usage_metadata.input_token_details = {
329
+ cache_read: cacheRead ?? 0,
330
+ cache_creation: cacheWrite ?? 0,
331
+ };
332
+ }
333
+
296
334
  return new ChatGenerationChunk({
297
335
  text: '',
298
336
  message: new AIMessageChunk({
299
337
  content: '',
300
- usage_metadata: extra.streamUsage ? usage_metadata : undefined,
338
+ usage_metadata: extra.streamUsage
339
+ ? (usage_metadata as UsageMetadata)
340
+ : undefined,
301
341
  response_metadata: {
302
342
  // Use the same key as returned from the Converse API
303
343
  metadata,
@@ -10,9 +10,8 @@ import type {
10
10
  } from '@google/generative-ai';
11
11
  import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
12
12
  import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
13
- import type { GeminiGenerationConfig } from '@langchain/google-common';
14
13
  import type { GeminiApiUsageMetadata, InputTokenDetails } from './types';
15
- import type { GoogleClientOptions } from '@/types';
14
+ import type { GoogleClientOptions, GoogleThinkingConfig } from '@/types';
16
15
  import {
17
16
  convertResponseContentToChatGenerationChunk,
18
17
  convertBaseMessagesToContent,
@@ -20,7 +19,7 @@ import {
20
19
  } from './utils/common';
21
20
 
22
21
  export class CustomChatGoogleGenerativeAI extends ChatGoogleGenerativeAI {
23
- thinkingConfig?: GeminiGenerationConfig['thinkingConfig'];
22
+ thinkingConfig?: GoogleThinkingConfig;
24
23
 
25
24
  /**
26
25
  * Override to add gemini-3 model support for multimodal and function calling thought signatures
@@ -29,24 +29,135 @@ type OpenAIRoleEnum =
29
29
  | 'function'
30
30
  | 'tool';
31
31
 
32
- export interface ChatOpenRouterCallOptions extends ChatOpenAICallOptions {
32
+ export type OpenRouterReasoningEffort =
33
+ | 'xhigh'
34
+ | 'high'
35
+ | 'medium'
36
+ | 'low'
37
+ | 'minimal'
38
+ | 'none';
39
+
40
+ export interface OpenRouterReasoning {
41
+ effort?: OpenRouterReasoningEffort;
42
+ max_tokens?: number;
43
+ exclude?: boolean;
44
+ enabled?: boolean;
45
+ }
46
+
47
+ export interface ChatOpenRouterCallOptions
48
+ extends Omit<ChatOpenAICallOptions, 'reasoning'> {
49
+ /** @deprecated Use `reasoning` object instead */
33
50
  include_reasoning?: boolean;
51
+ reasoning?: OpenRouterReasoning;
34
52
  modelKwargs?: OpenAIChatInput['modelKwargs'];
35
53
  }
54
+
55
+ /** invocationParams return type extended with OpenRouter reasoning */
56
+ export type OpenRouterInvocationParams = Omit<
57
+ OpenAIClient.Chat.ChatCompletionCreateParams,
58
+ 'messages'
59
+ > & {
60
+ reasoning?: OpenRouterReasoning;
61
+ };
36
62
  export class ChatOpenRouter extends ChatOpenAI {
63
+ private openRouterReasoning?: OpenRouterReasoning;
64
+ /** @deprecated Use `reasoning` object instead */
65
+ private includeReasoning?: boolean;
66
+
37
67
  constructor(_fields: Partial<ChatOpenRouterCallOptions>) {
38
- const { include_reasoning, modelKwargs = {}, ...fields } = _fields;
68
+ const {
69
+ include_reasoning,
70
+ reasoning: openRouterReasoning,
71
+ modelKwargs = {},
72
+ ...fields
73
+ } = _fields;
74
+
75
+ // Extract reasoning from modelKwargs if provided there (e.g., from LLMConfig)
76
+ const { reasoning: mkReasoning, ...restModelKwargs } = modelKwargs as {
77
+ reasoning?: OpenRouterReasoning;
78
+ } & Record<string, unknown>;
79
+
39
80
  super({
40
81
  ...fields,
41
- modelKwargs: {
42
- ...modelKwargs,
43
- include_reasoning,
44
- },
82
+ modelKwargs: restModelKwargs,
45
83
  });
84
+
85
+ // Merge reasoning config: modelKwargs.reasoning < constructor reasoning
86
+ if (mkReasoning != null || openRouterReasoning != null) {
87
+ this.openRouterReasoning = {
88
+ ...mkReasoning,
89
+ ...openRouterReasoning,
90
+ };
91
+ }
92
+
93
+ this.includeReasoning = include_reasoning;
46
94
  }
47
95
  static lc_name(): 'LibreChatOpenRouter' {
48
96
  return 'LibreChatOpenRouter';
49
97
  }
98
+
99
+ // @ts-expect-error - OpenRouter reasoning extends OpenAI Reasoning with additional
100
+ // effort levels ('xhigh' | 'none' | 'minimal') not in ReasoningEffort.
101
+ // The parent's generic conditional return type cannot be widened in an override.
102
+ override invocationParams(
103
+ options?: this['ParsedCallOptions'],
104
+ extra?: { streaming?: boolean }
105
+ ): OpenRouterInvocationParams {
106
+ type MutableParams = Omit<
107
+ OpenAIClient.Chat.ChatCompletionCreateParams,
108
+ 'messages'
109
+ > & { reasoning_effort?: string; reasoning?: OpenRouterReasoning };
110
+
111
+ const params = super.invocationParams(options, extra) as MutableParams;
112
+
113
+ // Remove the OpenAI-native reasoning_effort that the parent sets;
114
+ // OpenRouter uses a `reasoning` object instead
115
+ delete params.reasoning_effort;
116
+
117
+ // Build the OpenRouter reasoning config
118
+ const reasoning = this.buildOpenRouterReasoning(options);
119
+ if (reasoning != null) {
120
+ params.reasoning = reasoning;
121
+ } else {
122
+ delete params.reasoning;
123
+ }
124
+
125
+ return params;
126
+ }
127
+
128
+ private buildOpenRouterReasoning(
129
+ options?: this['ParsedCallOptions']
130
+ ): OpenRouterReasoning | undefined {
131
+ let reasoning: OpenRouterReasoning | undefined;
132
+
133
+ // 1. Instance-level reasoning config (from constructor)
134
+ if (this.openRouterReasoning != null) {
135
+ reasoning = { ...this.openRouterReasoning };
136
+ }
137
+
138
+ // 2. LangChain-style reasoning params (from parent's `this.reasoning`)
139
+ const lcReasoning = this.getReasoningParams(options);
140
+ if (lcReasoning?.effort != null) {
141
+ reasoning = {
142
+ ...reasoning,
143
+ effort: lcReasoning.effort as OpenRouterReasoningEffort,
144
+ };
145
+ }
146
+
147
+ // 3. Call-level reasoning override
148
+ const callReasoning = (options as ChatOpenRouterCallOptions | undefined)
149
+ ?.reasoning;
150
+ if (callReasoning != null) {
151
+ reasoning = { ...reasoning, ...callReasoning };
152
+ }
153
+
154
+ // 4. Legacy include_reasoning backward compatibility
155
+ if (reasoning == null && this.includeReasoning === true) {
156
+ reasoning = { enabled: true };
157
+ }
158
+
159
+ return reasoning;
160
+ }
50
161
  protected override _convertOpenAIDeltaToBaseMessageChunk(
51
162
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
52
163
  delta: Record<string, any>,