@librechat/agents 3.1.52 → 3.1.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +16 -5
  2. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  3. package/dist/cjs/llm/google/index.cjs.map +1 -1
  4. package/dist/cjs/llm/openrouter/index.cjs +59 -5
  5. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  6. package/dist/cjs/llm/vertexai/index.cjs +16 -2
  7. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  8. package/dist/cjs/main.cjs +2 -0
  9. package/dist/cjs/main.cjs.map +1 -1
  10. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +16 -5
  11. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  12. package/dist/esm/llm/google/index.mjs.map +1 -1
  13. package/dist/esm/llm/openrouter/index.mjs +59 -5
  14. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  15. package/dist/esm/llm/vertexai/index.mjs +16 -2
  16. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  17. package/dist/esm/main.mjs +1 -0
  18. package/dist/esm/main.mjs.map +1 -1
  19. package/dist/types/index.d.ts +2 -0
  20. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +1 -1
  21. package/dist/types/llm/google/index.d.ts +2 -3
  22. package/dist/types/llm/openrouter/index.d.ts +21 -1
  23. package/dist/types/llm/vertexai/index.d.ts +2 -1
  24. package/dist/types/types/llm.d.ts +7 -2
  25. package/package.json +1 -1
  26. package/src/index.ts +6 -0
  27. package/src/llm/bedrock/llm.spec.ts +233 -4
  28. package/src/llm/bedrock/utils/message_outputs.ts +51 -11
  29. package/src/llm/google/index.ts +2 -3
  30. package/src/llm/openrouter/index.ts +117 -6
  31. package/src/llm/openrouter/reasoning.test.ts +207 -0
  32. package/src/llm/vertexai/index.ts +20 -3
  33. package/src/scripts/bedrock-cache-debug.ts +250 -0
  34. package/src/specs/openrouter.simple.test.ts +163 -2
  35. package/src/types/llm.ts +7 -2
  36. package/src/utils/llmConfig.ts +3 -4
@@ -0,0 +1,207 @@
1
+ import { ChatOpenRouter } from './index';
2
+ import type { OpenRouterReasoning, ChatOpenRouterCallOptions } from './index';
3
+ import type { OpenAIChatInput } from '@langchain/openai';
4
+
5
+ type CreateRouterOptions = Partial<
6
+ ChatOpenRouterCallOptions & Pick<OpenAIChatInput, 'model' | 'apiKey'>
7
+ >;
8
+
9
+ function createRouter(overrides: CreateRouterOptions = {}): ChatOpenRouter {
10
+ return new ChatOpenRouter({
11
+ model: 'openrouter/test-model',
12
+ apiKey: 'test-key',
13
+ ...overrides,
14
+ });
15
+ }
16
+
17
+ describe('ChatOpenRouter reasoning handling', () => {
18
+ // ---------------------------------------------------------------
19
+ // 1. Constructor reasoning config
20
+ // ---------------------------------------------------------------
21
+ describe('constructor reasoning config', () => {
22
+ it('stores reasoning when passed directly', () => {
23
+ const router = createRouter({ reasoning: { effort: 'high' } });
24
+ const params = router.invocationParams();
25
+ expect(params.reasoning).toEqual({ effort: 'high' });
26
+ });
27
+ });
28
+
29
+ // ---------------------------------------------------------------
30
+ // 2. modelKwargs reasoning extraction
31
+ // ---------------------------------------------------------------
32
+ describe('modelKwargs reasoning extraction', () => {
33
+ it('extracts reasoning from modelKwargs and places it into params.reasoning', () => {
34
+ const router = createRouter({
35
+ modelKwargs: { reasoning: { effort: 'medium' } },
36
+ });
37
+ const params = router.invocationParams();
38
+ expect(params.reasoning).toEqual({ effort: 'medium' });
39
+ });
40
+
41
+ it('does not leak reasoning into modelKwargs that reach the parent', () => {
42
+ const router = createRouter({
43
+ modelKwargs: {
44
+ reasoning: { effort: 'medium' },
45
+ },
46
+ });
47
+ const params = router.invocationParams();
48
+ // reasoning should be the structured OpenRouter object, not buried in modelKwargs
49
+ expect(params.reasoning).toEqual({ effort: 'medium' });
50
+ });
51
+ });
52
+
53
+ // ---------------------------------------------------------------
54
+ // 3. Reasoning merge precedence
55
+ // ---------------------------------------------------------------
56
+ describe('reasoning merge precedence', () => {
57
+ it('constructor reasoning overrides modelKwargs.reasoning', () => {
58
+ const router = createRouter({
59
+ reasoning: { effort: 'high' },
60
+ modelKwargs: { reasoning: { effort: 'low' } },
61
+ });
62
+ const params = router.invocationParams();
63
+ expect(params.reasoning).toEqual({ effort: 'high' });
64
+ });
65
+
66
+ it('merges non-overlapping keys from modelKwargs.reasoning and constructor reasoning', () => {
67
+ const router = createRouter({
68
+ reasoning: { effort: 'high' },
69
+ modelKwargs: { reasoning: { max_tokens: 5000 } },
70
+ });
71
+ const params = router.invocationParams();
72
+ expect(params.reasoning).toEqual({ effort: 'high', max_tokens: 5000 });
73
+ });
74
+ });
75
+
76
+ // ---------------------------------------------------------------
77
+ // 4. invocationParams output
78
+ // ---------------------------------------------------------------
79
+ describe('invocationParams output', () => {
80
+ it('includes reasoning object in params', () => {
81
+ const router = createRouter({ reasoning: { effort: 'high' } });
82
+ const params = router.invocationParams();
83
+ expect(params.reasoning).toBeDefined();
84
+ expect(params.reasoning).toEqual({ effort: 'high' });
85
+ });
86
+
87
+ it('does NOT include reasoning_effort in params', () => {
88
+ const router = createRouter({ reasoning: { effort: 'high' } });
89
+ const params = router.invocationParams();
90
+ expect(params.reasoning_effort).toBeUndefined();
91
+ });
92
+
93
+ it('does not include reasoning when none is configured', () => {
94
+ const router = createRouter();
95
+ const params = router.invocationParams();
96
+ expect(params.reasoning).toBeUndefined();
97
+ expect(params.reasoning_effort).toBeUndefined();
98
+ });
99
+ });
100
+
101
+ // ---------------------------------------------------------------
102
+ // 5. Legacy include_reasoning
103
+ // ---------------------------------------------------------------
104
+ describe('legacy include_reasoning', () => {
105
+ it('produces { enabled: true } when only include_reasoning is true', () => {
106
+ const router = createRouter({ include_reasoning: true });
107
+ const params = router.invocationParams();
108
+ expect(params.reasoning).toEqual({ enabled: true });
109
+ });
110
+
111
+ it('does not produce reasoning when include_reasoning is false', () => {
112
+ const router = createRouter({ include_reasoning: false });
113
+ const params = router.invocationParams();
114
+ expect(params.reasoning).toBeUndefined();
115
+ });
116
+ });
117
+
118
+ // ---------------------------------------------------------------
119
+ // 6. Legacy include_reasoning ignored when reasoning is provided
120
+ // ---------------------------------------------------------------
121
+ describe('legacy include_reasoning ignored when reasoning provided', () => {
122
+ it('reasoning wins over include_reasoning', () => {
123
+ const router = createRouter({
124
+ reasoning: { effort: 'medium' },
125
+ include_reasoning: true,
126
+ });
127
+ const params = router.invocationParams();
128
+ // Should use the structured reasoning, NOT fall back to { enabled: true }
129
+ expect(params.reasoning).toEqual({ effort: 'medium' });
130
+ });
131
+
132
+ it('reasoning from modelKwargs also wins over include_reasoning', () => {
133
+ const router = createRouter({
134
+ modelKwargs: { reasoning: { effort: 'low' } },
135
+ include_reasoning: true,
136
+ });
137
+ const params = router.invocationParams();
138
+ expect(params.reasoning).toEqual({ effort: 'low' });
139
+ });
140
+ });
141
+
142
+ // ---------------------------------------------------------------
143
+ // 7. Various effort levels (OpenRouter-specific)
144
+ // ---------------------------------------------------------------
145
+ describe('various effort levels', () => {
146
+ const efforts: Array<{
147
+ effort: OpenRouterReasoning['effort'];
148
+ }> = [
149
+ { effort: 'xhigh' },
150
+ { effort: 'none' },
151
+ { effort: 'minimal' },
152
+ { effort: 'high' },
153
+ { effort: 'medium' },
154
+ { effort: 'low' },
155
+ ];
156
+
157
+ it.each(efforts)('supports effort level "$effort"', ({ effort }) => {
158
+ const router = createRouter({ reasoning: { effort } });
159
+ const params = router.invocationParams();
160
+ expect(params.reasoning).toEqual({ effort });
161
+ expect(params.reasoning_effort).toBeUndefined();
162
+ });
163
+ });
164
+
165
+ // ---------------------------------------------------------------
166
+ // 8. max_tokens reasoning
167
+ // ---------------------------------------------------------------
168
+ describe('max_tokens reasoning', () => {
169
+ it('passes max_tokens in reasoning object', () => {
170
+ const router = createRouter({
171
+ reasoning: { max_tokens: 8000 },
172
+ });
173
+ const params = router.invocationParams();
174
+ expect(params.reasoning).toEqual({ max_tokens: 8000 });
175
+ });
176
+
177
+ it('combines max_tokens with effort', () => {
178
+ const router = createRouter({
179
+ reasoning: { effort: 'high', max_tokens: 8000 },
180
+ });
181
+ const params = router.invocationParams();
182
+ expect(params.reasoning).toEqual({ effort: 'high', max_tokens: 8000 });
183
+ expect(params.reasoning_effort).toBeUndefined();
184
+ });
185
+ });
186
+
187
+ // ---------------------------------------------------------------
188
+ // 9. exclude reasoning
189
+ // ---------------------------------------------------------------
190
+ describe('exclude reasoning', () => {
191
+ it('passes exclude flag in reasoning object', () => {
192
+ const router = createRouter({
193
+ reasoning: { effort: 'high', exclude: true },
194
+ });
195
+ const params = router.invocationParams();
196
+ expect(params.reasoning).toEqual({ effort: 'high', exclude: true });
197
+ });
198
+
199
+ it('supports exclude without effort', () => {
200
+ const router = createRouter({
201
+ reasoning: { exclude: true },
202
+ });
203
+ const params = router.invocationParams();
204
+ expect(params.reasoning).toEqual({ exclude: true });
205
+ });
206
+ });
207
+ });
@@ -6,9 +6,11 @@ import type {
6
6
  GoogleAbstractedClient,
7
7
  } from '@langchain/google-common';
8
8
  import type { BaseMessage } from '@langchain/core/messages';
9
- import type { VertexAIClientOptions } from '@/types';
9
+ import type { GoogleThinkingConfig, VertexAIClientOptions } from '@/types';
10
10
 
11
11
  class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
12
+ thinkingConfig?: GoogleThinkingConfig;
13
+
12
14
  async formatData(
13
15
  input: BaseMessage[],
14
16
  parameters: GoogleAIModelRequestParams
@@ -26,6 +28,15 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
26
28
  }
27
29
  delete formattedData.generationConfig.thinkingConfig.thinkingBudget;
28
30
  }
31
+ if (this.thinkingConfig?.thinkingLevel) {
32
+ formattedData.generationConfig ??= {};
33
+ (
34
+ formattedData.generationConfig as Record<string, unknown>
35
+ ).thinkingConfig = {
36
+ ...formattedData.generationConfig.thinkingConfig,
37
+ thinkingLevel: this.thinkingConfig.thinkingLevel,
38
+ };
39
+ }
29
40
  return formattedData;
30
41
  }
31
42
  }
@@ -315,6 +326,7 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
315
326
  export class ChatVertexAI extends ChatGoogle {
316
327
  lc_namespace = ['langchain', 'chat_models', 'vertexai'];
317
328
  dynamicThinkingBudget = false;
329
+ thinkingConfig?: GoogleThinkingConfig;
318
330
 
319
331
  static lc_name(): 'LibreChatVertexAI' {
320
332
  return 'LibreChatVertexAI';
@@ -327,6 +339,7 @@ export class ChatVertexAI extends ChatGoogle {
327
339
  platformType: 'gcp',
328
340
  });
329
341
  this.dynamicThinkingBudget = dynamicThinkingBudget;
342
+ this.thinkingConfig = fields?.thinkingConfig;
330
343
  }
331
344
  invocationParams(
332
345
  options?: this['ParsedCallOptions'] | undefined
@@ -342,18 +355,22 @@ export class ChatVertexAI extends ChatGoogle {
342
355
  fields: VertexAIClientOptions,
343
356
  client: GoogleAbstractedClient
344
357
  ): void {
345
- this.connection = new CustomChatConnection(
358
+ const connection = new CustomChatConnection(
346
359
  { ...fields, ...this },
347
360
  this.caller,
348
361
  client,
349
362
  false
350
363
  );
364
+ connection.thinkingConfig = this.thinkingConfig;
365
+ this.connection = connection;
351
366
 
352
- this.streamedConnection = new CustomChatConnection(
367
+ const streamedConnection = new CustomChatConnection(
353
368
  { ...fields, ...this },
354
369
  this.caller,
355
370
  client,
356
371
  true
357
372
  );
373
+ streamedConnection.thinkingConfig = this.thinkingConfig;
374
+ this.streamedConnection = streamedConnection;
358
375
  }
359
376
  }
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Debug script to investigate cache token omission in Bedrock responses.
3
+ *
4
+ * This script:
5
+ * 1. Makes a streaming call to Bedrock and logs the raw metadata event
6
+ * 2. Shows exactly what fields the AWS SDK returns in usage (including cache tokens)
7
+ * 3. Shows what our handleConverseStreamMetadata produces vs what it should produce
8
+ * 4. Makes a multi-turn call to trigger caching and verify cache tokens appear
9
+ */
10
+ import { config } from 'dotenv';
11
+ config();
12
+ import { HumanMessage } from '@langchain/core/messages';
13
+ import type { AIMessageChunk } from '@langchain/core/messages';
14
+ import { concat } from '@langchain/core/utils/stream';
15
+ import {
16
+ ConverseStreamCommand,
17
+ BedrockRuntimeClient,
18
+ } from '@aws-sdk/client-bedrock-runtime';
19
+ import { CustomChatBedrockConverse } from '@/llm/bedrock';
20
+
21
+ const region = process.env.BEDROCK_AWS_REGION ?? 'us-east-1';
22
+ const credentials = {
23
+ accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!,
24
+ secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!,
25
+ };
26
+
27
+ const MODEL_ID = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
28
+
29
+ // A long system prompt to increase likelihood of cache usage
30
+ // Bedrock requires minimum 1024 tokens for prompt caching to activate
31
+ const SYSTEM_PROMPT = `You are an expert assistant. Here is a large context block to help trigger cache behavior:
32
+
33
+ ${Array(200).fill('This is padding content to make the prompt large enough to trigger Bedrock prompt caching. The minimum requirement for Anthropic models on Bedrock is 1024 tokens in the cached prefix. We need to ensure this prompt is well above that threshold. ').join('')}
34
+
35
+ When answering, be brief and direct.`;
36
+
37
+ async function rawSdkCall(): Promise<void> {
38
+ console.log('='.repeat(60));
39
+ console.log('TEST 1: Raw AWS SDK call - inspect metadata.usage directly');
40
+ console.log('='.repeat(60));
41
+
42
+ const client = new BedrockRuntimeClient({ region, credentials });
43
+
44
+ // First call - should create cache
45
+ // Use cachePoint block to explicitly enable prompt caching
46
+ console.log('\n--- Call 1 (cache write expected) ---');
47
+ const command1 = new ConverseStreamCommand({
48
+ modelId: MODEL_ID,
49
+ system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
50
+ messages: [{ role: 'user', content: [{ text: 'What is 2+2?' }] }],
51
+ inferenceConfig: { maxTokens: 100 },
52
+ });
53
+
54
+ const response1 = await client.send(command1);
55
+ if (response1.stream) {
56
+ for await (const event of response1.stream) {
57
+ if (event.metadata != null) {
58
+ console.log('\nRAW metadata event (Call 1):');
59
+ console.dir(event.metadata, { depth: null });
60
+ console.log('\nRAW metadata.usage:');
61
+ console.dir(event.metadata.usage, { depth: null });
62
+ console.log('\nSpecific cache fields:');
63
+ console.log(
64
+ ' cacheReadInputTokens:',
65
+ (event.metadata.usage as Record<string, unknown>)
66
+ ?.cacheReadInputTokens
67
+ );
68
+ console.log(
69
+ ' cacheWriteInputTokens:',
70
+ (event.metadata.usage as Record<string, unknown>)
71
+ ?.cacheWriteInputTokens
72
+ );
73
+ }
74
+ }
75
+ }
76
+
77
+ // Second call - should read from cache
78
+ console.log('\n--- Call 2 (cache read expected) ---');
79
+ const command2 = new ConverseStreamCommand({
80
+ modelId: MODEL_ID,
81
+ system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
82
+ messages: [
83
+ { role: 'user', content: [{ text: 'What is 2+2?' }] },
84
+ { role: 'assistant', content: [{ text: '4' }] },
85
+ { role: 'user', content: [{ text: 'And what is 3+3?' }] },
86
+ ],
87
+ inferenceConfig: { maxTokens: 100 },
88
+ });
89
+
90
+ const response2 = await client.send(command2);
91
+ if (response2.stream) {
92
+ for await (const event of response2.stream) {
93
+ if (event.metadata != null) {
94
+ console.log('\nRAW metadata event (Call 2):');
95
+ console.dir(event.metadata, { depth: null });
96
+ console.log('\nRAW metadata.usage:');
97
+ console.dir(event.metadata.usage, { depth: null });
98
+ console.log('\nSpecific cache fields:');
99
+ console.log(
100
+ ' cacheReadInputTokens:',
101
+ (event.metadata.usage as Record<string, unknown>)
102
+ ?.cacheReadInputTokens
103
+ );
104
+ console.log(
105
+ ' cacheWriteInputTokens:',
106
+ (event.metadata.usage as Record<string, unknown>)
107
+ ?.cacheWriteInputTokens
108
+ );
109
+ }
110
+ }
111
+ }
112
+ }
113
+
114
+ async function wrapperStreamCallNoCachePoint(): Promise<void> {
115
+ console.log('\n' + '='.repeat(60));
116
+ console.log(
117
+ 'TEST 2: CustomChatBedrockConverse stream (NO cachePoint) - check usage_metadata'
118
+ );
119
+ console.log('='.repeat(60));
120
+ console.log('(Without cachePoint, Bedrock does NOT return cache tokens)');
121
+
122
+ const model = new CustomChatBedrockConverse({
123
+ model: MODEL_ID,
124
+ region,
125
+ credentials,
126
+ maxTokens: 100,
127
+ streaming: true,
128
+ streamUsage: true,
129
+ });
130
+
131
+ console.log('\n--- Wrapper Call (no cachePoint) ---');
132
+ const messages1 = [new HumanMessage(SYSTEM_PROMPT + '\n\nWhat is 2+2?')];
133
+ let finalChunk1: AIMessageChunk | undefined;
134
+
135
+ for await (const chunk of await model.stream(messages1)) {
136
+ finalChunk1 = finalChunk1 ? concat(finalChunk1, chunk) : chunk;
137
+ }
138
+
139
+ console.log(
140
+ '\nFinal usage_metadata:',
141
+ JSON.stringify(finalChunk1!.usage_metadata)
142
+ );
143
+ console.log('(No cache tokens expected since no cachePoint block was sent)');
144
+ }
145
+
146
+ async function wrapperStreamCallWithCachePoint(): Promise<void> {
147
+ console.log('\n' + '='.repeat(60));
148
+ console.log(
149
+ 'TEST 3: Raw SDK with cachePoint -> verify handleConverseStreamMetadata extracts cache tokens'
150
+ );
151
+ console.log('='.repeat(60));
152
+
153
+ // We use the raw SDK with cachePoint to trigger caching, then verify
154
+ // that our handleConverseStreamMetadata function properly extracts cache fields
155
+ const { handleConverseStreamMetadata } = await import(
156
+ '@/llm/bedrock/utils/message_outputs'
157
+ );
158
+
159
+ const client = new BedrockRuntimeClient({ region, credentials });
160
+
161
+ // Call 1 - establish cache
162
+ console.log('\n--- Call 1 (cache write) ---');
163
+ const command1 = new ConverseStreamCommand({
164
+ modelId: MODEL_ID,
165
+ system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
166
+ messages: [{ role: 'user', content: [{ text: 'What is 2+2?' }] }],
167
+ inferenceConfig: { maxTokens: 100 },
168
+ });
169
+
170
+ const response1 = await client.send(command1);
171
+ if (response1.stream) {
172
+ for await (const event of response1.stream) {
173
+ if (event.metadata != null) {
174
+ console.log('Raw usage:', JSON.stringify(event.metadata.usage));
175
+
176
+ // Test our handler
177
+ const chunk = handleConverseStreamMetadata(event.metadata, {
178
+ streamUsage: true,
179
+ });
180
+ console.log(
181
+ 'handleConverseStreamMetadata output usage_metadata:',
182
+ JSON.stringify(chunk.message.usage_metadata)
183
+ );
184
+
185
+ const hasDetails =
186
+ chunk.message.usage_metadata?.input_token_details != null;
187
+ console.log(
188
+ `Has input_token_details: ${hasDetails}`,
189
+ hasDetails
190
+ ? JSON.stringify(chunk.message.usage_metadata!.input_token_details)
191
+ : '(MISSING - BUG!)'
192
+ );
193
+ }
194
+ }
195
+ }
196
+
197
+ // Call 2 - read from cache
198
+ console.log('\n--- Call 2 (cache read) ---');
199
+ const command2 = new ConverseStreamCommand({
200
+ modelId: MODEL_ID,
201
+ system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
202
+ messages: [
203
+ { role: 'user', content: [{ text: 'What is 2+2?' }] },
204
+ { role: 'assistant', content: [{ text: '4' }] },
205
+ { role: 'user', content: [{ text: 'What is 3+3?' }] },
206
+ ],
207
+ inferenceConfig: { maxTokens: 100 },
208
+ });
209
+
210
+ const response2 = await client.send(command2);
211
+ if (response2.stream) {
212
+ for await (const event of response2.stream) {
213
+ if (event.metadata != null) {
214
+ console.log('Raw usage:', JSON.stringify(event.metadata.usage));
215
+
216
+ const chunk = handleConverseStreamMetadata(event.metadata, {
217
+ streamUsage: true,
218
+ });
219
+ console.log(
220
+ 'handleConverseStreamMetadata output usage_metadata:',
221
+ JSON.stringify(chunk.message.usage_metadata)
222
+ );
223
+
224
+ const hasDetails =
225
+ chunk.message.usage_metadata?.input_token_details != null;
226
+ console.log(
227
+ `Has input_token_details: ${hasDetails}`,
228
+ hasDetails
229
+ ? JSON.stringify(chunk.message.usage_metadata!.input_token_details)
230
+ : '(MISSING - BUG!)'
231
+ );
232
+ }
233
+ }
234
+ }
235
+ }
236
+
237
+ async function main(): Promise<void> {
238
+ console.log('Bedrock Cache Token Debug Script');
239
+ console.log(`Model: ${MODEL_ID}`);
240
+ console.log(`Region: ${region}\n`);
241
+
242
+ await rawSdkCall();
243
+ await wrapperStreamCallNoCachePoint();
244
+ await wrapperStreamCallWithCachePoint();
245
+ }
246
+
247
+ main().catch((err) => {
248
+ console.error('Fatal error:', err);
249
+ process.exit(1);
250
+ });