@librechat/agents 3.1.52 → 3.1.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +16 -5
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +59 -5
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +16 -2
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +2 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +16 -5
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +59 -5
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +16 -2
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +1 -0
- package/dist/esm/main.mjs.map +1 -1
- package/dist/types/index.d.ts +2 -0
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +1 -1
- package/dist/types/llm/google/index.d.ts +2 -3
- package/dist/types/llm/openrouter/index.d.ts +21 -1
- package/dist/types/llm/vertexai/index.d.ts +2 -1
- package/dist/types/types/llm.d.ts +7 -2
- package/package.json +1 -1
- package/src/index.ts +6 -0
- package/src/llm/bedrock/llm.spec.ts +233 -4
- package/src/llm/bedrock/utils/message_outputs.ts +51 -11
- package/src/llm/google/index.ts +2 -3
- package/src/llm/openrouter/index.ts +117 -6
- package/src/llm/openrouter/reasoning.test.ts +207 -0
- package/src/llm/vertexai/index.ts +20 -3
- package/src/scripts/bedrock-cache-debug.ts +250 -0
- package/src/specs/openrouter.simple.test.ts +163 -2
- package/src/types/llm.ts +7 -2
- package/src/utils/llmConfig.ts +3 -4
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import { ChatOpenRouter } from './index';
|
|
2
|
+
import type { OpenRouterReasoning, ChatOpenRouterCallOptions } from './index';
|
|
3
|
+
import type { OpenAIChatInput } from '@langchain/openai';
|
|
4
|
+
|
|
5
|
+
/**
 * Overrides accepted by `createRouter`: any subset of the OpenRouter call
 * options, plus optional `model` and `apiKey` taken from `OpenAIChatInput`.
 */
type CreateRouterOptions = Partial<ChatOpenRouterCallOptions> &
  Partial<Pick<OpenAIChatInput, 'model' | 'apiKey'>>;
|
|
8
|
+
|
|
9
|
+
/**
 * Test factory for `ChatOpenRouter`.
 *
 * Starts from a fixed placeholder model name and API key and lets the caller
 * replace or extend those fields; later keys in `overrides` win.
 *
 * @param overrides - Fields spread on top of the defaults (defaults to none).
 * @returns A new `ChatOpenRouter` built from the merged fields.
 */
function createRouter(overrides: CreateRouterOptions = {}): ChatOpenRouter {
  const defaults = {
    model: 'openrouter/test-model',
    apiKey: 'test-key',
  };
  return new ChatOpenRouter({ ...defaults, ...overrides });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Checks how `ChatOpenRouter` turns its reasoning-related constructor inputs
 * (`reasoning`, `modelKwargs.reasoning`, legacy `include_reasoning`) into the
 * `reasoning` entry of `invocationParams()`.
 */
describe('ChatOpenRouter reasoning handling', () => {
  /** Builds a router from `overrides` and returns its invocation params. */
  const paramsFor = (overrides?: CreateRouterOptions) =>
    createRouter(overrides).invocationParams();

  // === 1. `reasoning` given straight to the constructor ===
  describe('constructor reasoning config', () => {
    it('stores reasoning when passed directly', () => {
      const sent = paramsFor({ reasoning: { effort: 'high' } });
      expect(sent.reasoning).toEqual({ effort: 'high' });
    });
  });

  // === 2. `reasoning` supplied through modelKwargs ===
  describe('modelKwargs reasoning extraction', () => {
    it('extracts reasoning from modelKwargs and places it into params.reasoning', () => {
      const sent = paramsFor({
        modelKwargs: { reasoning: { effort: 'medium' } },
      });
      expect(sent.reasoning).toEqual({ effort: 'medium' });
    });

    it('does not leak reasoning into modelKwargs that reach the parent', () => {
      const sent = paramsFor({
        modelKwargs: {
          reasoning: { effort: 'medium' },
        },
      });
      // The structured OpenRouter object must surface as params.reasoning.
      // NOTE(review): this only inspects params.reasoning; nothing here looks
      // at the modelKwargs handed to the parent class — consider asserting on
      // that directly if the title's claim needs to be enforced.
      expect(sent.reasoning).toEqual({ effort: 'medium' });
    });
  });

  // === 3. Which source wins when both are present ===
  describe('reasoning merge precedence', () => {
    it('constructor reasoning overrides modelKwargs.reasoning', () => {
      const sent = paramsFor({
        reasoning: { effort: 'high' },
        modelKwargs: { reasoning: { effort: 'low' } },
      });
      expect(sent.reasoning).toEqual({ effort: 'high' });
    });

    it('merges non-overlapping keys from modelKwargs.reasoning and constructor reasoning', () => {
      const sent = paramsFor({
        reasoning: { effort: 'high' },
        modelKwargs: { reasoning: { max_tokens: 5000 } },
      });
      expect(sent.reasoning).toEqual({ effort: 'high', max_tokens: 5000 });
    });
  });

  // === 4. Shape of the invocationParams() result ===
  describe('invocationParams output', () => {
    it('includes reasoning object in params', () => {
      const sent = paramsFor({ reasoning: { effort: 'high' } });
      expect(sent.reasoning).toBeDefined();
      expect(sent.reasoning).toEqual({ effort: 'high' });
    });

    it('does NOT include reasoning_effort in params', () => {
      const sent = paramsFor({ reasoning: { effort: 'high' } });
      expect(sent.reasoning_effort).toBeUndefined();
    });

    it('does not include reasoning when none is configured', () => {
      const sent = paramsFor();
      expect(sent.reasoning).toBeUndefined();
      expect(sent.reasoning_effort).toBeUndefined();
    });
  });

  // === 5. Legacy boolean flag on its own ===
  describe('legacy include_reasoning', () => {
    it('produces { enabled: true } when only include_reasoning is true', () => {
      const sent = paramsFor({ include_reasoning: true });
      expect(sent.reasoning).toEqual({ enabled: true });
    });

    it('does not produce reasoning when include_reasoning is false', () => {
      const sent = paramsFor({ include_reasoning: false });
      expect(sent.reasoning).toBeUndefined();
    });
  });

  // === 6. Legacy flag combined with a structured reasoning object ===
  describe('legacy include_reasoning ignored when reasoning provided', () => {
    it('reasoning wins over include_reasoning', () => {
      const sent = paramsFor({
        reasoning: { effort: 'medium' },
        include_reasoning: true,
      });
      // Expect the structured object, not the { enabled: true } fallback.
      expect(sent.reasoning).toEqual({ effort: 'medium' });
    });

    it('reasoning from modelKwargs also wins over include_reasoning', () => {
      const sent = paramsFor({
        modelKwargs: { reasoning: { effort: 'low' } },
        include_reasoning: true,
      });
      expect(sent.reasoning).toEqual({ effort: 'low' });
    });
  });

  // === 7. Every effort level exercised by this suite ===
  describe('various effort levels', () => {
    const levels: Array<OpenRouterReasoning['effort']> = [
      'xhigh',
      'none',
      'minimal',
      'high',
      'medium',
      'low',
    ];
    // Wrapped in objects so the `$effort` placeholder in the title resolves.
    const efforts = levels.map((effort) => ({ effort }));

    it.each(efforts)('supports effort level "$effort"', ({ effort }) => {
      const sent = paramsFor({ reasoning: { effort } });
      expect(sent.reasoning).toEqual({ effort });
      expect(sent.reasoning_effort).toBeUndefined();
    });
  });

  // === 8. Token budget inside the reasoning object ===
  describe('max_tokens reasoning', () => {
    it('passes max_tokens in reasoning object', () => {
      const sent = paramsFor({
        reasoning: { max_tokens: 8000 },
      });
      expect(sent.reasoning).toEqual({ max_tokens: 8000 });
    });

    it('combines max_tokens with effort', () => {
      const sent = paramsFor({
        reasoning: { effort: 'high', max_tokens: 8000 },
      });
      expect(sent.reasoning).toEqual({ effort: 'high', max_tokens: 8000 });
      expect(sent.reasoning_effort).toBeUndefined();
    });
  });

  // === 9. `exclude` flag inside the reasoning object ===
  describe('exclude reasoning', () => {
    it('passes exclude flag in reasoning object', () => {
      const sent = paramsFor({
        reasoning: { effort: 'high', exclude: true },
      });
      expect(sent.reasoning).toEqual({ effort: 'high', exclude: true });
    });

    it('supports exclude without effort', () => {
      const sent = paramsFor({
        reasoning: { exclude: true },
      });
      expect(sent.reasoning).toEqual({ exclude: true });
    });
  });
});
|
|
@@ -6,9 +6,11 @@ import type {
|
|
|
6
6
|
GoogleAbstractedClient,
|
|
7
7
|
} from '@langchain/google-common';
|
|
8
8
|
import type { BaseMessage } from '@langchain/core/messages';
|
|
9
|
-
import type { VertexAIClientOptions } from '@/types';
|
|
9
|
+
import type { GoogleThinkingConfig, VertexAIClientOptions } from '@/types';
|
|
10
10
|
|
|
11
11
|
class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
|
|
12
|
+
thinkingConfig?: GoogleThinkingConfig;
|
|
13
|
+
|
|
12
14
|
async formatData(
|
|
13
15
|
input: BaseMessage[],
|
|
14
16
|
parameters: GoogleAIModelRequestParams
|
|
@@ -26,6 +28,15 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
|
|
|
26
28
|
}
|
|
27
29
|
delete formattedData.generationConfig.thinkingConfig.thinkingBudget;
|
|
28
30
|
}
|
|
31
|
+
if (this.thinkingConfig?.thinkingLevel) {
|
|
32
|
+
formattedData.generationConfig ??= {};
|
|
33
|
+
(
|
|
34
|
+
formattedData.generationConfig as Record<string, unknown>
|
|
35
|
+
).thinkingConfig = {
|
|
36
|
+
...formattedData.generationConfig.thinkingConfig,
|
|
37
|
+
thinkingLevel: this.thinkingConfig.thinkingLevel,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
29
40
|
return formattedData;
|
|
30
41
|
}
|
|
31
42
|
}
|
|
@@ -315,6 +326,7 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
|
|
|
315
326
|
export class ChatVertexAI extends ChatGoogle {
|
|
316
327
|
lc_namespace = ['langchain', 'chat_models', 'vertexai'];
|
|
317
328
|
dynamicThinkingBudget = false;
|
|
329
|
+
thinkingConfig?: GoogleThinkingConfig;
|
|
318
330
|
|
|
319
331
|
static lc_name(): 'LibreChatVertexAI' {
|
|
320
332
|
return 'LibreChatVertexAI';
|
|
@@ -327,6 +339,7 @@ export class ChatVertexAI extends ChatGoogle {
|
|
|
327
339
|
platformType: 'gcp',
|
|
328
340
|
});
|
|
329
341
|
this.dynamicThinkingBudget = dynamicThinkingBudget;
|
|
342
|
+
this.thinkingConfig = fields?.thinkingConfig;
|
|
330
343
|
}
|
|
331
344
|
invocationParams(
|
|
332
345
|
options?: this['ParsedCallOptions'] | undefined
|
|
@@ -342,18 +355,22 @@ export class ChatVertexAI extends ChatGoogle {
|
|
|
342
355
|
fields: VertexAIClientOptions,
|
|
343
356
|
client: GoogleAbstractedClient
|
|
344
357
|
): void {
|
|
345
|
-
|
|
358
|
+
const connection = new CustomChatConnection(
|
|
346
359
|
{ ...fields, ...this },
|
|
347
360
|
this.caller,
|
|
348
361
|
client,
|
|
349
362
|
false
|
|
350
363
|
);
|
|
364
|
+
connection.thinkingConfig = this.thinkingConfig;
|
|
365
|
+
this.connection = connection;
|
|
351
366
|
|
|
352
|
-
|
|
367
|
+
const streamedConnection = new CustomChatConnection(
|
|
353
368
|
{ ...fields, ...this },
|
|
354
369
|
this.caller,
|
|
355
370
|
client,
|
|
356
371
|
true
|
|
357
372
|
);
|
|
373
|
+
streamedConnection.thinkingConfig = this.thinkingConfig;
|
|
374
|
+
this.streamedConnection = streamedConnection;
|
|
358
375
|
}
|
|
359
376
|
}
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Debug script to investigate cache token omission in Bedrock responses.
|
|
3
|
+
*
|
|
4
|
+
* This script:
|
|
5
|
+
* 1. Makes a streaming call to Bedrock and logs the raw metadata event
|
|
6
|
+
* 2. Shows exactly what fields the AWS SDK returns in usage (including cache tokens)
|
|
7
|
+
* 3. Shows what our handleConverseStreamMetadata produces vs what it should produce
|
|
8
|
+
* 4. Makes a multi-turn call to trigger caching and verify cache tokens appear
|
|
9
|
+
*/
|
|
10
|
+
import { config } from 'dotenv';
|
|
11
|
+
config();
|
|
12
|
+
import { HumanMessage } from '@langchain/core/messages';
|
|
13
|
+
import type { AIMessageChunk } from '@langchain/core/messages';
|
|
14
|
+
import { concat } from '@langchain/core/utils/stream';
|
|
15
|
+
import {
|
|
16
|
+
ConverseStreamCommand,
|
|
17
|
+
BedrockRuntimeClient,
|
|
18
|
+
} from '@aws-sdk/client-bedrock-runtime';
|
|
19
|
+
import { CustomChatBedrockConverse } from '@/llm/bedrock';
|
|
20
|
+
|
|
21
|
+
// Region used by every Bedrock client in this script; falls back to
// us-east-1 when BEDROCK_AWS_REGION is not set.
const region = process.env.BEDROCK_AWS_REGION ?? 'us-east-1';

// Static credentials taken from the environment. The non-null assertions only
// satisfy the type checker; nothing here verifies the variables are present.
const credentials = {
  accessKeyId: process.env.BEDROCK_AWS_ACCESS_KEY_ID!,
  secretAccessKey: process.env.BEDROCK_AWS_SECRET_ACCESS_KEY!,
};

// Model identifier sent with every request below.
const MODEL_ID = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';

// Deliberately oversized system prompt: one sentence group repeated 200 times
// so the cached prefix clears the 1024-token minimum the padding text itself
// describes. The three parts are separated by a blank line.
const SYSTEM_PROMPT = [
  'You are an expert assistant. Here is a large context block to help trigger cache behavior:',
  'This is padding content to make the prompt large enough to trigger Bedrock prompt caching. The minimum requirement for Anthropic models on Bedrock is 1024 tokens in the cached prefix. We need to ensure this prompt is well above that threshold. '.repeat(
    200
  ),
  'When answering, be brief and direct.',
].join('\n\n');
|
|
36
|
+
|
|
37
|
+
/**
 * TEST 1: talks to Bedrock through the raw AWS SDK client and prints the
 * `metadata` stream event of two consecutive `ConverseStreamCommand`s.
 *
 * Both requests place a `cachePoint` block after the system prompt. The first
 * is a single-turn question; the second repeats it as history and adds a
 * follow-up. For every metadata event the full object, its `usage` member and
 * the two cache counters are logged.
 *
 * @returns Resolves once both response streams have been fully consumed.
 */
async function rawSdkCall(): Promise<void> {
  console.log('='.repeat(60));
  console.log('TEST 1: Raw AWS SDK call - inspect metadata.usage directly');
  console.log('='.repeat(60));

  const client = new BedrockRuntimeClient({ region, credentials });

  /** Sends `command` and dumps each metadata event, labelled with `callNumber`. */
  const sendAndDumpMetadata = async (
    callNumber: number,
    command: ConverseStreamCommand
  ): Promise<void> => {
    const response = await client.send(command);
    if (!response.stream) {
      return;
    }
    for await (const event of response.stream) {
      if (event.metadata == null) {
        continue;
      }
      // Read the cache counters by key so this works whether or not the
      // installed SDK typings declare them on the usage object.
      const usage = event.metadata.usage as
        | Record<string, unknown>
        | undefined;
      console.log(`\nRAW metadata event (Call ${callNumber}):`);
      console.dir(event.metadata, { depth: null });
      console.log('\nRAW metadata.usage:');
      console.dir(event.metadata.usage, { depth: null });
      console.log('\nSpecific cache fields:');
      console.log(' cacheReadInputTokens:', usage?.cacheReadInputTokens);
      console.log(' cacheWriteInputTokens:', usage?.cacheWriteInputTokens);
    }
  };

  // First call - should create cache.
  // The cachePoint block explicitly enables prompt caching.
  console.log('\n--- Call 1 (cache write expected) ---');
  const command1 = new ConverseStreamCommand({
    modelId: MODEL_ID,
    system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
    messages: [{ role: 'user', content: [{ text: 'What is 2+2?' }] }],
    inferenceConfig: { maxTokens: 100 },
  });
  await sendAndDumpMetadata(1, command1);

  // Second call - same system prefix, so it should read from cache.
  console.log('\n--- Call 2 (cache read expected) ---');
  const command2 = new ConverseStreamCommand({
    modelId: MODEL_ID,
    system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
    messages: [
      { role: 'user', content: [{ text: 'What is 2+2?' }] },
      { role: 'assistant', content: [{ text: '4' }] },
      { role: 'user', content: [{ text: 'And what is 3+3?' }] },
    ],
    inferenceConfig: { maxTokens: 100 },
  });
  await sendAndDumpMetadata(2, command2);
}
|
|
113
|
+
|
|
114
|
+
/**
 * TEST 2: streams one request through `CustomChatBedrockConverse` without any
 * cachePoint block and prints the `usage_metadata` of the concatenated chunks.
 *
 * The long prompt is sent as part of the single human message rather than as
 * a system block.
 *
 * @returns Resolves after the aggregated usage metadata has been logged.
 * @throws Error if the stream finishes without yielding a single chunk, since
 *   there is then no message to read `usage_metadata` from.
 */
async function wrapperStreamCallNoCachePoint(): Promise<void> {
  console.log('\n' + '='.repeat(60));
  console.log(
    'TEST 2: CustomChatBedrockConverse stream (NO cachePoint) - check usage_metadata'
  );
  console.log('='.repeat(60));
  console.log('(Without cachePoint, Bedrock does NOT return cache tokens)');

  const model = new CustomChatBedrockConverse({
    model: MODEL_ID,
    region,
    credentials,
    maxTokens: 100,
    streaming: true,
    streamUsage: true,
  });

  console.log('\n--- Wrapper Call (no cachePoint) ---');
  const messages1 = [new HumanMessage(SYSTEM_PROMPT + '\n\nWhat is 2+2?')];
  let finalChunk1: AIMessageChunk | undefined;

  // Fold every streamed chunk into one message so usage from any chunk is kept.
  for await (const chunk of await model.stream(messages1)) {
    finalChunk1 = finalChunk1 ? concat(finalChunk1, chunk) : chunk;
  }

  // Fail with a clear message instead of dereferencing undefined when the
  // stream produced nothing.
  if (finalChunk1 === undefined) {
    throw new Error(
      'Bedrock stream ended without yielding any chunks; no usage_metadata to report'
    );
  }

  console.log(
    '\nFinal usage_metadata:',
    JSON.stringify(finalChunk1.usage_metadata)
  );
  console.log('(No cache tokens expected since no cachePoint block was sent)');
}
|
|
145
|
+
|
|
146
|
+
/**
 * TEST 3: sends two cachePoint-enabled requests through the raw AWS SDK client
 * and feeds each metadata event to `handleConverseStreamMetadata`, printing
 * the raw usage next to the handler's `usage_metadata`.
 *
 * Despite the function name, no chat-model wrapper is involved here: only the
 * SDK client and the handler, which is loaded with a dynamic import. For each
 * event the script reports whether `input_token_details` is present on the
 * handler's output and flags its absence.
 *
 * @returns Resolves once both response streams have been fully consumed.
 */
async function wrapperStreamCallWithCachePoint(): Promise<void> {
  console.log('\n' + '='.repeat(60));
  console.log(
    'TEST 3: Raw SDK with cachePoint -> verify handleConverseStreamMetadata extracts cache tokens'
  );
  console.log('='.repeat(60));

  // The raw SDK (with cachePoint) triggers caching; the handler under test is
  // then run on the exact metadata events the SDK produced.
  const { handleConverseStreamMetadata } = await import(
    '@/llm/bedrock/utils/message_outputs'
  );

  const client = new BedrockRuntimeClient({ region, credentials });

  /** Sends `command` and compares raw usage with the handler's output. */
  const sendAndCheckHandler = async (
    command: ConverseStreamCommand
  ): Promise<void> => {
    const response = await client.send(command);
    if (!response.stream) {
      return;
    }
    for await (const event of response.stream) {
      if (event.metadata == null) {
        continue;
      }
      console.log('Raw usage:', JSON.stringify(event.metadata.usage));

      // Run the handler on the same event and show what it produced.
      const chunk = handleConverseStreamMetadata(event.metadata, {
        streamUsage: true,
      });
      console.log(
        'handleConverseStreamMetadata output usage_metadata:',
        JSON.stringify(chunk.message.usage_metadata)
      );

      const details = chunk.message.usage_metadata?.input_token_details;
      const hasDetails = details != null;
      console.log(
        `Has input_token_details: ${hasDetails}`,
        hasDetails ? JSON.stringify(details) : '(MISSING - BUG!)'
      );
    }
  };

  // Call 1 - establish cache
  console.log('\n--- Call 1 (cache write) ---');
  const command1 = new ConverseStreamCommand({
    modelId: MODEL_ID,
    system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
    messages: [{ role: 'user', content: [{ text: 'What is 2+2?' }] }],
    inferenceConfig: { maxTokens: 100 },
  });
  await sendAndCheckHandler(command1);

  // Call 2 - read from cache
  console.log('\n--- Call 2 (cache read) ---');
  const command2 = new ConverseStreamCommand({
    modelId: MODEL_ID,
    system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
    messages: [
      { role: 'user', content: [{ text: 'What is 2+2?' }] },
      { role: 'assistant', content: [{ text: '4' }] },
      { role: 'user', content: [{ text: 'What is 3+3?' }] },
    ],
    inferenceConfig: { maxTokens: 100 },
  });
  await sendAndCheckHandler(command2);
}
|
|
236
|
+
|
|
237
|
+
/**
 * Prints the script banner (title, model, region) and then runs the three
 * debug scenarios one after another, each awaited before the next starts.
 *
 * @returns Resolves when the last scenario has finished.
 */
async function main(): Promise<void> {
  const banner = [
    'Bedrock Cache Token Debug Script',
    `Model: ${MODEL_ID}`,
    `Region: ${region}\n`,
  ];
  for (const line of banner) {
    console.log(line);
  }

  const scenarios = [
    rawSdkCall,
    wrapperStreamCallNoCachePoint,
    wrapperStreamCallWithCachePoint,
  ];
  for (const runScenario of scenarios) {
    await runScenario();
  }
}
|
|
246
|
+
|
|
247
|
+
// Script entry point: run everything, and on any rejection log it and exit
// with a non-zero status.
main().then(undefined, (fatal: unknown) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|