@librechat/agents 3.1.53 → 3.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/events.cjs.map +1 -1
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
  6. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  7. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  8. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  9. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  10. package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
  11. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  12. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  13. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +16 -5
  14. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  15. package/dist/cjs/llm/fake.cjs.map +1 -1
  16. package/dist/cjs/llm/google/index.cjs.map +1 -1
  17. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  18. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  19. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  20. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  21. package/dist/cjs/llm/providers.cjs.map +1 -1
  22. package/dist/cjs/llm/text.cjs.map +1 -1
  23. package/dist/cjs/llm/vertexai/index.cjs +68 -4
  24. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  25. package/dist/cjs/main.cjs +28 -28
  26. package/dist/cjs/messages/cache.cjs.map +1 -1
  27. package/dist/cjs/messages/content.cjs.map +1 -1
  28. package/dist/cjs/messages/core.cjs.map +1 -1
  29. package/dist/cjs/messages/format.cjs.map +1 -1
  30. package/dist/cjs/messages/ids.cjs.map +1 -1
  31. package/dist/cjs/messages/prune.cjs.map +1 -1
  32. package/dist/cjs/messages/tools.cjs.map +1 -1
  33. package/dist/cjs/run.cjs.map +1 -1
  34. package/dist/cjs/splitStream.cjs.map +1 -1
  35. package/dist/cjs/stream.cjs.map +1 -1
  36. package/dist/cjs/tools/Calculator.cjs.map +1 -1
  37. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  38. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  39. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  40. package/dist/cjs/tools/ToolSearch.cjs.map +1 -1
  41. package/dist/cjs/tools/handlers.cjs.map +1 -1
  42. package/dist/cjs/tools/schema.cjs.map +1 -1
  43. package/dist/cjs/tools/search/content.cjs.map +1 -1
  44. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  45. package/dist/cjs/tools/search/format.cjs.map +1 -1
  46. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  47. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  48. package/dist/cjs/tools/search/schema.cjs.map +1 -1
  49. package/dist/cjs/tools/search/search.cjs +1 -0
  50. package/dist/cjs/tools/search/search.cjs.map +1 -1
  51. package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
  52. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  53. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  54. package/dist/cjs/utils/events.cjs.map +1 -1
  55. package/dist/cjs/utils/graph.cjs.map +1 -1
  56. package/dist/cjs/utils/handlers.cjs.map +1 -1
  57. package/dist/cjs/utils/llm.cjs.map +1 -1
  58. package/dist/cjs/utils/misc.cjs.map +1 -1
  59. package/dist/cjs/utils/run.cjs.map +1 -1
  60. package/dist/cjs/utils/schema.cjs.map +1 -1
  61. package/dist/cjs/utils/title.cjs.map +1 -1
  62. package/dist/cjs/utils/tokens.cjs.map +1 -1
  63. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  64. package/dist/esm/common/enum.mjs.map +1 -1
  65. package/dist/esm/events.mjs.map +1 -1
  66. package/dist/esm/graphs/Graph.mjs.map +1 -1
  67. package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
  68. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  69. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  70. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  71. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  72. package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
  73. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  74. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  75. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +16 -5
  76. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  77. package/dist/esm/llm/fake.mjs.map +1 -1
  78. package/dist/esm/llm/google/index.mjs.map +1 -1
  79. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  80. package/dist/esm/llm/openai/index.mjs.map +1 -1
  81. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  82. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  83. package/dist/esm/llm/providers.mjs.map +1 -1
  84. package/dist/esm/llm/text.mjs.map +1 -1
  85. package/dist/esm/llm/vertexai/index.mjs +68 -4
  86. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  87. package/dist/esm/messages/cache.mjs.map +1 -1
  88. package/dist/esm/messages/content.mjs.map +1 -1
  89. package/dist/esm/messages/core.mjs +1 -1
  90. package/dist/esm/messages/core.mjs.map +1 -1
  91. package/dist/esm/messages/format.mjs +2 -2
  92. package/dist/esm/messages/format.mjs.map +1 -1
  93. package/dist/esm/messages/ids.mjs.map +1 -1
  94. package/dist/esm/messages/prune.mjs +1 -1
  95. package/dist/esm/messages/prune.mjs.map +1 -1
  96. package/dist/esm/messages/tools.mjs.map +1 -1
  97. package/dist/esm/run.mjs.map +1 -1
  98. package/dist/esm/splitStream.mjs.map +1 -1
  99. package/dist/esm/stream.mjs +1 -1
  100. package/dist/esm/stream.mjs.map +1 -1
  101. package/dist/esm/tools/Calculator.mjs.map +1 -1
  102. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  103. package/dist/esm/tools/ProgrammaticToolCalling.mjs +1 -1
  104. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  105. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  106. package/dist/esm/tools/ToolSearch.mjs.map +1 -1
  107. package/dist/esm/tools/handlers.mjs +1 -1
  108. package/dist/esm/tools/handlers.mjs.map +1 -1
  109. package/dist/esm/tools/schema.mjs.map +1 -1
  110. package/dist/esm/tools/search/content.mjs.map +1 -1
  111. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  112. package/dist/esm/tools/search/format.mjs.map +1 -1
  113. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  114. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  115. package/dist/esm/tools/search/schema.mjs.map +1 -1
  116. package/dist/esm/tools/search/search.mjs +1 -0
  117. package/dist/esm/tools/search/search.mjs.map +1 -1
  118. package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
  119. package/dist/esm/tools/search/tool.mjs.map +1 -1
  120. package/dist/esm/tools/search/utils.mjs.map +1 -1
  121. package/dist/esm/utils/events.mjs.map +1 -1
  122. package/dist/esm/utils/graph.mjs.map +1 -1
  123. package/dist/esm/utils/handlers.mjs.map +1 -1
  124. package/dist/esm/utils/llm.mjs.map +1 -1
  125. package/dist/esm/utils/misc.mjs.map +1 -1
  126. package/dist/esm/utils/run.mjs.map +1 -1
  127. package/dist/esm/utils/schema.mjs.map +1 -1
  128. package/dist/esm/utils/title.mjs.map +1 -1
  129. package/dist/esm/utils/tokens.mjs.map +1 -1
  130. package/dist/types/llm/bedrock/utils/message_outputs.d.ts +1 -1
  131. package/dist/types/llm/vertexai/index.d.ts +1 -1
  132. package/package.json +6 -3
  133. package/src/llm/bedrock/llm.spec.ts +233 -4
  134. package/src/llm/bedrock/utils/message_outputs.ts +51 -11
  135. package/src/llm/vertexai/index.ts +99 -6
  136. package/src/llm/vertexai/llm.spec.ts +114 -0
  137. package/src/scripts/bedrock-cache-debug.ts +250 -0
  138. package/src/scripts/thinking-vertexai.ts +168 -0
@@ -1,13 +1,82 @@
1
1
  import { ChatGoogle } from '@langchain/google-gauth';
2
2
  import { ChatConnection } from '@langchain/google-common';
3
3
  import type {
4
+ GeminiContent,
4
5
  GeminiRequest,
5
6
  GoogleAIModelRequestParams,
6
7
  GoogleAbstractedClient,
7
8
  } from '@langchain/google-common';
8
9
  import type { BaseMessage } from '@langchain/core/messages';
10
+ import { isAIMessage } from '@langchain/core/messages';
9
11
  import type { GoogleThinkingConfig, VertexAIClientOptions } from '@/types';
10
12
 
13
/**
 * A message's `additional_kwargs`, widened with the optional `signatures`
 * array read by `fixThoughtSignatures`. Individual entries may be `undefined`,
 * and the whole value may be absent.
 */
type AdditionalKwargs =
  | (BaseMessage['additional_kwargs'] & {
      signatures?: (string | undefined)[];
    })
  | undefined;
18
+
19
/**
 * Fixes thought signatures on functionCall parts in the formatted Gemini request.
 *
 * `@langchain/google-common` stores signatures as a flat array in
 * `additional_kwargs.signatures` (one per response part) and re-attaches them
 * by index only when `signatures.length === parts.length`. This fails when:
 * - The API omits a signature (length mismatch)
 * - Streaming chunks merge with different part counts
 * - The signature for a functionCall part is an empty string
 *
 * This function correlates each "model" content block in the formatted request
 * back to its originating AI message, then re-attaches non-empty signatures
 * that the library failed to apply.
 *
 * Correlation is positional over ALL AI messages in `input`, not only those
 * carrying signatures. Pairing a pre-filtered list against every "model" block
 * would shift the pairing as soon as one earlier AI message had no signatures,
 * attaching signatures to the wrong turn.
 *
 * @param contents - Formatted request contents; matching parts are mutated in place.
 * @param input - The messages the request was formatted from, in order.
 */
function fixThoughtSignatures(
  contents: GeminiContent[],
  input: BaseMessage[]
): void {
  // Keep every AI message so the n-th AI message lines up with the n-th "model" block.
  // NOTE(review): assumes the formatter emits exactly one "model" content per AI
  // message and none from other sources — confirm against @langchain/google-common.
  const aiMessages = input.filter((msg) => isAIMessage(msg));

  // Collect "model" content blocks from the formatted request, in order
  const modelContents = contents.filter((c) => c.role === 'model');

  const count = Math.min(aiMessages.length, modelContents.length);
  for (let i = 0; i < count; i++) {
    const signatures = (aiMessages[i].additional_kwargs as AdditionalKwargs)
      ?.signatures;
    // Nothing to restore for this turn; it still consumes its "model" slot.
    if (!Array.isArray(signatures) || signatures.length === 0) {
      continue;
    }

    const { parts } = modelContents[i];

    // Signatures the library already attached must not be handed out twice.
    const attachedSignatures = new Set(
      parts
        .map((p) => p.thoughtSignature)
        .filter((s): s is string => s != null && s !== '')
    );
    const availableSignatures = signatures.filter(
      (s): s is string => s != null && s !== '' && !attachedSignatures.has(s)
    );

    // Assign available signatures to functionCall parts missing one, in order
    let sigIdx = 0;
    for (const part of parts) {
      if (sigIdx >= availableSignatures.length) {
        break;
      }
      const missingSignature =
        part.thoughtSignature == null || part.thoughtSignature === '';
      if ('functionCall' in part && missingSignature) {
        part.thoughtSignature = availableSignatures[sigIdx];
        sigIdx++;
      }
    }
  }
}
79
+
11
80
  class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
12
81
  thinkingConfig?: GoogleThinkingConfig;
13
82
 
@@ -28,15 +97,36 @@ class CustomChatConnection extends ChatConnection<VertexAIClientOptions> {
28
97
  }
29
98
  delete formattedData.generationConfig.thinkingConfig.thinkingBudget;
30
99
  }
31
- if (this.thinkingConfig?.thinkingLevel) {
100
+ if (
101
+ this.thinkingConfig?.thinkingLevel != null &&
102
+ this.thinkingConfig.thinkingLevel !== ''
103
+ ) {
32
104
  formattedData.generationConfig ??= {};
105
+ // thinkingLevel and thinkingBudget cannot coexist — the API rejects the request.
106
+ // Remove thinkingBudget when thinkingLevel is set.
107
+ const { thinkingBudget: _, ...existingThinkingConfig } =
108
+ (formattedData.generationConfig.thinkingConfig as
109
+ | Record<string, unknown>
110
+ | undefined) ?? {};
33
111
  (
34
112
  formattedData.generationConfig as Record<string, unknown>
35
113
  ).thinkingConfig = {
36
- ...formattedData.generationConfig.thinkingConfig,
114
+ ...existingThinkingConfig,
37
115
  thinkingLevel: this.thinkingConfig.thinkingLevel,
116
+ ...(this.thinkingConfig.includeThoughts != null && {
117
+ includeThoughts: this.thinkingConfig.includeThoughts,
118
+ }),
38
119
  };
39
120
  }
121
+ if (formattedData.contents) {
122
+ fixThoughtSignatures(formattedData.contents, input);
123
+ // gemini-3.1+ models reject role="function"; convert to role="user"
124
+ for (const content of formattedData.contents) {
125
+ if (content.role === 'function') {
126
+ (content as { role: string }).role = 'user';
127
+ }
128
+ }
129
+ }
40
130
  return formattedData;
41
131
  }
42
132
  }
@@ -350,18 +440,21 @@ export class ChatVertexAI extends ChatGoogle {
350
440
  }
351
441
  return params;
352
442
  }
353
-
354
443
  buildConnection(
355
- fields: VertexAIClientOptions,
444
+ fields: VertexAIClientOptions | undefined,
356
445
  client: GoogleAbstractedClient
357
446
  ): void {
447
+ // Note: buildConnection is called from super() BEFORE this.thinkingConfig is set,
448
+ // so we must read thinkingConfig from `fields` directly.
449
+ const thinkingConfig = fields?.thinkingConfig ?? this.thinkingConfig;
450
+
358
451
  const connection = new CustomChatConnection(
359
452
  { ...fields, ...this },
360
453
  this.caller,
361
454
  client,
362
455
  false
363
456
  );
364
- connection.thinkingConfig = this.thinkingConfig;
457
+ connection.thinkingConfig = thinkingConfig;
365
458
  this.connection = connection;
366
459
 
367
460
  const streamedConnection = new CustomChatConnection(
@@ -370,7 +463,7 @@ export class ChatVertexAI extends ChatGoogle {
370
463
  client,
371
464
  true
372
465
  );
373
- streamedConnection.thinkingConfig = this.thinkingConfig;
466
+ streamedConnection.thinkingConfig = thinkingConfig;
374
467
  this.streamedConnection = streamedConnection;
375
468
  }
376
469
  }
@@ -0,0 +1,114 @@
1
+ import { config } from 'dotenv';
2
+ config();
3
+ import { test, describe, jest } from '@jest/globals';
4
+
5
+ jest.setTimeout(90000);
6
+ import {
7
+ AIMessageChunk,
8
+ HumanMessage,
9
+ ToolMessage,
10
+ } from '@langchain/core/messages';
11
+ import { tool } from '@langchain/core/tools';
12
+ import { z } from 'zod/v3';
13
+ import { ChatVertexAI } from './index';
14
+
15
/** Gemini 3 model IDs; each `describe.each` suite in this file runs once per entry. */
const gemini3Models = [
  'gemini-3-pro-preview',
  'gemini-3-flash-preview',
  'gemini-3.1-flash-lite-preview',
];

/** Input schema for the stub weather tool: a single `location` string. */
const weatherToolSchema = z.object({
  location: z.string().describe('The city to get the weather for'),
});

/** Stub tool that ignores its input and always returns the same canned forecast. */
const weatherTool = tool(async () => 'The weather is 80 degrees and sunny', {
  name: 'weather',
  description: 'Gets the current weather in a given location',
  schema: weatherToolSchema,
});
28
+
29
// Live suite: for every Gemini 3 model, a request configured with
// thinkingLevel=HIGH must come back with a non-zero reasoning token count.
describe.each(gemini3Models)(
  'Vertex AI reasoning with thinkingLevel (%s)',
  (modelName) => {
    const llm = new ChatVertexAI({
      model: modelName,
      location: 'global',
      maxRetries: 2,
      thinkingConfig: {
        thinkingLevel: 'HIGH',
        includeThoughts: true,
      },
    });

    test('invoke with thinkingLevel produces a response with reasoning tokens', async () => {
      const reply = await llm.invoke('What is 2+2? Think step by step.');
      expect(reply.content).toBeDefined();

      // The reasoning count is read from usage_metadata.output_token_details.reasoning.
      const tokenDetails = (reply.usage_metadata as Record<string, unknown>)
        ?.output_token_details;
      expect(tokenDetails).toBeDefined();

      const reasoningCount = (tokenDetails as Record<string, number>)
        ?.reasoning;
      expect(reasoningCount).toBeGreaterThan(0);
    });
  }
);
54
+
55
// Live suite: for every Gemini 3 model, a tool call obtained via invoke() or
// stream() must survive being sent back to the API together with its tool result.
describe.each(gemini3Models)(
  'Vertex AI tool calling with thought signatures (%s)',
  (modelName) => {
    const question = 'What is the current weather in San Francisco?';
    const toolOutput = 'The weather is 80 degrees and sunny';

    const baseModel = new ChatVertexAI({
      model: modelName,
      location: 'global',
      maxRetries: 2,
    });
    const toolModel = baseModel.bindTools([weatherTool]);

    test('invoke: tool call completes round-trip with thought signature', async () => {
      const aiReply = await toolModel.invoke(question);
      expect(aiReply.tool_calls).toBeDefined();
      expect(aiReply.tool_calls!.length).toBeGreaterThanOrEqual(1);
      expect(aiReply.tool_calls![0].id).toBeDefined();

      const toolReply = new ToolMessage({
        content: toolOutput,
        tool_call_id: aiReply.tool_calls![0].id ?? '',
      });

      // Critical round-trip: sending the function call + tool result back to the API.
      // Without proper thought_signature handling, this fails with
      // "function call X is missing a thought_signature".
      // The follow-up goes through the model WITHOUT tools bound, as before.
      const followUp = await baseModel.invoke([
        new HumanMessage(question),
        aiReply,
        toolReply,
      ]);
      expect(followUp.content).toBeDefined();
    });

    test('stream: tool call completes round-trip with thought signature', async () => {
      // Fold all streamed chunks into a single message before inspecting it.
      let merged: AIMessageChunk | undefined;
      const stream = await toolModel.stream(question);
      for await (const piece of stream) {
        merged = merged === undefined ? piece : merged.concat(piece);
      }
      expect(merged).toBeDefined();
      expect(merged?.tool_calls).toBeDefined();
      expect(merged?.tool_calls!.length).toBeGreaterThanOrEqual(1);

      const toolReply = new ToolMessage({
        content: toolOutput,
        tool_call_id: merged?.tool_calls![0].id ?? '',
      });

      // Round-trip: send tool result back — verifies thought_signature handling
      const followUp = await baseModel.invoke([
        new HumanMessage(question),
        merged!,
        toolReply,
      ]);
      expect(followUp.content).toBeDefined();
    });
  }
);
@@ -0,0 +1,250 @@
1
+ /**
2
+ * Debug script to investigate cache token omission in Bedrock responses.
3
+ *
4
+ * This script:
5
+ * 1. Makes a streaming call to Bedrock and logs the raw metadata event
6
+ * 2. Shows exactly what fields the AWS SDK returns in usage (including cache tokens)
7
+ * 3. Shows what our handleConverseStreamMetadata produces vs what it should produce
8
+ * 4. Makes a multi-turn call to trigger caching and verify cache tokens appear
9
+ */
10
+ import { config } from 'dotenv';
11
+ config();
12
+ import { HumanMessage } from '@langchain/core/messages';
13
+ import type { AIMessageChunk } from '@langchain/core/messages';
14
+ import { concat } from '@langchain/core/utils/stream';
15
+ import {
16
+ ConverseStreamCommand,
17
+ BedrockRuntimeClient,
18
+ } from '@aws-sdk/client-bedrock-runtime';
19
+ import { CustomChatBedrockConverse } from '@/llm/bedrock';
20
+
21
const {
  BEDROCK_AWS_REGION,
  BEDROCK_AWS_ACCESS_KEY_ID,
  BEDROCK_AWS_SECRET_ACCESS_KEY,
} = process.env;

/** Region used for every Bedrock call; falls back to us-east-1 when the env var is unset. */
const region = BEDROCK_AWS_REGION ?? 'us-east-1';

/** Static credentials taken from the environment (non-null asserted, not validated). */
const credentials = {
  accessKeyId: BEDROCK_AWS_ACCESS_KEY_ID!,
  secretAccessKey: BEDROCK_AWS_SECRET_ACCESS_KEY!,
};

/** Model ID used by every call in this script. */
const MODEL_ID = 'us.anthropic.claude-sonnet-4-5-20250929-v1:0';

// A long system prompt to increase likelihood of cache usage
// Bedrock requires minimum 1024 tokens for prompt caching to activate
const PROMPT_PADDING =
  'This is padding content to make the prompt large enough to trigger Bedrock prompt caching. The minimum requirement for Anthropic models on Bedrock is 1024 tokens in the cached prefix. We need to ensure this prompt is well above that threshold. ';

/** Intro line, 200 repetitions of the padding paragraph, then the closing instruction. */
const SYSTEM_PROMPT = `You are an expert assistant. Here is a large context block to help trigger cache behavior:

${PROMPT_PADDING.repeat(200)}

When answering, be brief and direct.`;
36
+
37
/**
 * TEST 1: sends two streaming Converse requests straight through the AWS SDK
 * (system prompt followed by a cachePoint block) and dumps every `metadata`
 * event, including the `cacheReadInputTokens` / `cacheWriteInputTokens` fields
 * of `metadata.usage`.
 */
async function rawSdkCall(): Promise<void> {
  const divider = '='.repeat(60);
  console.log(divider);
  console.log('TEST 1: Raw AWS SDK call - inspect metadata.usage directly');
  console.log(divider);

  const client = new BedrockRuntimeClient({ region, credentials });

  // Sends one command and prints each metadata event found in its stream.
  const dumpMetadata = async (
    label: string,
    command: ConverseStreamCommand
  ): Promise<void> => {
    const response = await client.send(command);
    if (!response.stream) {
      return;
    }
    for await (const event of response.stream) {
      if (event.metadata == null) {
        continue;
      }
      const metadata = event.metadata;
      console.log(`\nRAW metadata event (${label}):`);
      console.dir(metadata, { depth: null });
      console.log('\nRAW metadata.usage:');
      console.dir(metadata.usage, { depth: null });
      console.log('\nSpecific cache fields:');
      console.log(
        '  cacheReadInputTokens:',
        (metadata.usage as Record<string, unknown>)?.cacheReadInputTokens
      );
      console.log(
        '  cacheWriteInputTokens:',
        (metadata.usage as Record<string, unknown>)?.cacheWriteInputTokens
      );
    }
  };

  // First call - should create cache
  // Use cachePoint block to explicitly enable prompt caching
  console.log('\n--- Call 1 (cache write expected) ---');
  await dumpMetadata(
    'Call 1',
    new ConverseStreamCommand({
      modelId: MODEL_ID,
      system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
      messages: [{ role: 'user', content: [{ text: 'What is 2+2?' }] }],
      inferenceConfig: { maxTokens: 100 },
    })
  );

  // Second call - should read from cache
  console.log('\n--- Call 2 (cache read expected) ---');
  await dumpMetadata(
    'Call 2',
    new ConverseStreamCommand({
      modelId: MODEL_ID,
      system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
      messages: [
        { role: 'user', content: [{ text: 'What is 2+2?' }] },
        { role: 'assistant', content: [{ text: '4' }] },
        { role: 'user', content: [{ text: 'And what is 3+3?' }] },
      ],
      inferenceConfig: { maxTokens: 100 },
    })
  );
}
113
+
114
/**
 * TEST 2: streams one request through `CustomChatBedrockConverse` with the long
 * prompt sent as a plain human message (no cachePoint block), then prints the
 * `usage_metadata` of the concatenated chunks.
 */
async function wrapperStreamCallNoCachePoint(): Promise<void> {
  const divider = '='.repeat(60);
  console.log(`\n${divider}`);
  console.log(
    'TEST 2: CustomChatBedrockConverse stream (NO cachePoint) - check usage_metadata'
  );
  console.log(divider);
  console.log('(Without cachePoint, Bedrock does NOT return cache tokens)');

  const model = new CustomChatBedrockConverse({
    model: MODEL_ID,
    region,
    credentials,
    maxTokens: 100,
    streaming: true,
    streamUsage: true,
  });

  console.log('\n--- Wrapper Call (no cachePoint) ---');
  const prompt = [new HumanMessage(SYSTEM_PROMPT + '\n\nWhat is 2+2?')];

  // Fold the streamed chunks into one message so its usage_metadata can be read.
  let merged: AIMessageChunk | undefined;
  const stream = await model.stream(prompt);
  for await (const piece of stream) {
    merged = merged ? concat(merged, piece) : piece;
  }

  // Non-null assertion: this throws if the stream produced no chunks at all.
  const usageJson = JSON.stringify(merged!.usage_metadata);
  console.log('\nFinal usage_metadata:', usageJson);
  console.log('(No cache tokens expected since no cachePoint block was sent)');
}
145
+
146
/**
 * TEST 3: issues two raw SDK requests that include a cachePoint block and feeds
 * every `metadata` event through `handleConverseStreamMetadata`, printing the
 * raw usage next to the handler's `usage_metadata` and flagging a missing
 * `input_token_details`.
 */
async function wrapperStreamCallWithCachePoint(): Promise<void> {
  const divider = '='.repeat(60);
  console.log(`\n${divider}`);
  console.log(
    'TEST 3: Raw SDK with cachePoint -> verify handleConverseStreamMetadata extracts cache tokens'
  );
  console.log(divider);

  // We use the raw SDK with cachePoint to trigger caching, then verify
  // that our handleConverseStreamMetadata function properly extracts cache fields
  const { handleConverseStreamMetadata } = await import(
    '@/llm/bedrock/utils/message_outputs'
  );

  const client = new BedrockRuntimeClient({ region, credentials });

  // Sends one command and compares raw usage with the handler output per metadata event.
  const reportUsage = async (command: ConverseStreamCommand): Promise<void> => {
    const response = await client.send(command);
    if (!response.stream) {
      return;
    }
    for await (const event of response.stream) {
      if (event.metadata == null) {
        continue;
      }
      const metadata = event.metadata;
      console.log('Raw usage:', JSON.stringify(metadata.usage));

      // Test our handler
      const { message } = handleConverseStreamMetadata(metadata, {
        streamUsage: true,
      });
      console.log(
        'handleConverseStreamMetadata output usage_metadata:',
        JSON.stringify(message.usage_metadata)
      );

      const tokenDetails = message.usage_metadata?.input_token_details;
      const hasDetails = tokenDetails != null;
      const detailText = hasDetails
        ? JSON.stringify(tokenDetails)
        : '(MISSING - BUG!)';
      console.log(`Has input_token_details: ${hasDetails}`, detailText);
    }
  };

  // Call 1 - establish cache
  console.log('\n--- Call 1 (cache write) ---');
  await reportUsage(
    new ConverseStreamCommand({
      modelId: MODEL_ID,
      system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
      messages: [{ role: 'user', content: [{ text: 'What is 2+2?' }] }],
      inferenceConfig: { maxTokens: 100 },
    })
  );

  // Call 2 - read from cache
  console.log('\n--- Call 2 (cache read) ---');
  await reportUsage(
    new ConverseStreamCommand({
      modelId: MODEL_ID,
      system: [{ text: SYSTEM_PROMPT }, { cachePoint: { type: 'default' } }],
      messages: [
        { role: 'user', content: [{ text: 'What is 2+2?' }] },
        { role: 'assistant', content: [{ text: '4' }] },
        { role: 'user', content: [{ text: 'What is 3+3?' }] },
      ],
      inferenceConfig: { maxTokens: 100 },
    })
  );
}
236
+
237
/**
 * Prints the script banner, then runs the three debug scenarios one after
 * another (each is awaited before the next starts).
 */
async function main(): Promise<void> {
  console.log('Bedrock Cache Token Debug Script');
  console.log(`Model: ${MODEL_ID}`);
  console.log(`Region: ${region}\n`);

  const scenarios = [
    rawSdkCall,
    wrapperStreamCallNoCachePoint,
    wrapperStreamCallWithCachePoint,
  ];
  for (const runScenario of scenarios) {
    await runScenario();
  }
}

// Entry point: any rejection is reported and ends the process with exit code 1.
main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});
@@ -0,0 +1,168 @@
1
+ // src/scripts/thinking-vertexai.ts
2
+ import { config } from 'dotenv';
3
+ config();
4
+ import { HumanMessage, BaseMessage } from '@langchain/core/messages';
5
+ import type { UsageMetadata } from '@langchain/core/messages';
6
+ import * as t from '@/types';
7
+ import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
8
+ import { ToolEndHandler, ModelEndHandler } from '@/events';
9
+ import { GraphEvents, Providers } from '@/common';
10
+ import { getLLMConfig } from '@/utils/llmConfig';
11
+ import { getArgs } from '@/scripts/args';
12
+ import { Run } from '@/run';
13
+
14
/** Messages sent to the run so far; dumped by the failure handlers in this script. */
const conversationHistory: BaseMessage[] = [];

/** Usage entries; this array is handed to `ModelEndHandler` in `testVertexAIThinking`. */
const collectedUsage: UsageMetadata[] = [];

/**
 * Aggregated content parts. Reassigned inside `testVertexAIThinking` so the
 * module-level failure handlers can print the latest state.
 */
let _contentParts: t.MessageContentComplex[] = [];
17
+
18
/**
 * Runs two consecutive streamed queries against Vertex AI
 * (`gemini-3-flash-preview`, thinkingLevel=HIGH, includeThoughts=true) through
 * a single `Run`, printing the run messages after each query and, at the end,
 * the aggregated content parts and collected usage.
 *
 * The messages returned by the first query are appended to
 * `conversationHistory` before the second query, so the second request
 * contains the model's first reply and not just two human messages.
 */
async function testVertexAIThinking(): Promise<void> {
  const { userName } = await getArgs();
  const instructions = `You are a helpful AI assistant for ${userName}. When answering questions, be thorough in your reasoning.`;
  const { contentParts, aggregateContent } = createContentAggregator();
  // Expose the aggregator's parts to the module-level failure handlers.
  _contentParts = contentParts as t.MessageContentComplex[];

  // Set up event handlers
  const customHandlers = {
    [GraphEvents.TOOL_END]: new ToolEndHandler(),
    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
    [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
    [GraphEvents.ON_RUN_STEP_COMPLETED]: {
      handle: (
        event: GraphEvents.ON_RUN_STEP_COMPLETED,
        data: t.StreamEventData
      ): void => {
        console.log('====== ON_RUN_STEP_COMPLETED ======');
        aggregateContent({
          event,
          data: data as unknown as { result: t.ToolEndEvent },
        });
      },
    },
    [GraphEvents.ON_RUN_STEP]: {
      handle: (event: GraphEvents.ON_RUN_STEP, data: t.RunStep) => {
        aggregateContent({ event, data });
      },
    },
    [GraphEvents.ON_RUN_STEP_DELTA]: {
      handle: (
        event: GraphEvents.ON_RUN_STEP_DELTA,
        data: t.RunStepDeltaEvent
      ) => {
        aggregateContent({ event, data });
      },
    },
    [GraphEvents.ON_MESSAGE_DELTA]: {
      handle: (
        event: GraphEvents.ON_MESSAGE_DELTA,
        data: t.MessageDeltaEvent
      ) => {
        aggregateContent({ event, data });
      },
    },
    [GraphEvents.ON_REASONING_DELTA]: {
      handle: (
        event: GraphEvents.ON_REASONING_DELTA,
        data: t.ReasoningDeltaEvent
      ) => {
        // JSON.stringify(undefined) returns undefined, so fall back to '' before
        // slicing; otherwise a delta without content would throw inside the handler.
        const preview = (
          JSON.stringify(data.delta.content?.[0]) ?? ''
        ).slice(0, 100);
        console.log('[ON_REASONING_DELTA]', preview);
        aggregateContent({ event, data });
      },
    },
  };

  const baseLlmConfig = getLLMConfig(Providers.VERTEXAI);

  // Script-specific settings override whatever the base Vertex AI config provides.
  const llmConfig = {
    ...baseLlmConfig,
    model: 'gemini-3-flash-preview',
    location: 'global',
    streaming: true,
    streamUsage: true,
    thinkingConfig: {
      thinkingLevel: 'HIGH',
      includeThoughts: true,
    },
  };

  const run = await Run.create<t.IState>({
    runId: 'test-vertexai-thinking-id',
    graphConfig: {
      instructions,
      type: 'standard',
      llmConfig,
    },
    returnContent: true,
    skipCleanup: true,
    customHandlers: customHandlers as t.RunConfig['customHandlers'],
  });

  const streamConfig = {
    configurable: {
      thread_id: 'vertexai-thinking-test-thread',
    },
    streamMode: 'values',
    version: 'v2' as const,
  };

  // Test 1: Regular thinking mode
  console.log('\n\nTest 1: Vertex AI thinking mode with thinkingLevel=HIGH');
  const userMessage1 =
    'How many r\'s are in the word "strawberry"? Think carefully.';
  conversationHistory.push(new HumanMessage(userMessage1));

  console.log('Running first query with Vertex AI thinking enabled...');
  const firstInputs = { messages: [...conversationHistory] };
  await run.processStream(firstInputs, streamConfig);

  // Extract and display results
  const finalMessages = run.getRunMessages();
  console.log('\n\nFinal messages after Test 1:');
  console.dir(finalMessages, { depth: null });

  // Carry the first turn's output into the history so Test 2 really is multi-turn.
  // NOTE(review): assumes getRunMessages() returns the messages produced by the
  // run as an array (or a nullish value) — confirm against the Run class.
  if (Array.isArray(finalMessages)) {
    conversationHistory.push(...finalMessages);
  }

  // Test 2: Multi-turn conversation
  console.log(
    '\n\nTest 2: Multi-turn conversation with Vertex AI thinking enabled'
  );
  const userMessage2 =
    'Now count the number of letters in "Mississippi". Explain step by step.';
  conversationHistory.push(new HumanMessage(userMessage2));

  console.log('Running second query with Vertex AI thinking enabled...');
  const secondInputs = { messages: [...conversationHistory] };
  await run.processStream(secondInputs, streamConfig);

  const finalMessages2 = run.getRunMessages();
  console.log('\n\nVertex AI thinking feature test completed!');
  console.dir(finalMessages2, { depth: null });

  console.log('\n\nContent parts:');
  console.dir(_contentParts, { depth: null });

  console.log('\n\nCollected usage:');
  console.dir(collectedUsage, { depth: null });
}
147
+
148
// Unhandled promise rejection: print the reason plus the script's debug state,
// then terminate with a failing exit code.
process.on('unhandledRejection', (reason, promise) => {
  console.error('Unhandled Rejection at:', promise, 'reason:', reason);
  console.log('Conversation history:');
  console.dir(conversationHistory, { depth: null });
  console.log('Content parts:');
  console.dir(_contentParts, { depth: null });
  process.exit(1);
});

// Uncaught exception: registering a listener suppresses Node's default
// "print and exit" behavior, so exit explicitly. Resuming after an uncaught
// exception is unsafe and would let the script finish with a success status.
process.on('uncaughtException', (err) => {
  console.error('Uncaught Exception:', err);
  process.exit(1);
});
160
+
161
// Entry point: on failure, print the error followed by the script's debug
// state, then exit with code 1.
testVertexAIThinking().catch((error) => {
  console.error(error);

  const debugState: Array<[string, unknown]> = [
    ['Conversation history:', conversationHistory],
    ['Content parts:', _contentParts],
  ];
  for (const [label, value] of debugState) {
    console.log(label);
    console.dir(value, { depth: null });
  }

  process.exit(1);
});