@librechat/agents 3.1.55 → 3.1.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/cjs/graphs/Graph.cjs +1 -1
  2. package/dist/cjs/llm/openai/index.cjs +1 -1
  3. package/dist/cjs/main.cjs +1 -0
  4. package/dist/cjs/main.cjs.map +1 -1
  5. package/dist/cjs/messages/format.cjs +118 -32
  6. package/dist/cjs/messages/format.cjs.map +1 -1
  7. package/dist/cjs/run.cjs +5 -2
  8. package/dist/cjs/run.cjs.map +1 -1
  9. package/dist/cjs/stream.cjs +9 -0
  10. package/dist/cjs/stream.cjs.map +1 -1
  11. package/dist/cjs/tools/ToolNode.cjs +1 -1
  12. package/dist/cjs/utils/tokens.cjs +33 -45
  13. package/dist/cjs/utils/tokens.cjs.map +1 -1
  14. package/dist/esm/graphs/Graph.mjs +1 -1
  15. package/dist/esm/llm/openai/index.mjs +1 -1
  16. package/dist/esm/main.mjs +1 -1
  17. package/dist/esm/messages/format.mjs +119 -33
  18. package/dist/esm/messages/format.mjs.map +1 -1
  19. package/dist/esm/run.mjs +5 -2
  20. package/dist/esm/run.mjs.map +1 -1
  21. package/dist/esm/stream.mjs +9 -0
  22. package/dist/esm/stream.mjs.map +1 -1
  23. package/dist/esm/tools/ToolNode.mjs +1 -1
  24. package/dist/esm/utils/tokens.mjs +33 -46
  25. package/dist/esm/utils/tokens.mjs.map +1 -1
  26. package/dist/types/types/graph.d.ts +2 -0
  27. package/dist/types/types/stream.d.ts +2 -0
  28. package/dist/types/utils/tokens.d.ts +6 -18
  29. package/package.json +3 -2
  30. package/src/messages/ensureThinkingBlock.test.ts +502 -27
  31. package/src/messages/format.ts +155 -44
  32. package/src/run.ts +6 -2
  33. package/src/scripts/bedrock-cache-debug.ts +15 -15
  34. package/src/scripts/code_exec_multi_session.ts +8 -13
  35. package/src/scripts/image.ts +2 -1
  36. package/src/scripts/multi-agent-parallel-start.ts +3 -4
  37. package/src/scripts/multi-agent-sequence.ts +3 -4
  38. package/src/scripts/single-agent-metadata-test.ts +3 -6
  39. package/src/scripts/test-tool-before-handoff-role-order.ts +2 -3
  40. package/src/scripts/test-tools-before-handoff.ts +2 -3
  41. package/src/scripts/tools.ts +1 -7
  42. package/src/specs/token-memoization.test.ts +35 -34
  43. package/src/specs/tokens.test.ts +64 -0
  44. package/src/stream.ts +12 -0
  45. package/src/types/graph.ts +2 -0
  46. package/src/types/stream.ts +2 -0
  47. package/src/utils/tokens.ts +43 -54
@@ -6,7 +6,6 @@ import {
6
6
  BaseMessage,
7
7
  HumanMessage,
8
8
  SystemMessage,
9
- getBufferString,
10
9
  } from '@langchain/core/messages';
11
10
  import type { MessageContentImageUrl } from '@langchain/core/messages';
12
11
  import type { ToolCall } from '@langchain/core/messages/tool';
@@ -998,6 +997,113 @@ export function shiftIndexTokenCountMap(
998
997
  return shiftedMap;
999
998
  }
1000
999
 
1000
+ /** Block types that contain binary image data and must be preserved structurally. */
1001
+ const IMAGE_BLOCK_TYPES = new Set(['image_url', 'image']);
1002
+
1003
+ /** Checks whether a BaseMessage is a tool-role message. */
1004
+ const isToolMessage = (m: BaseMessage): boolean =>
1005
+ m instanceof ToolMessage || ('role' in m && (m as any).role === 'tool');
1006
+
1007
+ /** Flushes accumulated text chunks into `parts` as a single text block. */
1008
+ function flushTextChunks(
1009
+ textChunks: string[],
1010
+ parts: MessageContentComplex[]
1011
+ ): void {
1012
+ if (textChunks.length === 0) {
1013
+ return;
1014
+ }
1015
+ parts.push({
1016
+ type: ContentTypes.TEXT,
1017
+ text: textChunks.join('\n'),
1018
+ } as MessageContentComplex);
1019
+ textChunks.length = 0;
1020
+ }
1021
+
1022
+ /**
1023
+ * Appends a single message's content to the running `textChunks` / `parts`
1024
+ * accumulators. Image blocks are shallow-copied into `parts` as-is so that
1025
+ * binary data (base64 images) never becomes text tokens. All other block
1026
+ * types are serialized to text — unrecognized types are JSON-serialized
1027
+ * rather than silently dropped.
1028
+ *
1029
+ * When `content` is an array containing tool_use blocks, `tool_calls` is NOT
1030
+ * additionally serialized (avoiding double output). `tool_calls` is used as
1031
+ * a fallback when `content` is a plain string or an array with no tool_use.
1032
+ */
1033
+ function appendMessageContent(
1034
+ msg: BaseMessage,
1035
+ role: string,
1036
+ textChunks: string[],
1037
+ parts: MessageContentComplex[]
1038
+ ): void {
1039
+ const { content } = msg;
1040
+
1041
+ if (typeof content === 'string') {
1042
+ if (content) {
1043
+ textChunks.push(`${role}: ${content}`);
1044
+ }
1045
+ appendToolCalls(msg, role, textChunks);
1046
+ return;
1047
+ }
1048
+
1049
+ if (!Array.isArray(content)) {
1050
+ appendToolCalls(msg, role, textChunks);
1051
+ return;
1052
+ }
1053
+
1054
+ let hasToolUseBlock = false;
1055
+
1056
+ for (const block of content as ExtendedMessageContent[]) {
1057
+ if (IMAGE_BLOCK_TYPES.has(block.type ?? '')) {
1058
+ flushTextChunks(textChunks, parts);
1059
+ parts.push({ ...block } as MessageContentComplex);
1060
+ continue;
1061
+ }
1062
+
1063
+ if (block.type === 'tool_use') {
1064
+ hasToolUseBlock = true;
1065
+ textChunks.push(
1066
+ `${role}: [tool_use] ${String(block.name ?? '')} ${JSON.stringify(block.input ?? {})}`
1067
+ );
1068
+ continue;
1069
+ }
1070
+
1071
+ const text = block.text ?? block.input;
1072
+ if (typeof text === 'string' && text) {
1073
+ textChunks.push(`${role}: ${text}`);
1074
+ continue;
1075
+ }
1076
+
1077
+ // Fallback: serialize unrecognized block types to preserve context
1078
+ if (block.type != null && block.type !== '') {
1079
+ textChunks.push(`${role}: [${block.type}] ${JSON.stringify(block)}`);
1080
+ }
1081
+ }
1082
+
1083
+ // If content array had no tool_use blocks, fall back to tool_calls metadata
1084
+ // (handles edge case: empty content array with tool_calls populated)
1085
+ if (!hasToolUseBlock) {
1086
+ appendToolCalls(msg, role, textChunks);
1087
+ }
1088
+ }
1089
+
1090
+ function appendToolCalls(
1091
+ msg: BaseMessage,
1092
+ role: string,
1093
+ textChunks: string[]
1094
+ ): void {
1095
+ if (role !== 'AI') {
1096
+ return;
1097
+ }
1098
+ const aiMsg = msg as AIMessage;
1099
+ if (!aiMsg.tool_calls || aiMsg.tool_calls.length === 0) {
1100
+ return;
1101
+ }
1102
+ for (const tc of aiMsg.tool_calls) {
1103
+ textChunks.push(`AI: [tool_call] ${tc.name}(${JSON.stringify(tc.args)})`);
1104
+ }
1105
+ }
1106
+
1001
1107
  /**
1002
1108
  * Ensures compatibility when switching from a non-thinking agent to a thinking-enabled agent.
1003
1109
  * Converts AI messages with tool calls (that lack thinking/reasoning blocks) into buffer strings,
@@ -1021,19 +1127,27 @@ export function ensureThinkingBlockInMessages(
1021
1127
  return messages;
1022
1128
  }
1023
1129
 
1024
- // If the last message is already a HumanMessage, there is no trailing tool
1025
- // sequence to convert — return early to preserve prompt caching and avoid
1026
- // redundant token overhead from re-processing the entire history.
1027
- const lastMsg = messages[messages.length - 1];
1028
- const lastIsHuman =
1029
- lastMsg instanceof HumanMessage ||
1030
- ('role' in lastMsg && (lastMsg as any).role === 'user');
1031
- if (lastIsHuman) {
1130
+ // Find the last HumanMessage. Only the trailing sequence after it needs
1131
+ // validation — earlier messages are history already accepted by the provider.
1132
+ let lastHumanIndex = -1;
1133
+ for (let k = messages.length - 1; k >= 0; k--) {
1134
+ const m = messages[k];
1135
+ if (
1136
+ m instanceof HumanMessage ||
1137
+ ('role' in m && (m as any).role === 'user')
1138
+ ) {
1139
+ lastHumanIndex = k;
1140
+ break;
1141
+ }
1142
+ }
1143
+
1144
+ if (lastHumanIndex === messages.length - 1) {
1032
1145
  return messages;
1033
1146
  }
1034
1147
 
1035
- const result: BaseMessage[] = [];
1036
- let i = 0;
1148
+ const result: BaseMessage[] =
1149
+ lastHumanIndex >= 0 ? messages.slice(0, lastHumanIndex + 1) : [];
1150
+ let i = lastHumanIndex + 1;
1037
1151
 
1038
1152
  while (i < messages.length) {
1039
1153
  const msg = messages[i];
@@ -1059,21 +1173,24 @@ export function ensureThinkingBlockInMessages(
1059
1173
  let hasThinkingBlock = false;
1060
1174
 
1061
1175
  if (contentIsArray && aiMsg.content.length > 0) {
1062
- const content = aiMsg.content as ExtendedMessageContent[];
1063
- hasToolUse =
1064
- hasToolUse ||
1065
- content.some((c) => typeof c === 'object' && c.type === 'tool_use');
1066
- // Check ALL content blocks for thinking/reasoning, not just [0].
1067
- // Bedrock may emit a whitespace text chunk before the thinking block,
1068
- // pushing the reasoning_content to index 1+.
1069
- hasThinkingBlock = content.some(
1070
- (c) =>
1071
- typeof c === 'object' &&
1072
- (c.type === ContentTypes.THINKING ||
1073
- c.type === ContentTypes.REASONING_CONTENT ||
1074
- c.type === ContentTypes.REASONING ||
1075
- c.type === 'redacted_thinking')
1076
- );
1176
+ for (const c of aiMsg.content as ExtendedMessageContent[]) {
1177
+ if (typeof c !== 'object') {
1178
+ continue;
1179
+ }
1180
+ if (c.type === 'tool_use') {
1181
+ hasToolUse = true;
1182
+ } else if (
1183
+ c.type === ContentTypes.THINKING ||
1184
+ c.type === ContentTypes.REASONING_CONTENT ||
1185
+ c.type === ContentTypes.REASONING ||
1186
+ c.type === 'redacted_thinking'
1187
+ ) {
1188
+ hasThinkingBlock = true;
1189
+ }
1190
+ if (hasToolUse && hasThinkingBlock) {
1191
+ break;
1192
+ }
1193
+ }
1077
1194
  }
1078
1195
 
1079
1196
  // Bedrock also stores reasoning in additional_kwargs (may not be in content array)
@@ -1100,28 +1217,22 @@ export function ensureThinkingBlockInMessages(
1100
1217
  continue;
1101
1218
  }
1102
1219
 
1103
- // Collect the AI message and any following tool messages
1104
- const toolSequence: BaseMessage[] = [msg];
1105
- let j = i + 1;
1220
+ // Build structured content in a single pass over the AI + following
1221
+ // ToolMessages — preserves image blocks as-is to avoid serializing
1222
+ // binary data as text (which caused 174× token amplification).
1223
+ const parts: MessageContentComplex[] = [];
1224
+ const textChunks: string[] = ['[Previous agent context]'];
1225
+
1226
+ appendMessageContent(msg, 'AI', textChunks, parts);
1106
1227
 
1107
- // Look ahead for tool messages that belong to this AI message
1108
- const isToolMsg = (m: BaseMessage): boolean =>
1109
- m instanceof ToolMessage || ('role' in m && (m as any).role === 'tool');
1110
- while (j < messages.length && isToolMsg(messages[j])) {
1111
- toolSequence.push(messages[j]);
1228
+ let j = i + 1;
1229
+ while (j < messages.length && isToolMessage(messages[j])) {
1230
+ appendMessageContent(messages[j], 'Tool', textChunks, parts);
1112
1231
  j++;
1113
1232
  }
1114
1233
 
1115
- // Convert the sequence to a buffer string and wrap in a HumanMessage
1116
- // This avoids the thinking block requirement which only applies to AI messages
1117
- const bufferString = getBufferString(toolSequence);
1118
- result.push(
1119
- new HumanMessage({
1120
- content: `[Previous agent context]\n${bufferString}`,
1121
- })
1122
- );
1123
-
1124
- // Skip the messages we've processed
1234
+ flushTextChunks(textChunks, parts);
1235
+ result.push(new HumanMessage({ content: parts }));
1125
1236
  i = j;
1126
1237
  } else {
1127
1238
  // Keep the message as is
package/src/run.ts CHANGED
@@ -16,9 +16,9 @@ import {
16
16
  createCompletionTitleRunnable,
17
17
  createTitleRunnable,
18
18
  } from '@/utils/title';
19
+ import { createTokenCounter, encodingForModel } from '@/utils/tokens';
19
20
  import { GraphEvents, Callback, TitleMethod } from '@/common';
20
21
  import { MultiAgentGraph } from '@/graphs/MultiAgentGraph';
21
- import { createTokenCounter } from '@/utils/tokens';
22
22
  import { StandardGraph } from '@/graphs/Graph';
23
23
  import { HandlerRegistry } from '@/events';
24
24
  import { isOpenAILike } from '@/utils/llm';
@@ -166,7 +166,11 @@ export class Run<_T extends t.BaseGraphState> {
166
166
  ): Promise<Run<T>> {
167
167
  /** Create tokenCounter if indexTokenCountMap is provided but tokenCounter is not */
168
168
  if (config.indexTokenCountMap && !config.tokenCounter) {
169
- config.tokenCounter = await createTokenCounter();
169
+ const gc = config.graphConfig;
170
+ const clientOpts =
171
+ 'agents' in gc ? gc.agents[0]?.clientOptions : gc.clientOptions;
172
+ const model = (clientOpts as { model?: string } | undefined)?.model ?? '';
173
+ config.tokenCounter = await createTokenCounter(encodingForModel(model));
170
174
  }
171
175
  return new Run<T>(config);
172
176
  }
@@ -9,13 +9,13 @@
9
9
  */
10
10
  import { config } from 'dotenv';
11
11
  config();
12
- import { HumanMessage } from '@langchain/core/messages';
13
- import type { AIMessageChunk } from '@langchain/core/messages';
14
12
  import { concat } from '@langchain/core/utils/stream';
13
+ import { HumanMessage } from '@langchain/core/messages';
15
14
  import {
16
- ConverseStreamCommand,
17
15
  BedrockRuntimeClient,
16
+ ConverseStreamCommand,
18
17
  } from '@aws-sdk/client-bedrock-runtime';
18
+ import type { AIMessageChunk } from '@langchain/core/messages';
19
19
  import { CustomChatBedrockConverse } from '@/llm/bedrock';
20
20
 
21
21
  const region = process.env.BEDROCK_AWS_REGION ?? 'us-east-1';
@@ -62,12 +62,12 @@ async function rawSdkCall(): Promise<void> {
62
62
  console.log('\nSpecific cache fields:');
63
63
  console.log(
64
64
  ' cacheReadInputTokens:',
65
- (event.metadata.usage as Record<string, unknown>)
65
+ (event.metadata.usage as unknown as Record<string, unknown>)
66
66
  ?.cacheReadInputTokens
67
67
  );
68
68
  console.log(
69
69
  ' cacheWriteInputTokens:',
70
- (event.metadata.usage as Record<string, unknown>)
70
+ (event.metadata.usage as unknown as Record<string, unknown>)
71
71
  ?.cacheWriteInputTokens
72
72
  );
73
73
  }
@@ -98,12 +98,12 @@ async function rawSdkCall(): Promise<void> {
98
98
  console.log('\nSpecific cache fields:');
99
99
  console.log(
100
100
  ' cacheReadInputTokens:',
101
- (event.metadata.usage as Record<string, unknown>)
101
+ (event.metadata.usage as unknown as Record<string, unknown>)
102
102
  ?.cacheReadInputTokens
103
103
  );
104
104
  console.log(
105
105
  ' cacheWriteInputTokens:',
106
- (event.metadata.usage as Record<string, unknown>)
106
+ (event.metadata.usage as unknown as Record<string, unknown>)
107
107
  ?.cacheWriteInputTokens
108
108
  );
109
109
  }
@@ -177,17 +177,17 @@ async function wrapperStreamCallWithCachePoint(): Promise<void> {
177
177
  const chunk = handleConverseStreamMetadata(event.metadata, {
178
178
  streamUsage: true,
179
179
  });
180
+ const msg = chunk.message as AIMessageChunk;
180
181
  console.log(
181
182
  'handleConverseStreamMetadata output usage_metadata:',
182
- JSON.stringify(chunk.message.usage_metadata)
183
+ JSON.stringify(msg.usage_metadata)
183
184
  );
184
185
 
185
- const hasDetails =
186
- chunk.message.usage_metadata?.input_token_details != null;
186
+ const hasDetails = msg.usage_metadata?.input_token_details != null;
187
187
  console.log(
188
188
  `Has input_token_details: ${hasDetails}`,
189
189
  hasDetails
190
- ? JSON.stringify(chunk.message.usage_metadata!.input_token_details)
190
+ ? JSON.stringify(msg.usage_metadata!.input_token_details)
191
191
  : '(MISSING - BUG!)'
192
192
  );
193
193
  }
@@ -216,17 +216,17 @@ async function wrapperStreamCallWithCachePoint(): Promise<void> {
216
216
  const chunk = handleConverseStreamMetadata(event.metadata, {
217
217
  streamUsage: true,
218
218
  });
219
+ const msg = chunk.message as AIMessageChunk;
219
220
  console.log(
220
221
  'handleConverseStreamMetadata output usage_metadata:',
221
- JSON.stringify(chunk.message.usage_metadata)
222
+ JSON.stringify(msg.usage_metadata)
222
223
  );
223
224
 
224
- const hasDetails =
225
- chunk.message.usage_metadata?.input_token_details != null;
225
+ const hasDetails = msg.usage_metadata?.input_token_details != null;
226
226
  console.log(
227
227
  `Has input_token_details: ${hasDetails}`,
228
228
  hasDetails
229
- ? JSON.stringify(chunk.message.usage_metadata!.input_token_details)
229
+ ? JSON.stringify(msg.usage_metadata!.input_token_details)
230
230
  : '(MISSING - BUG!)'
231
231
  );
232
232
  }
@@ -44,8 +44,8 @@ function printSessionContext(run: Run<t.IState>, label: string): void {
44
44
  }
45
45
 
46
46
  console.log(` Latest session_id: ${session.session_id}`);
47
- console.log(` Files tracked: ${session.files.length}`);
48
- for (const file of session.files) {
47
+ console.log(` Files tracked: ${session.files?.length ?? 0}`);
48
+ for (const file of session.files ?? []) {
49
49
  console.log(` - ${file.name} (session: ${file.session_id})`);
50
50
  }
51
51
  }
@@ -199,25 +199,20 @@ Tell me what version it shows.
199
199
  | undefined;
200
200
 
201
201
  if (finalSession) {
202
- const uniqueSessionIds = new Set(
203
- finalSession.files.map((f) => f.session_id)
204
- );
205
- console.log(`\nTotal files tracked: ${finalSession.files.length}`);
202
+ const files = finalSession.files ?? [];
203
+ const uniqueSessionIds = new Set(files.map((f) => f.session_id));
204
+ console.log(`\nTotal files tracked: ${files.length}`);
206
205
  console.log(`Unique session_ids: ${uniqueSessionIds.size}`);
207
206
  console.log('\nFiles:');
208
- for (const file of finalSession.files) {
207
+ for (const file of files) {
209
208
  console.log(
210
209
  ` - ${file.name} (session: ${file.session_id?.slice(0, 20)}...)`
211
210
  );
212
211
  }
213
212
 
214
213
  // Verify expectations
215
- const fileACount = finalSession.files.filter(
216
- (f) => f.name === 'file_a.txt'
217
- ).length;
218
- const fileBCount = finalSession.files.filter(
219
- (f) => f.name === 'file_b.txt'
220
- ).length;
214
+ const fileACount = files.filter((f) => f.name === 'file_a.txt').length;
215
+ const fileBCount = files.filter((f) => f.name === 'file_b.txt').length;
221
216
 
222
217
  console.log('\n✓ Checks:');
223
218
  console.log(` file_a.txt count: ${fileACount} (expected: 1, latest wins)`);
@@ -1,4 +1,4 @@
1
- // src/scripts/cli.ts
1
+ // src/scripts/image.ts
2
2
  import { config } from 'dotenv';
3
3
  config();
4
4
  import { HumanMessage, AIMessage, BaseMessage } from '@langchain/core/messages';
@@ -10,6 +10,7 @@ import {
10
10
  ModelEndHandler,
11
11
  createMetadataAggregator,
12
12
  } from '@/events';
13
+ // @ts-expect-error — example module not in current codebase
13
14
  import { fetchRandomImageTool, fetchRandomImageURL } from '@/tools/example';
14
15
  import { getLLMConfig } from '@/utils/llmConfig';
15
16
  import { getArgs } from '@/scripts/args';
@@ -25,8 +25,7 @@ async function testParallelFromStart() {
25
25
  console.log('Testing Parallel From Start Multi-Agent System...\n');
26
26
 
27
27
  // Set up content aggregator
28
- const { contentParts, aggregateContent, contentMetadataMap } =
29
- createContentAggregator();
28
+ const { contentParts, aggregateContent, stepMap } = createContentAggregator();
30
29
 
31
30
  // Define two agents - both have NO incoming edges, so they run in parallel from the start
32
31
  const agents: t.AgentInputs[] = [
@@ -253,8 +252,8 @@ async function testParallelFromStart() {
253
252
  console.log('Final content parts:', contentParts.length, 'parts');
254
253
  console.log('\n=== Content Parts (clean, no metadata) ===');
255
254
  console.dir(contentParts, { depth: null });
256
- console.log('\n=== Content Metadata Map (separate from content) ===');
257
- console.dir(Object.fromEntries(contentMetadataMap), { depth: null });
255
+ console.log('\n=== Step Map (separate from content) ===');
256
+ console.dir(Object.fromEntries(stepMap), { depth: null });
258
257
 
259
258
  await sleep(3000);
260
259
  } catch (error) {
@@ -22,8 +22,7 @@ async function testSequentialMultiAgent() {
22
22
  console.log('Testing Sequential Multi-Agent System (A → B → C)...\n');
23
23
 
24
24
  // Set up content aggregator
25
- const { contentParts, aggregateContent, contentMetadataMap } =
26
- createContentAggregator();
25
+ const { contentParts, aggregateContent, stepMap } = createContentAggregator();
27
26
 
28
27
  // Define three simple agents
29
28
  const agents: t.AgentInputs[] = [
@@ -198,8 +197,8 @@ async function testSequentialMultiAgent() {
198
197
  console.log(`Total content parts: ${contentParts.length}`);
199
198
  console.log('\n=== Content Parts (clean, no metadata) ===');
200
199
  console.dir(contentParts, { depth: null });
201
- console.log('\n=== Content Metadata Map (separate from content) ===');
202
- console.dir(Object.fromEntries(contentMetadataMap), { depth: null });
200
+ console.log('\n=== Step Map (separate from content) ===');
201
+ console.dir(Object.fromEntries(stepMap), { depth: null });
203
202
 
204
203
  // Display the sequential responses
205
204
  const aiMessages = conversationHistory.filter(
@@ -20,8 +20,7 @@ async function testSingleAgent() {
20
20
  console.log('Testing Single Agent with Metadata Logging...\n');
21
21
 
22
22
  // Set up content aggregator
23
- const { contentParts, aggregateContent, contentMetadataMap } =
24
- createContentAggregator();
23
+ const { contentParts, aggregateContent, stepMap } = createContentAggregator();
25
24
 
26
25
  const startTime = Date.now();
27
26
 
@@ -183,10 +182,8 @@ async function testSingleAgent() {
183
182
  console.log('Final content parts:', contentParts.length, 'parts');
184
183
  console.log('\n=== Content Parts (clean, no metadata) ===');
185
184
  console.dir(contentParts, { depth: null });
186
- console.log(
187
- '\n=== Content Metadata Map (should be empty for single-agent) ==='
188
- );
189
- console.dir(Object.fromEntries(contentMetadataMap), { depth: null });
185
+ console.log('\n=== Step Map (should be empty for single-agent) ===');
186
+ console.dir(Object.fromEntries(stepMap), { depth: null });
190
187
  console.log('====================================\n');
191
188
 
192
189
  await sleep(3000);
@@ -42,10 +42,9 @@ async function testToolBeforeHandoffRoleOrder(): Promise<void> {
42
42
  let handoffOccurred = false;
43
43
 
44
44
  const customHandlers = {
45
- [GraphEvents.TOOL_END]: new ToolEndHandler(undefined, (name?: string) => {
45
+ [GraphEvents.TOOL_END]: new ToolEndHandler(async () => {
46
46
  toolCallCount++;
47
- console.log(`\n Tool completed: ${name} (total: ${toolCallCount})`);
48
- return true;
47
+ console.log(`\n Tool completed (total: ${toolCallCount})`);
49
48
  }),
50
49
  [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
51
50
  [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
@@ -28,9 +28,8 @@ async function testToolsBeforeHandoff() {
28
28
 
29
29
  // Create custom handlers
30
30
  const customHandlers = {
31
- [GraphEvents.TOOL_END]: new ToolEndHandler(undefined, (name?: string) => {
32
- console.log(`\n✅ Tool completed: ${name}`);
33
- return true;
31
+ [GraphEvents.TOOL_END]: new ToolEndHandler(async () => {
32
+ console.log('\n✅ Tool completed');
34
33
  }),
35
34
  [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
36
35
  [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
@@ -18,13 +18,7 @@ async function testStandardStreaming(): Promise<void> {
18
18
  const { userName, location, provider, currentDate } = await getArgs();
19
19
  const { contentParts, aggregateContent } = createContentAggregator();
20
20
  const customHandlers = {
21
- [GraphEvents.TOOL_END]: new ToolEndHandler(
22
- undefined,
23
- undefined,
24
- (name?: string) => {
25
- return true;
26
- }
27
- ),
21
+ [GraphEvents.TOOL_END]: new ToolEndHandler(),
28
22
  [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
29
23
  [GraphEvents.CHAT_MODEL_START]: {
30
24
  handle: (
@@ -1,39 +1,40 @@
1
1
  import { HumanMessage } from '@langchain/core/messages';
2
- import { createTokenCounter } from '@/utils/tokens';
2
+ import { createTokenCounter, TokenEncoderManager } from '@/utils/tokens';
3
+
4
+ jest.setTimeout(5000);
3
5
 
4
6
  describe('Token encoder memoization', () => {
5
- jest.setTimeout(45000);
6
-
7
- test('fetches BPE once and reuses encoder across counters', async () => {
8
- const originalFetch = global.fetch;
9
- let fetchCalls = 0;
10
- global.fetch = (async (...args: Parameters<typeof fetch>) => {
11
- fetchCalls += 1;
12
- // Delegate to real fetch
13
- return await originalFetch(...args);
14
- }) as typeof fetch;
15
-
16
- try {
17
- const counter1 = await createTokenCounter();
18
- const counter2 = await createTokenCounter();
19
-
20
- const m1 = new HumanMessage('hello world');
21
- const m2 = new HumanMessage('another short text');
22
-
23
- const c11 = counter1(m1);
24
- const c12 = counter1(m2);
25
- const c21 = counter2(m1);
26
- const c22 = counter2(m2);
27
-
28
- expect(c11).toBeGreaterThan(0);
29
- expect(c12).toBeGreaterThan(0);
30
- expect(c21).toBe(c11);
31
- expect(c22).toBe(c12);
32
-
33
- // Only one fetch for the shared encoder
34
- expect(fetchCalls).toBe(1);
35
- } finally {
36
- global.fetch = originalFetch;
37
- }
7
+ beforeEach(() => {
8
+ TokenEncoderManager.reset();
9
+ });
10
+
11
+ test('reuses the same tokenizer across counter calls', async () => {
12
+ expect(TokenEncoderManager.isInitialized()).toBe(false);
13
+
14
+ const counter1 = await createTokenCounter();
15
+ expect(TokenEncoderManager.isInitialized()).toBe(true);
16
+
17
+ const counter2 = await createTokenCounter();
18
+
19
+ const m1 = new HumanMessage('hello world');
20
+ const m2 = new HumanMessage('another short text');
21
+
22
+ const c11 = counter1(m1);
23
+ const c12 = counter1(m2);
24
+ const c21 = counter2(m1);
25
+ const c22 = counter2(m2);
26
+
27
+ expect(c11).toBeGreaterThan(0);
28
+ expect(c12).toBeGreaterThan(0);
29
+ expect(c21).toBe(c11);
30
+ expect(c22).toBe(c12);
31
+ });
32
+
33
+ test('reset clears cached tokenizers', async () => {
34
+ await createTokenCounter();
35
+ expect(TokenEncoderManager.isInitialized()).toBe(true);
36
+
37
+ TokenEncoderManager.reset();
38
+ expect(TokenEncoderManager.isInitialized()).toBe(false);
38
39
  });
39
40
  });
@@ -0,0 +1,64 @@
1
+ import { HumanMessage } from '@langchain/core/messages';
2
+ import {
3
+ encodingForModel,
4
+ createTokenCounter,
5
+ TokenEncoderManager,
6
+ } from '@/utils/tokens';
7
+
8
+ describe('encodingForModel', () => {
9
+ test('returns claude for Claude model strings', () => {
10
+ expect(encodingForModel('claude-3-5-sonnet-20241022')).toBe('claude');
11
+ expect(encodingForModel('claude-3-haiku-20240307')).toBe('claude');
12
+ });
13
+
14
+ test('handles Bedrock Claude ARNs', () => {
15
+ expect(encodingForModel('anthropic.claude-3-5-sonnet-20241022-v2:0')).toBe(
16
+ 'claude'
17
+ );
18
+ });
19
+
20
+ test('is case-insensitive', () => {
21
+ expect(encodingForModel('CLAUDE-3-HAIKU')).toBe('claude');
22
+ expect(encodingForModel('Claude-3-Opus')).toBe('claude');
23
+ });
24
+
25
+ test('returns o200k_base for non-Claude models', () => {
26
+ expect(encodingForModel('gpt-4o')).toBe('o200k_base');
27
+ expect(encodingForModel('gemini-2.0-flash')).toBe('o200k_base');
28
+ expect(encodingForModel('mistral-large')).toBe('o200k_base');
29
+ });
30
+
31
+ test('returns o200k_base for empty string', () => {
32
+ expect(encodingForModel('')).toBe('o200k_base');
33
+ });
34
+ });
35
+
36
+ describe('createTokenCounter with different encodings', () => {
37
+ beforeEach(() => {
38
+ TokenEncoderManager.reset();
39
+ });
40
+
41
+ test('claude encoding produces valid token counts', async () => {
42
+ const counter = await createTokenCounter('claude');
43
+ const msg = new HumanMessage('Hello, world!');
44
+ const count = counter(msg);
45
+ expect(count).toBeGreaterThan(0);
46
+ });
47
+
48
+ test('o200k_base encoding produces valid token counts', async () => {
49
+ const counter = await createTokenCounter('o200k_base');
50
+ const msg = new HumanMessage('Hello, world!');
51
+ const count = counter(msg);
52
+ expect(count).toBeGreaterThan(0);
53
+ });
54
+
55
+ test('both encodings can be initialized and used independently', async () => {
56
+ const claudeCounter = await createTokenCounter('claude');
57
+ const o200kCounter = await createTokenCounter('o200k_base');
58
+ expect(TokenEncoderManager.isInitialized()).toBe(true);
59
+
60
+ const msg = new HumanMessage('Test message for both encodings');
61
+ expect(claudeCounter(msg)).toBeGreaterThan(0);
62
+ expect(o200kCounter(msg)).toBeGreaterThan(0);
63
+ });
64
+ });