@librechat/agents 2.1.3 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,48 @@
 import { AIMessageChunk } from '@langchain/core/messages';
 import { ChatAnthropicMessages } from '@langchain/anthropic';
 import { ChatGenerationChunk } from '@langchain/core/outputs';
+import type { BaseChatModelParams } from '@langchain/core/language_models/chat_models';
 import type { BaseMessage, MessageContentComplex } from '@langchain/core/messages';
 import type { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
 import type { AnthropicInput } from '@langchain/anthropic';
-import type { AnthropicMessageCreateParams, AnthropicStreamUsage, AnthropicMessageStartEvent, AnthropicMessageDeltaEvent } from '@/llm/anthropic/types';
+import type { AnthropicMessageCreateParams, AnthropicStreamingMessageCreateParams, AnthropicStreamUsage, AnthropicMessageStartEvent, AnthropicMessageDeltaEvent } from '@/llm/anthropic/types';
 import { _makeMessageChunkFromAnthropicEvent } from './utils/message_outputs';
 import { _convertMessagesToAnthropicPayload } from './utils/message_inputs';
 import { TextStream } from '@/llm/text';
 
-function _toolsInParams(params: AnthropicMessageCreateParams): boolean {
+function _toolsInParams(
+  params: AnthropicMessageCreateParams | AnthropicStreamingMessageCreateParams
+): boolean {
   return !!(params.tools && params.tools.length > 0);
 }
+function _documentsInParams(
+  params: AnthropicMessageCreateParams | AnthropicStreamingMessageCreateParams
+): boolean {
+  for (const message of params.messages ?? []) {
+    if (typeof message.content === "string") {
+      continue;
+    }
+    for (const block of message.content ?? []) {
+      if (
+        typeof block === "object" &&
+        block != null &&
+        block.type === "document" &&
+        typeof block.citations === "object" &&
+        block.citations.enabled
+      ) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+function _thinkingInParams(
+  params: AnthropicMessageCreateParams | AnthropicStreamingMessageCreateParams
+): boolean {
+  return !!(params.thinking && params.thinking.type === "enabled");
+}
+
 
 function extractToken(chunk: AIMessageChunk): [string, 'string' | 'input' | 'content'] | [undefined] {
   if (typeof chunk.content === 'string') {
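The three helpers above feed the `coerceContentToString` decision later in this diff: streamed content stays as structured blocks whenever tools, citation-enabled documents, or extended thinking are in play. A minimal sketch of payloads that would trip each predicate (object shapes follow Anthropic's Messages API; all values are illustrative, not taken from the package):

```ts
// Hypothetical payloads; only the fields each predicate inspects are shown.
const withTools = { tools: [{ name: 'get_weather' }] };                      // _toolsInParams → true
const withThinking = { thinking: { type: 'enabled', budget_tokens: 2000 } }; // _thinkingInParams → true
const withCitedDoc = {
  messages: [{
    role: 'user',
    content: [{
      type: 'document',
      source: { type: 'text', media_type: 'text/plain', data: 'Q3 revenue grew 12%.' },
      citations: { enabled: true },                                          // _documentsInParams → true
    }],
  }],
};
```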
@@ -30,6 +61,12 @@ function extractToken(chunk: AIMessageChunk): [string, 'string' | 'input' | 'con
     'text' in chunk.content[0]
   ) {
     return [chunk.content[0].text, 'content'];
+  } else if (
+    Array.isArray(chunk.content) &&
+    chunk.content.length >= 1 &&
+    'thinking' in chunk.content[0]
+  ) {
+    return [chunk.content[0].thinking, 'content'];
   }
   return [undefined];
 }
@@ -45,12 +82,14 @@ function cloneChunk(text: string, tokenType: string, chunk: AIMessageChunk): AIM
     return new AIMessageChunk(Object.assign({}, chunk, { content: [Object.assign({}, content, { text })] }));
   } else if (tokenType === 'content' && content.type === 'text_delta') {
     return new AIMessageChunk(Object.assign({}, chunk, { content: [Object.assign({}, content, { text })] }));
+  } else if (tokenType === 'content' && content.type?.startsWith('thinking')) {
+    return new AIMessageChunk(Object.assign({}, chunk, { content: [Object.assign({}, content, { thinking: text })] }));
   }
 
   return chunk;
 }
 
-export type CustomAnthropicInput = AnthropicInput & { _lc_stream_delay?: number };
+export type CustomAnthropicInput = AnthropicInput & { _lc_stream_delay?: number } & BaseChatModelParams;
 
 export class CustomAnthropic extends ChatAnthropicMessages {
   _lc_stream_delay: number;
@@ -58,9 +97,9 @@ export class CustomAnthropic extends ChatAnthropicMessages {
   private message_delta: AnthropicMessageDeltaEvent | undefined;
   private tools_in_params?: boolean;
   private emitted_usage?: boolean;
-  constructor(fields: CustomAnthropicInput) {
+  constructor(fields?: CustomAnthropicInput) {
     super(fields);
-    this._lc_stream_delay = fields._lc_stream_delay ?? 25;
+    this._lc_stream_delay = fields?._lc_stream_delay ?? 25;
   }
 
   /**
@@ -76,19 +115,21 @@ export class CustomAnthropic extends ChatAnthropicMessages {
     if (!outputUsage) {
       return;
     }
-    const totalUsage = {
-      total_tokens: (inputUsage?.input_tokens ?? 0)
-        + (inputUsage?.output_tokens ?? 0)
-        + (inputUsage?.cache_creation_input_tokens ?? 0)
-        + (inputUsage?.cache_read_input_tokens ?? 0)
-        + (outputUsage.input_tokens ?? 0)
-        + (outputUsage.output_tokens ?? 0)
-        + (outputUsage.cache_creation_input_tokens ?? 0)
-        + (outputUsage.cache_read_input_tokens ?? 0),
+    const totalUsage: AnthropicStreamUsage = {
+      input_tokens: inputUsage?.input_tokens ?? 0,
+      output_tokens: outputUsage?.output_tokens ?? 0,
+      total_tokens: (inputUsage?.input_tokens ?? 0) + (outputUsage?.output_tokens ?? 0),
     };
 
+    if (inputUsage?.cache_creation_input_tokens != null || inputUsage?.cache_read_input_tokens != null) {
+      totalUsage.input_token_details = {
+        cache_creation: inputUsage.cache_creation_input_tokens ?? 0,
+        cache_read: inputUsage.cache_read_input_tokens ?? 0,
+      };
+    }
+
     this.emitted_usage = true;
-    return Object.assign(totalUsage, inputUsage, outputUsage);
+    return totalUsage;
   }
 
   resetTokenEvents(): void {
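Note the behavioral change: cache token counts are no longer folded into `total_tokens` (the old code summed input, output, and both cache counters from both events); they now surface separately under `input_token_details`. For example (illustrative numbers):

```ts
// If message_start reported { input_tokens: 1200, cache_read_input_tokens: 800 }
// and message_delta reported { output_tokens: 350 }, the method now returns:
const totalUsage: AnthropicStreamUsage = {
  input_tokens: 1200,
  output_tokens: 350,
  total_tokens: 1550, // input + output only; cache reads no longer inflate the total
  input_token_details: { cache_creation: 0, cache_read: 800 },
};
```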
@@ -131,12 +172,15 @@ export class CustomAnthropic extends ChatAnthropicMessages {
   ): AsyncGenerator<ChatGenerationChunk> {
     const params = this.invocationParams(options);
     const formattedMessages = _convertMessagesToAnthropicPayload(messages);
-    this.tools_in_params = _toolsInParams({
+    const payload = {
       ...params,
       ...formattedMessages,
-      stream: false,
-    });
-    const coerceContentToString = !this.tools_in_params;
+      stream: true,
+    } as const;
+    const coerceContentToString =
+      !_toolsInParams(payload) &&
+      !_documentsInParams(payload) &&
+      !_thinkingInParams(payload);
 
     const stream = await this.createStreamWithRetry(
       {
@@ -157,10 +201,9 @@ export class CustomAnthropic extends ChatAnthropicMessages {
       throw new Error('AbortError: User aborted the request.');
     }
 
-    const type = data.type ?? '';
-    if (type === 'message_start') {
+    if (data.type === 'message_start') {
       this.message_start = data as AnthropicMessageStartEvent;
-    } else if (type === 'message_delta') {
+    } else if (data.type === 'message_delta') {
      this.message_delta = data as AnthropicMessageDeltaEvent;
     }
 
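For context, the two captured events carry the two halves of the usage picture that `getTokenEvents` (above) later merges. Abridged event shapes per Anthropic's streaming API, with illustrative values:

```ts
// message_start carries input-side usage on the message object…
const messageStart = {
  type: 'message_start',
  message: { id: 'msg_abc123', usage: { input_tokens: 1200, cache_read_input_tokens: 800 } },
};
// …while the final message_delta carries the output-side usage.
const messageDelta = {
  type: 'message_delta',
  delta: { stop_reason: 'end_turn' },
  usage: { output_tokens: 350 },
};
```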
@@ -57,12 +57,10 @@ export interface AnthropicStreamUsage {
    * The number of cache creation input tokens used (write operations)
    */
   cache_creation_input_tokens?: number;
-
   /**
    * The number of cache input tokens used (read operations)
    */
   cache_read_input_tokens?: number;
-
   /**
    * The number of output tokens generated in the response
    */
@@ -71,4 +69,11 @@
    * The total number of tokens generated in the response
    */
   total_tokens: number;
+  /**
+   * Details about input token usage
+   */
+  input_token_details?: {
+    cache_creation: number;
+    cache_read: number;
+  };
 }
@@ -47,16 +47,28 @@ export function _makeMessageChunkFromAnthropicEvent(
         filteredAdditionalKwargs[key] = value;
       }
     }
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const { input_tokens, output_tokens, ...rest }: Record<string, any> =
+      usage ?? {};
     const usageMetadata: UsageMetadata = {
-      input_tokens: usage.input_tokens,
-      output_tokens: usage.output_tokens,
-      total_tokens: usage.input_tokens + usage.output_tokens,
+      input_tokens,
+      output_tokens,
+      total_tokens: input_tokens + output_tokens,
+      input_token_details: {
+        cache_creation: rest.cache_creation_input_tokens,
+        cache_read: rest.cache_read_input_tokens,
+      },
    };
     return {
       chunk: new AIMessageChunk({
         content: fields.coerceContentToString ? '' : [],
         additional_kwargs: filteredAdditionalKwargs,
         usage_metadata: fields.streamUsage ? usageMetadata : undefined,
+        response_metadata: {
+          usage: {
+            ...rest,
+          },
+        },
         id: data.message.id,
       }),
     };
@@ -65,6 +77,12 @@ export function _makeMessageChunkFromAnthropicEvent(
       input_tokens: 0,
       output_tokens: data.usage.output_tokens,
       total_tokens: data.usage.output_tokens,
+      input_token_details: {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        cache_creation: (data.usage as any).cache_creation_input_tokens,
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        cache_read: (data.usage as any).cache_read_input_tokens,
+      },
     };
     return {
       chunk: new AIMessageChunk({
@@ -189,6 +207,29 @@ export function _makeMessageChunkFromAnthropicEvent(
       }),
     };
   }
+  } else if (
+    data.type === "content_block_start" &&
+    data.content_block.type === "redacted_thinking"
+  ) {
+    return {
+      chunk: new AIMessageChunk({
+        content: fields.coerceContentToString
+          ? ""
+          : [{ index: data.index, ...data.content_block }],
+      }),
+    };
+  } else if (
+    data.type === "content_block_start" &&
+    data.content_block.type === "thinking"
+  ) {
+    const content = data.content_block.thinking;
+    return {
+      chunk: new AIMessageChunk({
+        content: fields.coerceContentToString
+          ? content
+          : [{ index: data.index, ...data.content_block }],
+      }),
+    };
   }
 
   return null;
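The new branches above handle the start of Anthropic thinking blocks. Abridged events of each kind (shapes follow Anthropic's streaming API; the redacted payload is a placeholder, not real data):

```ts
const thinkingStart = {
  type: 'content_block_start',
  index: 0,
  content_block: { type: 'thinking', thinking: '', signature: '' },
};
const redactedStart = {
  type: 'content_block_start',
  index: 0,
  content_block: { type: 'redacted_thinking', data: '<base64-encrypted-payload>' },
};
// With coercion disabled, each yields an AIMessageChunk whose content is
// [{ index: 0, ...content_block }].
```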
@@ -200,13 +241,17 @@ export function anthropicResponseToChatMessages(
 ): ChatGeneration[] {
   const usage: Record<string, number> | null | undefined =
     additionalKwargs.usage as Record<string, number> | null | undefined;
-  const usageMetadata =
+  const usageMetadata =
     usage != null
       ? {
-        input_tokens: usage.input_tokens ?? 0,
-        output_tokens: usage.output_tokens ?? 0,
-        total_tokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
-      }
+          input_tokens: usage.input_tokens ?? 0,
+          output_tokens: usage.output_tokens ?? 0,
+          total_tokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0),
+          input_token_details: {
+            cache_creation: usage.cache_creation_input_tokens,
+            cache_read: usage.cache_read_input_tokens,
+          },
+        }
       : undefined;
   if (messages.length === 1 && messages[0].type === 'text') {
     return [
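The non-streaming path gets the same treatment; a worked example of the mapping (illustrative values):

```ts
// Raw usage from a non-streaming API response:
const usage = { input_tokens: 10, output_tokens: 20, cache_read_input_tokens: 5 };
// Resulting usageMetadata:
const usageMetadata = {
  input_tokens: 10,
  output_tokens: 20,
  total_tokens: 30,
  // cache_creation is undefined here because the response carried no
  // cache_creation_input_tokens field
  input_token_details: { cache_creation: undefined, cache_read: 5 },
};
```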
package/src/scripts/test-thinking.ts ADDED
@@ -0,0 +1,152 @@
+// src/scripts/test-thinking.ts
+import { config } from 'dotenv';
+config();
+import { HumanMessage, SystemMessage, BaseMessage } from '@langchain/core/messages';
+import type { UsageMetadata } from '@langchain/core/messages';
+import * as t from '@/types';
+import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
+import { ToolEndHandler, ModelEndHandler } from '@/events';
+import { GraphEvents, Providers } from '@/common';
+import { getLLMConfig } from '@/utils/llmConfig';
+import { getArgs } from '@/scripts/args';
+import { Run } from '@/run';
+
+const conversationHistory: BaseMessage[] = [];
+let _contentParts: t.MessageContentComplex[] = [];
+const collectedUsage: UsageMetadata[] = [];
+
+async function testThinking(): Promise<void> {
+  const { userName } = await getArgs();
+  const instructions = `You are a helpful AI assistant for ${userName}. When answering questions, be thorough in your reasoning.`;
+  const { contentParts, aggregateContent } = createContentAggregator();
+  _contentParts = contentParts as t.MessageContentComplex[];
+
+  // Set up event handlers
+  const customHandlers = {
+    [GraphEvents.TOOL_END]: new ToolEndHandler(),
+    [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
+    [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
+    [GraphEvents.ON_RUN_STEP_COMPLETED]: {
+      handle: (event: GraphEvents.ON_RUN_STEP_COMPLETED, data: t.StreamEventData): void => {
+        console.log('====== ON_RUN_STEP_COMPLETED ======');
+        aggregateContent({ event, data: data as unknown as { result: t.ToolEndEvent } });
+      }
+    },
+    [GraphEvents.ON_RUN_STEP]: {
+      handle: (event: GraphEvents.ON_RUN_STEP, data: t.RunStep) => {
+        aggregateContent({ event, data });
+      },
+    },
+    [GraphEvents.ON_RUN_STEP_DELTA]: {
+      handle: (event: GraphEvents.ON_RUN_STEP_DELTA, data: t.RunStepDeltaEvent) => {
+        aggregateContent({ event, data });
+      },
+    },
+    [GraphEvents.ON_MESSAGE_DELTA]: {
+      handle: (event: GraphEvents.ON_MESSAGE_DELTA, data: t.MessageDeltaEvent) => {
+        aggregateContent({ event, data });
+      },
+    },
+    [GraphEvents.ON_REASONING_DELTA]: {
+      handle: (event: GraphEvents.ON_REASONING_DELTA, data: t.ReasoningDeltaEvent) => {
+        aggregateContent({ event, data });
+      },
+    },
+  };
+
+  const baseLlmConfig: t.LLMConfig = getLLMConfig(Providers.ANTHROPIC);
+
+  if (baseLlmConfig.provider !== 'anthropic') {
+    console.error('This test requires Anthropic as the LLM provider. Please specify provider=anthropic');
+    process.exit(1);
+  }
+
+  // Enable thinking with token budget
+  const llmConfig = {
+    ...baseLlmConfig,
+    model: 'claude-3-7-sonnet-latest',
+    thinking: { type: "enabled", budget_tokens: 2000 }
+  };
+
+  const run = await Run.create<t.IState>({
+    runId: 'test-thinking-id',
+    graphConfig: {
+      instructions,
+      type: 'standard',
+      llmConfig,
+    },
+    returnContent: true,
+    customHandlers: customHandlers as t.RunConfig['customHandlers'],
+  });
+
+  const config = {
+    configurable: {
+      thread_id: 'thinking-test-thread',
+    },
+    streamMode: 'values',
+    version: 'v2' as const,
+  };
+
+  // Test 1: Regular thinking mode
+  console.log('\n\nTest 1: Regular thinking mode');
+  const userMessage1 = `What would be the environmental and economic impacts if all cars globally were replaced by electric vehicles overnight?`;
+  conversationHistory.push(new HumanMessage(userMessage1));
+
+  console.log('Running first query with thinking enabled...');
+  const firstInputs = { messages: [...conversationHistory] };
+  await run.processStream(firstInputs, config);
+
+  // Extract and display thinking blocks
+  const finalMessages = run.getRunMessages();
+
+  // Test 2: Try multi-turn conversation
+  console.log('\n\nTest 2: Multi-turn conversation with thinking enabled');
+  const userMessage2 = `Given your previous analysis, what would be the most significant technical challenges in making this transition?`;
+  conversationHistory.push(new HumanMessage(userMessage2));
+
+  console.log('Running second query with thinking enabled...');
+  const secondInputs = { messages: [...conversationHistory] };
+  await run.processStream(secondInputs, config);
+
+  // Display thinking blocks for second response
+  const finalMessages2 = run.getRunMessages();
+
+  // Test 3: Redacted thinking mode
+  console.log('\n\nTest 3: Redacted thinking mode');
+  const magicString = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB";
+  const userMessage3 = `${magicString}\n\nExplain how quantum computing works in simple terms.`;
+
+  // Reset conversation for clean test
+  conversationHistory.length = 0;
+  conversationHistory.push(new HumanMessage(userMessage3));
+
+  console.log('Running query with redacted thinking...');
+  const thirdInputs = { messages: [...conversationHistory] };
+  await run.processStream(thirdInputs, config);
+
+  // Display redacted thinking blocks
+  const finalMessages3 = run.getRunMessages();
+  console.log('\n\nThinking feature test completed!');
+}
+
+process.on('unhandledRejection', (reason, promise) => {
+  console.error('Unhandled Rejection at:', promise, 'reason:', reason);
+  console.log('Conversation history:');
+  console.dir(conversationHistory, { depth: null });
+  console.log('Content parts:');
+  console.dir(_contentParts, { depth: null });
+  process.exit(1);
+});
+
+process.on('uncaughtException', (err) => {
+  console.error('Uncaught Exception:', err);
+});
+
+testThinking().catch((err) => {
+  console.error(err);
+  console.log('Conversation history:');
+  console.dir(conversationHistory, { depth: null });
+  console.log('Content parts:');
+  console.dir(_contentParts, { depth: null });
+  process.exit(1);
+});
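Stripped of the event-handler plumbing, what the script exercises reduces to a direct model call. A hedged sketch; the export path is assumed, and passing `thinking` through the constructor mirrors the test's `llmConfig` rather than a documented option:

```ts
import { CustomAnthropic, CustomAnthropicInput } from '@/llm/anthropic'; // assumed export path

async function main(): Promise<void> {
  const model = new CustomAnthropic({
    anthropicApiKey: process.env.ANTHROPIC_API_KEY,
    model: 'claude-3-7-sonnet-latest',
    // Cast because `thinking` may not yet be declared on AnthropicInput.
    thinking: { type: 'enabled', budget_tokens: 2000 },
  } as CustomAnthropicInput);

  for await (const chunk of await model.stream('Why is the sky blue?')) {
    console.log(chunk.content); // thinking deltas arrive as structured blocks
  }
}

main().catch(console.error);
```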
package/src/stream.ts CHANGED
@@ -213,6 +213,12 @@ hasToolCallChunks: ${hasToolCallChunks}
       graph.dispatchMessageDelta(stepId, {
         content,
       });
+    } else if (content.every((c) => c.type?.startsWith(ContentTypes.THINKING))) {
+      graph.dispatchReasoningDelta(stepId, {
+        content: content.map((c) => ({
+          type: ContentTypes.THINK,
+          think: (c as t.ThinkingContentText).thinking,
+        }))});
     }
   }
   handleToolCallChunks = ({
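The effect of the new branch is a straight field rename from Anthropic's block format into the library's reasoning format, e.g. (illustrative):

```ts
const incoming = [{ type: 'thinking', index: 0, thinking: 'Comparing both options first.' }];
const dispatched = incoming.map((c) => ({ type: 'think', think: c.thinking }));
// dispatched: [{ type: 'think', think: 'Comparing both options first.' }]
```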
@@ -271,8 +277,11 @@ hasToolCallChunks: ${hasToolCallChunks}
     });
   };
   handleReasoning(chunk: Partial<AIMessageChunk>, graph: Graph): void {
-    const reasoning_content = chunk.additional_kwargs?.[graph.reasoningKey] as string | undefined;
-    if (reasoning_content != null && reasoning_content && (chunk.content == null || chunk.content === '')) {
+    let reasoning_content = chunk.additional_kwargs?.[graph.reasoningKey] as string | undefined;
+    if (Array.isArray(chunk.content) && chunk.content[0]?.type === 'thinking') {
+      reasoning_content = 'valid';
+    }
+    if (reasoning_content != null && reasoning_content && (chunk.content == null || chunk.content === '' || reasoning_content === 'valid')) {
       graph.currentTokenType = ContentTypes.THINK;
       graph.tokenTypeSwitch = 'reasoning';
       return;
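With this change, a chunk whose first content block is a thinking block routes to reasoning handling even though `additional_kwargs` carries no reasoning key; the `'valid'` string is just an internal sentinel that satisfies the existing truthiness check. A sketch of such a chunk:

```ts
import type { AIMessageChunk } from '@langchain/core/messages';

const chunk: Partial<AIMessageChunk> = {
  content: [{ type: 'thinking', index: 0, thinking: 'Weighing the trade-offs.' }],
  additional_kwargs: {},
};
// handleReasoning(chunk, graph) now sets graph.currentTokenType = ContentTypes.THINK
```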
@@ -216,9 +216,16 @@ export type ReasoningContentText = {
   think: string;
 };
 
+/** Anthropic's Reasoning Content Block Format */
+export type ThinkingContentText = {
+  type: ContentTypes.THINKING;
+  index?: number;
+  thinking: string;
+};
+
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
-export type MessageContentComplex = (ReasoningContentText | MessageContentText | MessageContentImageUrl | (Record<string, any> & {
-  type?: 'text' | 'image_url' | 'think' | string;
+export type MessageContentComplex = (ThinkingContentText | ReasoningContentText | MessageContentText | MessageContentImageUrl | (Record<string, any> & {
+  type?: 'text' | 'image_url' | 'think' | 'thinking' | string;
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
 }) | (Record<string, any> & {
   type?: never;
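With the union extended, a mixed content array type-checks; a sketch assuming `ContentTypes.THINKING === 'thinking'` (consistent with the enum's usage elsewhere in this diff) and the package's usual `t` / `ContentTypes` imports:

```ts
const parts: t.MessageContentComplex[] = [
  { type: ContentTypes.THINKING, index: 0, thinking: 'Reason through the steps first.' },
  { type: 'text', text: 'Here is the final answer.' },
];
```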