@librechat/agents 2.2.8 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
+ import { AIMessage } from '@langchain/core/messages';
  import type { BaseMessage, UsageMetadata } from '@langchain/core/messages';
  import type { TokenCounter } from '@/types/run';
  export type PruneMessagesFactoryParams = {
@@ -5,6 +6,7 @@ export type PruneMessagesFactoryParams = {
  startIndex: number;
  tokenCounter: TokenCounter;
  indexTokenCountMap: Record<string, number>;
+ thinkingEnabled?: boolean;
  };
  export type PruneMessagesParams = {
  messages: BaseMessage[];
@@ -45,11 +47,15 @@ function getMessagesWithinTokenLimit({
  maxContextTokens,
  indexTokenCountMap,
  startOnMessageType,
+ thinkingEnabled,
+ tokenCounter,
  }: {
  messages: BaseMessage[];
  maxContextTokens: number;
  indexTokenCountMap: Record<string, number>;
  startOnMessageType?: string;
+ thinkingEnabled?: boolean;
+ tokenCounter?: TokenCounter;
  }): {
  context: BaseMessage[];
  remainingContextTokens: number;
@@ -87,19 +93,204 @@ function getMessagesWithinTokenLimit({
  }
  }

- if (startOnMessageType && context.length > 0) {
- const requiredTypeIndex = context.findIndex(msg => msg.getType() === startOnMessageType);
-
- if (requiredTypeIndex > 0) {
- context = context.slice(requiredTypeIndex);
- }
+ // Handle startOnMessageType requirement
+ if (startOnMessageType && context.length > 0) {
+ const requiredTypeIndex = context.findIndex(msg => msg.getType() === startOnMessageType);
+
+ if (requiredTypeIndex > 0) {
+ context = context.slice(requiredTypeIndex);
  }
  }
-
+
+ // Add system message if it exists
  if (instructions && _messages.length > 0) {
  context.push(_messages[0] as BaseMessage);
  messages.shift();
  }
+
+ // Handle thinking mode requirement for Anthropic
+ if (thinkingEnabled && context.length > 0 && tokenCounter) {
+ // Check if the latest message is an assistant message
+ const latestMessageIsAssistant = _messages.length > 0 && _messages[_messages.length - 1].getType() === 'ai';
+
+ // Process only if we have an assistant message in the context
+ const firstAssistantIndex = context.findIndex(msg => msg.getType() === 'ai');
+
+ if (firstAssistantIndex >= 0) {
+ const firstAssistantMsg = context[firstAssistantIndex];
+
+ // Check if the first assistant message already has a thinking block
+ const hasThinkingBlock = Array.isArray(firstAssistantMsg.content) &&
+ firstAssistantMsg.content.some(item =>
+ item && typeof item === 'object' && item.type === 'thinking');
+
+ // Only proceed if we need to add thinking blocks
+ if (!hasThinkingBlock) {
+ // Collect thinking blocks from pruned assistant messages, starting from the most recent
+ const thinkingBlocks: any[] = [];
+
+ // Look through pruned messages for thinking blocks, starting from the end (most recent)
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msg = messages[i];
+ if (msg.getType() === 'ai' && Array.isArray(msg.content)) {
+ for (const item of msg.content) {
+ if (item && typeof item === 'object' && item.type === 'thinking') {
+ thinkingBlocks.push(item);
+ // We only need one thinking block
+ break;
+ }
+ }
+ if (thinkingBlocks.length > 0) break; // Stop after finding one thinking block
+ }
+ }
+
+ // If we found thinking blocks, add them to the first assistant message
+ if (thinkingBlocks.length > 0) {
+ // Calculate token count of original message
+ const originalTokenCount = tokenCounter(firstAssistantMsg);
+
+ // Create a new content array with thinking blocks at the beginning
+ let newContent: any[];
+
+ if (Array.isArray(firstAssistantMsg.content)) {
+ // Keep the original content (excluding any existing thinking blocks)
+ const originalContent = firstAssistantMsg.content.filter(item =>
+ !(item && typeof item === 'object' && item.type === 'thinking'));
+
+ newContent = [...thinkingBlocks, ...originalContent];
+ } else if (typeof firstAssistantMsg.content === 'string') {
+ newContent = [
+ ...thinkingBlocks,
+ { type: 'text', text: firstAssistantMsg.content }
+ ];
+ } else {
+ newContent = thinkingBlocks;
+ }
+
+ // Create a new message with the updated content
+ const newMessage = new AIMessage({
+ content: newContent,
+ additional_kwargs: firstAssistantMsg.additional_kwargs,
+ response_metadata: firstAssistantMsg.response_metadata,
+ });
+
+ // Calculate token count of new message
+ const newTokenCount = tokenCounter(newMessage);
+
+ // Adjust current token count
+ currentTokenCount += (newTokenCount - originalTokenCount);
+
+ // Replace the first assistant message
+ context[firstAssistantIndex] = newMessage;
+
+ // If we've exceeded the token limit, we need to prune more messages
+ if (currentTokenCount > remainingContextTokens) {
+ // Build a map of tool call IDs to track AI <--> tool message correspondences
+ const toolCallIdMap = new Map<string, number>();
+
+ // Identify tool call IDs in the context
+ for (let i = 0; i < context.length; i++) {
+ const msg = context[i];
+
+ // Check for tool calls in AI messages
+ if (msg.getType() === 'ai' && Array.isArray(msg.content)) {
+ for (const item of msg.content) {
+ if (item && typeof item === 'object' && item.type === 'tool_use' && item.id) {
+ toolCallIdMap.set(item.id, i);
+ }
+ }
+ }
+
+ // Check for tool messages
+ if (msg.getType() === 'tool' && 'tool_call_id' in msg && typeof msg.tool_call_id === 'string') {
+ toolCallIdMap.set(msg.tool_call_id, i);
+ }
+ }
+
+ // Track which messages to remove
+ const indicesToRemove = new Set<number>();
+
+ // Start removing messages from the end, but preserve AI <--> tool message correspondences
+ let i = context.length - 1;
+ while (i > firstAssistantIndex && currentTokenCount > remainingContextTokens) {
+ const msgToRemove = context[i];
+
+ // Check if this is a tool message or has tool calls
+ let canRemove = true;
+
+ if (msgToRemove.getType() === 'tool' && 'tool_call_id' in msgToRemove && typeof msgToRemove.tool_call_id === 'string') {
+ // If this is a tool message, check if we need to keep its corresponding AI message
+ const aiIndex = toolCallIdMap.get(msgToRemove.tool_call_id);
+ if (aiIndex !== undefined && aiIndex !== i && !indicesToRemove.has(aiIndex)) {
+ // We need to remove both the tool message and its corresponding AI message
+ indicesToRemove.add(i);
+ indicesToRemove.add(aiIndex);
+ currentTokenCount -= (tokenCounter(msgToRemove) + tokenCounter(context[aiIndex]));
+ canRemove = false;
+ }
+ } else if (msgToRemove.getType() === 'ai' && Array.isArray(msgToRemove.content)) {
+ // If this is an AI message with tool calls, check if we need to keep its corresponding tool messages
+ for (const item of msgToRemove.content) {
+ if (item && typeof item === 'object' && item.type === 'tool_use' && item.id) {
+ const toolIndex = toolCallIdMap.get(item.id as string);
+ if (toolIndex !== undefined && toolIndex !== i && !indicesToRemove.has(toolIndex)) {
+ // We need to remove both the AI message and its corresponding tool message
+ indicesToRemove.add(i);
+ indicesToRemove.add(toolIndex);
+ currentTokenCount -= (tokenCounter(msgToRemove) + tokenCounter(context[toolIndex]));
+ canRemove = false;
+ break;
+ }
+ }
+ }
+ }
+
+ // If we can remove this message individually
+ if (canRemove && !indicesToRemove.has(i)) {
+ indicesToRemove.add(i);
+ currentTokenCount -= tokenCounter(msgToRemove);
+ }
+
+ i--;
+ }
+
+ // Remove messages in reverse order to avoid index shifting
+ const sortedIndices = Array.from(indicesToRemove).sort((a, b) => b - a);
+ for (const index of sortedIndices) {
+ context.splice(index, 1);
+ }
+
+ // Update remainingContextTokens to reflect the new token count
+ remainingContextTokens = maxContextTokens - currentTokenCount;
+ }
+ }
+ }
+ }
+
+ // If the latest message is an assistant message, ensure an assistant message appears early in the context
+ // but maintain system message precedence
+ if (latestMessageIsAssistant && context.length > 0) {
+ // Find the first assistant message in the context
+ const assistantIndex = context.findIndex(msg => msg.getType() === 'ai');
+ if (assistantIndex > 0) {
+ // Check if there's a system message at the beginning
+ const hasSystemFirst = context[0].getType() === 'system';
+
+ // Move the assistant message to the appropriate position
+ const assistantMsg = context[assistantIndex];
+ context.splice(assistantIndex, 1);
+
+ if (hasSystemFirst) {
+ // Insert after the system message
+ context.splice(1, 0, assistantMsg);
+ } else {
+ // Insert at the beginning if no system message
+ context.unshift(assistantMsg);
+ }
+ }
+ }
+ }

  const prunedMemory = messages;
  summaryIndex = prunedMemory.length - 1;
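
Note on the new thinking-mode branch above: it detects and re-attaches content blocks whose type is 'thinking' on assistant messages. A minimal illustrative sketch of the shape being checked, assuming Anthropic's extended-thinking block fields ('thinking', 'signature' are assumptions for illustration, not taken from this diff):

  import { AIMessage } from '@langchain/core/messages';

  // Assumed example of an assistant message carrying a thinking block.
  const msg = new AIMessage({
    content: [
      { type: 'thinking', thinking: 'model reasoning...', signature: 'opaque-signature' },
      { type: 'text', text: 'Final answer for the user.' },
    ],
  });

  // Same detection the pruning code performs:
  const hasThinkingBlock =
    Array.isArray(msg.content) &&
    msg.content.some((item) => item && typeof item === 'object' && item.type === 'thinking');
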
@@ -121,6 +312,7 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
  const indexTokenCountMap = { ...factoryParams.indexTokenCountMap };
  let lastTurnStartIndex = factoryParams.startIndex;
  let totalTokens = (Object.values(indexTokenCountMap)).reduce((a, b) => a + b, 0);
+
  return function pruneMessages(params: PruneMessagesParams): {
  context: BaseMessage[];
  indexTokenCountMap: Record<string, number>;
@@ -167,11 +359,14 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
  return { context: params.messages, indexTokenCountMap };
  }

+ // Pass the tokenCounter to getMessagesWithinTokenLimit for token recalculation
  const { context } = getMessagesWithinTokenLimit({
  maxContextTokens: factoryParams.maxTokens,
  messages: params.messages,
  indexTokenCountMap,
  startOnMessageType: params.startOnMessageType,
+ thinkingEnabled: factoryParams.thinkingEnabled,
+ tokenCounter: factoryParams.tokenCounter,
  });

  return { context, indexTokenCountMap };
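
Taken together, a 2.3.0 caller opts into this behavior through the new factory parameter. A minimal usage sketch, assuming createPruneMessages is exported from the package root; the maxTokens/startIndex values and the naive token counter below are placeholders for illustration:

  import type { BaseMessage } from '@langchain/core/messages';
  import { createPruneMessages } from '@librechat/agents'; // assumed export path

  const conversation: BaseMessage[] = [];

  const prune = createPruneMessages({
    maxTokens: 4096,
    startIndex: 0,
    indexTokenCountMap: {},
    // Placeholder counter: returns a rough token estimate per message.
    tokenCounter: (msg: BaseMessage): number => Math.ceil(JSON.stringify(msg.content).length / 4),
    thinkingEnabled: true, // new in 2.3.0
  });

  const { context } = prune({ messages: conversation, startOnMessageType: 'human' });
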
@@ -0,0 +1,162 @@
+ // src/scripts/cli.ts
+ import { config } from 'dotenv';
+ config();
+ import { HumanMessage, BaseMessage } from '@langchain/core/messages';
+ import type { RunnableConfig } from '@langchain/core/runnables';
+ import type * as t from '@/types';
+ import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
+ import { ToolEndHandler, ModelEndHandler, createMetadataAggregator } from '@/events';
+ import { getLLMConfig } from '@/utils/llmConfig';
+ import { getArgs } from '@/scripts/args';
+ import { GraphEvents } from '@/common';
+ import { Run } from '@/run';
+ import { createCodeExecutionTool } from '@/tools/CodeExecutor';
+
+ const conversationHistory: BaseMessage[] = [];
+
+ async function testCodeExecution(): Promise<void> {
+ const { userName, location, provider, currentDate } = await getArgs();
+ const { contentParts, aggregateContent } = createContentAggregator();
+ const customHandlers = {
+ [GraphEvents.TOOL_END]: new ToolEndHandler(),
+ [GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(),
+ [GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
+ [GraphEvents.ON_RUN_STEP_COMPLETED]: {
+ handle: (event: GraphEvents.ON_RUN_STEP_COMPLETED, data: t.StreamEventData): void => {
+ console.log('====== ON_RUN_STEP_COMPLETED ======');
+ console.dir(data, { depth: null });
+ aggregateContent({ event, data: data as unknown as { result: t.ToolEndEvent } });
+ }
+ },
+ [GraphEvents.ON_RUN_STEP]: {
+ handle: (event: GraphEvents.ON_RUN_STEP, data: t.StreamEventData): void => {
+ console.log('====== ON_RUN_STEP ======');
+ console.dir(data, { depth: null });
+ aggregateContent({ event, data: data as t.RunStep });
+ }
+ },
+ [GraphEvents.ON_RUN_STEP_DELTA]: {
+ handle: (event: GraphEvents.ON_RUN_STEP_DELTA, data: t.StreamEventData): void => {
+ console.log('====== ON_RUN_STEP_DELTA ======');
+ console.dir(data, { depth: null });
+ aggregateContent({ event, data: data as t.RunStepDeltaEvent });
+ }
+ },
+ [GraphEvents.ON_MESSAGE_DELTA]: {
+ handle: (event: GraphEvents.ON_MESSAGE_DELTA, data: t.StreamEventData): void => {
+ console.log('====== ON_MESSAGE_DELTA ======');
+ console.dir(data, { depth: null });
+ aggregateContent({ event, data: data as t.MessageDeltaEvent });
+ }
+ },
+ [GraphEvents.TOOL_START]: {
+ handle: (_event: string, data: t.StreamEventData, metadata?: Record<string, unknown>): void => {
+ console.log('====== TOOL_START ======');
+ console.dir(data, { depth: null });
+ }
+ },
+ };
+
+ const llmConfig = getLLMConfig(provider);
+
+ const run = await Run.create<t.IState>({
+ runId: 'message-num-1',
+ graphConfig: {
+ type: 'standard',
+ llmConfig,
+ tools: [createCodeExecutionTool()],
+ instructions: 'You are a friendly AI assistant with coding capabilities. Always address the user by their name.',
+ additional_instructions: `The user's name is ${userName} and they are located in ${location}. The current date is ${currentDate}.`,
+ },
+ returnContent: true,
+ customHandlers,
+ });
+
+ const config: Partial<RunnableConfig> & { version: 'v1' | 'v2'; run_id?: string; streamMode: string } = {
+ configurable: {
+ provider,
+ thread_id: 'conversation-num-1',
+ },
+ streamMode: 'values',
+ version: 'v2' as const,
+ // recursionLimit: 3,
+ };
+
+ console.log('Test 1: Create Project Plan');
+
+ const userMessage1 = `
+ Hi ${userName} here. We are testing your file capabilities.
+
+ 1. Create a text file named "project_plan.txt" that contains: "This is a project plan for a new software development project."
+
+ Please generate this file so I can review it.
+ `;
+
+ conversationHistory.push(new HumanMessage(userMessage1));
+
+ let inputs = {
+ messages: conversationHistory,
+ };
+ const finalContentParts1 = await run.processStream(inputs, config);
+ const finalMessages1 = run.getRunMessages();
+ if (finalMessages1) {
+ conversationHistory.push(...finalMessages1);
+ }
+ console.log('\n\n====================\n\n');
+ console.dir(contentParts, { depth: null });
+
+ console.log('Test 2: Edit Project Plan');
+
+ const userMessage2 = `
+ Thanks for creating the project plan. Now I'd like you to edit the same plan to:
+
+ 1. Add a new section called "Technology Stack" that contains: "The technology stack for this project includes the following technologies" and nothing more.
+
+ `;
+
+ // Make sure to pass the file ID of the previous file you created and explicitly duplicate or rename the file in your code so we can then access it. Also print the contents of the new file to ensure we did what we wanted.`;
+
+ conversationHistory.push(new HumanMessage(userMessage2));
+
+ inputs = {
+ messages: conversationHistory,
+ };
+ const finalContentParts2 = await run.processStream(inputs, config);
+ const finalMessages2 = run.getRunMessages();
+ if (finalMessages2) {
+ conversationHistory.push(...finalMessages2);
+ }
+ console.log('\n\n====================\n\n');
+ console.dir(contentParts, { depth: null });
+
+ const { handleLLMEnd, collected } = createMetadataAggregator();
+ const titleResult = await run.generateTitle({
+ inputText: userMessage2,
+ contentParts,
+ chainOptions: {
+ callbacks: [{
+ handleLLMEnd,
+ }],
+ },
+ });
+ console.log('Generated Title:', titleResult);
+ console.log('Collected metadata:', collected);
+ }
+
+ process.on('unhandledRejection', (reason, promise) => {
+ console.error('Unhandled Rejection at:', promise, 'reason:', reason);
+ console.log('Conversation history:');
+ console.dir(conversationHistory, { depth: null });
+ process.exit(1);
+ });
+
+ process.on('uncaughtException', (err) => {
+ console.error('Uncaught Exception:', err);
+ });
+
+ testCodeExecution().catch((err) => {
+ console.error(err);
+ console.log('Conversation history:');
+ console.dir(conversationHistory, { depth: null });
+ process.exit(1);
+ });