@librechat/agents 2.3.7 → 2.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
 import { AIMessage, BaseMessage, UsageMetadata } from '@langchain/core/messages';
-import type { ThinkingContentText, MessageContentComplex } from '@/types/stream';
+import type { ThinkingContentText, MessageContentComplex, ReasoningContentText } from '@/types/stream';
 import type { TokenCounter } from '@/types/run';
-import { ContentTypes } from '@/common';
+import { ContentTypes, Providers } from '@/common';
+
 export type PruneMessagesFactoryParams = {
+  provider?: Providers;
   maxTokens: number;
   startIndex: number;
   tokenCounter: TokenCounter;
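
The hunk above adds an optional `provider` to `PruneMessagesFactoryParams`, letting the factory know which provider produced the conversation so it can match that provider's reasoning blocks later on. A minimal construction sketch (the character-based counter and the empty count map are placeholders for illustration, not part of this diff; the real `TokenCounter` signature is the one defined in '@/types/run'):

  // Hypothetical stand-in for a real TokenCounter implementation.
  const tokenCounter = (text: string): number => Math.ceil(text.length / 4);

  const pruneMessages = createPruneMessages({
    provider: Providers.BEDROCK, // new in 2.3.9; omit to keep the previous behavior
    maxTokens: 4096,
    startIndex: 0,
    tokenCounter,
    indexTokenCountMap: {}, // per-message token counts, keyed by message index
  });
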
@@ -20,7 +22,7 @@ function isIndexInContext(arrayA: unknown[], arrayB: unknown[], targetIndex: num
   return targetIndex >= startingIndexInA;
 }
 
-function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText): MessageContentComplex[] {
+function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText | ReasoningContentText): MessageContentComplex[] {
   const content: MessageContentComplex[] = Array.isArray(message.content)
     ? message.content as MessageContentComplex[]
     : [{
@@ -52,6 +54,13 @@ export function calculateTotalTokens(usage: Partial<UsageMetadata>): UsageMetada
   };
 }
 
+export type PruningResult = {
+  context: BaseMessage[];
+  remainingContextTokens: number;
+  messagesToRefine: BaseMessage[];
+  thinkingStartIndex?: number;
+};
+
 /**
  * Processes an array of messages and returns a context of messages that fit within a specified token limit.
  * It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
@@ -65,20 +74,19 @@ export function getMessagesWithinTokenLimit({
   indexTokenCountMap,
   startType: _startType,
   thinkingEnabled,
-  /** We may need to use this when recalculating */
   tokenCounter,
+  thinkingStartIndex: _thinkingStartIndex = -1,
+  reasoningType = ContentTypes.THINKING,
 }: {
   messages: BaseMessage[];
   maxContextTokens: number;
   indexTokenCountMap: Record<string, number | undefined>;
-  tokenCounter: TokenCounter;
-  startType?: string;
+  startType?: string | string[];
   thinkingEnabled?: boolean;
-}): {
-  context: BaseMessage[];
-  remainingContextTokens: number;
-  messagesToRefine: BaseMessage[];
-} {
+  tokenCounter: TokenCounter;
+  thinkingStartIndex?: number;
+  reasoningType?: ContentTypes.THINKING | ContentTypes.REASONING_CONTENT;
+}): PruningResult {
   // Every reply is primed with <|start|>assistant<|message|>, so we
   // start with 3 tokens for the label after all messages have been counted.
   let currentTokenCount = 3;
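
With the widened signature above, a caller can now seed the search with a previously located thinking block and choose which content type counts as reasoning; the return type is the new `PruningResult`. A hedged sketch of such a call, with illustrative values and with parameters untouched by this diff (e.g. `instructions`) omitted:

  const { context, remainingContextTokens, messagesToRefine, thinkingStartIndex } =
    getMessagesWithinTokenLimit({
      messages,
      maxContextTokens: 4096,
      indexTokenCountMap,
      startType: ['ai', 'human'], // now also accepts an array of acceptable starting types
      thinkingEnabled: true,
      tokenCounter,
      thinkingStartIndex: -1, // -1 (the default) means the block has not been located yet
      reasoningType: ContentTypes.REASONING_CONTENT, // e.g. for Bedrock; THINKING otherwise
    });
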
@@ -96,12 +104,19 @@ export function getMessagesWithinTokenLimit({
    * */
   let context: Array<BaseMessage | undefined> = [];
 
-  let thinkingStartIndex = -1;
+  let thinkingStartIndex = _thinkingStartIndex;
   let thinkingEndIndex = -1;
-  let thinkingBlock: ThinkingContentText | undefined;
+  let thinkingBlock: ThinkingContentText | ReasoningContentText | undefined;
   const endIndex = instructions != null ? 1 : 0;
   const prunedMemory: BaseMessage[] = [];
 
+  if (_thinkingStartIndex > -1) {
+    const thinkingMessageContent = _messages[_thinkingStartIndex]?.content;
+    if (Array.isArray(thinkingMessageContent)) {
+      thinkingBlock = thinkingMessageContent.find((content) => content.type === reasoningType) as ThinkingContentText | undefined;
+    }
+  }
+
   if (currentTokenCount < remainingContextTokens) {
     let currentIndex = messages.length;
     while (messages.length > 0 && currentTokenCount < remainingContextTokens && currentIndex > endIndex) {
@@ -116,7 +131,7 @@ export function getMessagesWithinTokenLimit({
         thinkingEndIndex = currentIndex;
       }
       if (thinkingEndIndex > -1 && !thinkingBlock && thinkingStartIndex < 0 && messageType === 'ai' && Array.isArray(poppedMessage.content)) {
-        thinkingBlock = (poppedMessage.content.find((content) => content.type === ContentTypes.THINKING)) as ThinkingContentText | undefined;
+        thinkingBlock = (poppedMessage.content.find((content) => content.type === reasoningType)) as ThinkingContentText | undefined;
         thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
       }
       /** False start, the latest message was not part of a multi-assistant/tool sequence of messages */
@@ -135,22 +150,34 @@ export function getMessagesWithinTokenLimit({
         currentTokenCount += tokenCount;
       } else {
         prunedMemory.push(poppedMessage);
-        if (thinkingEndIndex > -1) {
+        if (thinkingEndIndex > -1 && thinkingStartIndex < 0) {
          continue;
         }
         break;
       }
     }
 
-    if (thinkingEndIndex > -1 && context[context.length - 1]?.getType() === 'tool') {
-      startType = 'ai';
+    if (context[context.length - 1]?.getType() === 'tool') {
+      startType = ['ai', 'human'];
     }
 
-    if (startType != null && startType && context.length > 0) {
-      const requiredTypeIndex = context.findIndex(msg => msg?.getType() === startType);
+    if (startType != null && startType.length > 0 && context.length > 0) {
+      let requiredTypeIndex = -1;
+
+      let totalTokens = 0;
+      for (let i = context.length - 1; i >= 0; i--) {
+        const currentType = context[i]?.getType() ?? '';
+        if (Array.isArray(startType) ? startType.includes(currentType) : currentType === startType) {
+          requiredTypeIndex = i + 1;
+          break;
+        }
+        const originalIndex = originalLength - 1 - i;
+        totalTokens += indexTokenCountMap[originalIndex] ?? 0;
+      }
 
       if (requiredTypeIndex > 0) {
-        context = context.slice(requiredTypeIndex);
+        currentTokenCount -= totalTokens;
+        context = context.slice(0, requiredTypeIndex);
       }
     }
   }
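
The replacement scan above generalizes the old single-type `findIndex`: starting from the oldest kept message, it walks toward newer ones until it finds a type allowed to start the window, then slices off everything older and refunds the dropped token counts. A standalone illustration of the same walk over plain type strings (not the package code):

  // context is built newest-first, so index 0 is the newest message and the
  // last index is the oldest message kept so far.
  const contextTypes = ['ai', 'tool', 'ai', 'tool']; // oldest kept entry is an orphaned tool result
  const startType = ['ai', 'human'];
  let requiredTypeIndex = -1;
  for (let i = contextTypes.length - 1; i >= 0; i--) {
    if (startType.includes(contextTypes[i])) {
      requiredTypeIndex = i + 1; // keep indices 0..i
      break;
    }
    // entries skipped here are the ones whose token counts get refunded
  }
  // requiredTypeIndex === 3, so slice(0, 3) drops the trailing orphaned 'tool';
  // after the later reverse(), the final context starts with an 'ai' message.
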
@@ -161,12 +188,16 @@ export function getMessagesWithinTokenLimit({
   }
 
   remainingContextTokens -= currentTokenCount;
-  const result = {
+  const result: PruningResult = {
     remainingContextTokens,
     context: [] as BaseMessage[],
     messagesToRefine: prunedMemory,
   };
 
+  if (thinkingStartIndex > -1) {
+    result.thinkingStartIndex = thinkingStartIndex;
+  }
+
   if (prunedMemory.length === 0 || thinkingEndIndex < 0 || (thinkingStartIndex > -1 && isIndexInContext(_messages, context, thinkingStartIndex))) {
     // we reverse at this step to ensure the context is in the correct order for the model, and we need to work backwards
     result.context = context.reverse() as BaseMessage[];
@@ -270,6 +301,7 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
   let lastTurnStartIndex = factoryParams.startIndex;
   let lastCutOffIndex = 0;
   let totalTokens = (Object.values(indexTokenCountMap)).reduce((a, b) => a + b, 0);
+  let runThinkingStartIndex = -1;
   return function pruneMessages(params: PruneMessagesParams): {
     context: BaseMessage[];
     indexTokenCountMap: Record<string, number>;
@@ -339,15 +371,19 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
      return { context: params.messages, indexTokenCountMap };
    }
 
-    const { context } = getMessagesWithinTokenLimit({
+    const { context, thinkingStartIndex } = getMessagesWithinTokenLimit({
      maxContextTokens: factoryParams.maxTokens,
      messages: params.messages,
      indexTokenCountMap,
      startType: params.startType,
      thinkingEnabled: factoryParams.thinkingEnabled,
      tokenCounter: factoryParams.tokenCounter,
+      reasoningType: factoryParams.provider === Providers.BEDROCK ? ContentTypes.REASONING_CONTENT : ContentTypes.THINKING,
+      thinkingStartIndex: factoryParams.thinkingEnabled === true ? runThinkingStartIndex : undefined,
    });
-    lastCutOffIndex = Math.max(params.messages.length - context.length, 0);
+    runThinkingStartIndex = thinkingStartIndex ?? -1;
+    /** The index is the first value of `context`, index relative to `params.messages` */
+    lastCutOffIndex = Math.max(params.messages.length - (context.length - (context[0]?.getType() === 'system' ? 1 : 0)), 0);
 
    return { context, indexTokenCountMap };
  };
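
Taken together, the factory now threads provider and thinking state through every call: `reasoningType` is derived from the provider (Bedrock reasoning arrives as REASONING_CONTENT, other providers as THINKING), and the index of the preserved thinking block is remembered in the closure so later prunes of the same run re-anchor on it. A hedged two-call sketch with placeholder values, reusing the assumed `tokenCounter` from the earlier sketch:

  const prune = createPruneMessages({
    provider: Providers.BEDROCK,
    maxTokens: 8192,
    startIndex: 0,
    tokenCounter,
    indexTokenCountMap: {},
    thinkingEnabled: true,
  });

  const firstTurn = prune({ messages });
  // runThinkingStartIndex now holds the located thinking block's index (or -1)
  const secondTurn = prune({ messages: updatedMessages });
  // the remembered index is passed back into getMessagesWithinTokenLimit on this call
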
@@ -1,7 +1,7 @@
 // src/specs/prune.test.ts
 import { config } from 'dotenv';
 config();
-import { HumanMessage, AIMessage, SystemMessage, BaseMessage } from '@langchain/core/messages';
+import { HumanMessage, AIMessage, SystemMessage, BaseMessage, ToolMessage } from '@langchain/core/messages';
 import type { RunnableConfig } from '@langchain/core/runnables';
 import type { UsageMetadata } from '@langchain/core/messages';
 import type * as t from '@/types';
@@ -512,6 +512,187 @@ describe('Prune Messages Tests', () => {
     });
   });
 
+  describe('Tool Message Handling', () => {
+    it('should ensure context does not start with a tool message by finding an AI message', () => {
+      const tokenCounter = createTestTokenCounter();
+      const messages = [
+        new SystemMessage('System instruction'),
+        new AIMessage('AI message 1'),
+        new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
+        new AIMessage('AI message 2'),
+        new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' })
+      ];
+
+      const indexTokenCountMap = {
+        0: 17, // System instruction
+        1: 12, // AI message 1
+        2: 13, // Tool result 1
+        3: 12, // AI message 2
+        4: 13 // Tool result 2
+      };
+
+      // Create a pruneMessages function with a token limit that will only include the last few messages
+      const pruneMessages = createPruneMessages({
+        maxTokens: 58, // Only enough for system + last 3 messages + 3, but should not include a parent-less tool message
+        startIndex: 0,
+        tokenCounter,
+        indexTokenCountMap: { ...indexTokenCountMap }
+      });
+
+      const result = pruneMessages({ messages });
+
+      // The context should include the system message, AI message 2, and Tool result 2
+      // It should NOT start with Tool result 2 alone
+      expect(result.context.length).toBe(3);
+      expect(result.context[0]).toBe(messages[0]); // System message
+      expect(result.context[1]).toBe(messages[3]); // AI message 2
+      expect(result.context[2]).toBe(messages[4]); // Tool result 2
+    });
+
+    it('should ensure context does not start with a tool message by finding a human message', () => {
+      const tokenCounter = createTestTokenCounter();
+      const messages = [
+        new SystemMessage('System instruction'),
+        new HumanMessage('Human message 1'),
+        new AIMessage('AI message 1'),
+        new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
+        new HumanMessage('Human message 2'),
+        new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' })
+      ];
+
+      const indexTokenCountMap = {
+        0: 17, // System instruction
+        1: 15, // Human message 1
+        2: 12, // AI message 1
+        3: 13, // Tool result 1
+        4: 15, // Human message 2
+        5: 13 // Tool result 2
+      };
+
+      // Create a pruneMessages function with a token limit that will only include the last few messages
+      const pruneMessages = createPruneMessages({
+        maxTokens: 48, // Only enough for system + last 2 messages
+        startIndex: 0,
+        tokenCounter,
+        indexTokenCountMap: { ...indexTokenCountMap }
+      });
+
+      const result = pruneMessages({ messages });
+
+      // The context should include the system message, Human message 2, and Tool result 2
+      // It should NOT start with Tool result 2 alone
+      expect(result.context.length).toBe(3);
+      expect(result.context[0]).toBe(messages[0]); // System message
+      expect(result.context[1]).toBe(messages[4]); // Human message 2
+      expect(result.context[2]).toBe(messages[5]); // Tool result 2
+    });
+
+    it('should handle the case where a tool message is followed by an AI message', () => {
+      const tokenCounter = createTestTokenCounter();
+      const messages = [
+        new SystemMessage('System instruction'),
+        new HumanMessage('Human message'),
+        new AIMessage('AI message with tool use'),
+        new ToolMessage({ content: 'Tool result', tool_call_id: 'tool1' }),
+        new AIMessage('AI message after tool')
+      ];
+
+      const indexTokenCountMap = {
+        0: 17, // System instruction
+        1: 13, // Human message
+        2: 22, // AI message with tool use
+        3: 11, // Tool result
+        4: 19 // AI message after tool
+      };
+
+      const pruneMessages = createPruneMessages({
+        maxTokens: 50,
+        startIndex: 0,
+        tokenCounter,
+        indexTokenCountMap: { ...indexTokenCountMap }
+      });
+
+      const result = pruneMessages({ messages });
+
+      expect(result.context.length).toBe(2);
+      expect(result.context[0]).toBe(messages[0]); // System message
+      expect(result.context[1]).toBe(messages[4]); // AI message after tool
+    });
+
+    it('should handle the case where a tool message is followed by a human message', () => {
+      const tokenCounter = createTestTokenCounter();
+      const messages = [
+        new SystemMessage('System instruction'),
+        new HumanMessage('Human message 1'),
+        new AIMessage('AI message with tool use'),
+        new ToolMessage({ content: 'Tool result', tool_call_id: 'tool1' }),
+        new HumanMessage('Human message 2')
+      ];
+
+      const indexTokenCountMap = {
+        0: 17, // System instruction
+        1: 15, // Human message 1
+        2: 22, // AI message with tool use
+        3: 11, // Tool result
+        4: 15 // Human message 2
+      };
+
+      const pruneMessages = createPruneMessages({
+        maxTokens: 46,
+        startIndex: 0,
+        tokenCounter,
+        indexTokenCountMap: { ...indexTokenCountMap }
+      });
+
+      const result = pruneMessages({ messages });
+
+      expect(result.context.length).toBe(2);
+      expect(result.context[0]).toBe(messages[0]); // System message
+      expect(result.context[1]).toBe(messages[4]); // Human message 2
+    });
+
+    it('should handle complex sequence with multiple tool messages', () => {
+      const tokenCounter = createTestTokenCounter();
+      const messages = [
+        new SystemMessage('System instruction'),
+        new HumanMessage('Human message 1'),
+        new AIMessage('AI message 1 with tool use'),
+        new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
+        new AIMessage('AI message 2 with tool use'),
+        new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' }),
+        new AIMessage('AI message 3 with tool use'),
+        new ToolMessage({ content: 'Tool result 3', tool_call_id: 'tool3' })
+      ];
+
+      const indexTokenCountMap = {
+        0: 17, // System instruction
+        1: 15, // Human message 1
+        2: 26, // AI message 1 with tool use
+        3: 13, // Tool result 1
+        4: 26, // AI message 2 with tool use
+        5: 13, // Tool result 2
+        6: 26, // AI message 3 with tool use
+        7: 13 // Tool result 3
+      };
+
+      const pruneMessages = createPruneMessages({
+        maxTokens: 111,
+        startIndex: 0,
+        tokenCounter,
+        indexTokenCountMap: { ...indexTokenCountMap }
+      });
+
+      const result = pruneMessages({ messages });
+
+      expect(result.context.length).toBe(5);
+      expect(result.context[0]).toBe(messages[0]); // System message
+      expect(result.context[1]).toBe(messages[4]); // AI message 2 with tool use
+      expect(result.context[2]).toBe(messages[5]); // Tool result 2
+      expect(result.context[3]).toBe(messages[6]); // AI message 3 with tool use
+      expect(result.context[4]).toBe(messages[7]); // Tool result 3
+    });
+  });
+
   describe('Integration with Run', () => {
     it('should initialize Run with custom token counter and process messages', async () => {
       const provider = Providers.OPENAI;
@@ -235,9 +235,9 @@ describe('Token Distribution Edge Case Tests', () => {
     });
 
     // Add two more messages
+    messages.push(new HumanMessage('Message 4'));
     const extendedMessages = [
       ...messages,
-      new HumanMessage('Message 4'),
       new AIMessage('Response 4')
     ];
 
@@ -257,6 +257,7 @@ describe('Token Distribution Edge Case Tests', () => {
     // The context should include the system message and some of the latest messages
     expect(thirdResult.context.length).toBeGreaterThan(0);
     expect(thirdResult.context[0].content).toBe('System instruction');
+    expect(thirdResult.context[1].content).toBe('Response 4');
 
     // Find which messages are in the final context
     const contextMessageIndices = thirdResult.context.map(msg => {
@@ -282,14 +283,12 @@ describe('Token Distribution Edge Case Tests', () => {
     // Verify that messages not in the context have their original token counts or previously adjusted values
     for (let i = 0; i < extendedMessages.length; i++) {
       if (!contextMessageIndices.includes(i)) {
-        // This message is not in the context, so its token count should not have been adjusted in the last operation
         const expectedValue = i < messages.length
           ? (secondResult.indexTokenCountMap[i] || indexTokenCountMap[i])
-          : (indexTokenCountMap as Record<string, number | undefined>)[i] ?? indexTokenCountMap[i - 1];
+          : (indexTokenCountMap as Record<string, number | undefined>)[i] ?? 0;
 
-        // For defined values, we can check that they're close to what we expect
         const difference = Math.abs((thirdResult.indexTokenCountMap[i] || 0) - expectedValue);
-        expect(difference).toBeLessThan(20); // Allow for some implementation differences
+        expect(difference).toBe(0);
       }
     }
   });
package/src/types/llm.ts CHANGED
@@ -35,16 +35,23 @@ export type AzureClientOptions = (Partial<OpenAIChatInput> & Partial<AzureOpenAI
 } & BaseChatModelParams & {
   configuration?: OAIClientOptions;
 });
-
+export type ThinkingConfig = AnthropicInput['thinking'];
 export type ChatOpenAIToolType = BindToolsInput | OpenAIClient.ChatCompletionTool;
 export type CommonToolType = StructuredTool | ChatOpenAIToolType;
-
+export type AnthropicReasoning = {
+  thinking?: ThinkingConfig | boolean;
+  thinkingBudget?: number;
+};
 export type OpenAIClientOptions = ChatOpenAIFields;
 export type OllamaClientOptions = ChatOllamaInput;
 export type AnthropicClientOptions = AnthropicInput;
 export type MistralAIClientOptions = ChatMistralAIInput;
 export type VertexAIClientOptions = ChatVertexAIInput;
 export type BedrockClientOptions = BedrockChatFields;
+export type BedrockAnthropicInput = ChatBedrockConverseInput & {
+  additionalModelRequestFields?: ChatBedrockConverseInput['additionalModelRequestFields'] &
+    AnthropicReasoning;
+};
 export type BedrockConverseClientOptions = ChatBedrockConverseInput;
 export type GoogleClientOptions = GoogleGenerativeAIChatInput;
 export type DeepSeekClientOptions = ChatDeepSeekCallOptions;
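
The new `BedrockAnthropicInput` widens the Bedrock Converse options so Anthropic reasoning settings can ride along in `additionalModelRequestFields`. A minimal sketch of what such an options object could look like; the model id and budget are placeholders, and the exact field names Bedrock accepts are not defined by this diff:

  const bedrockOptions: BedrockAnthropicInput = {
    model: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0', // placeholder model id
    additionalModelRequestFields: {
      // ThinkingConfig is AnthropicInput['thinking']
      thinking: { type: 'enabled', budget_tokens: 2048 },
    },
  };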