@librechat/agents 2.4.13 → 2.4.14

This diff shows the changes between these publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
@@ -1,5 +1,13 @@
- import { AIMessage, BaseMessage, UsageMetadata } from '@langchain/core/messages';
- import type { ThinkingContentText, MessageContentComplex, ReasoningContentText } from '@/types/stream';
+ import {
+   AIMessage,
+   BaseMessage,
+   UsageMetadata,
+ } from '@langchain/core/messages';
+ import type {
+   ThinkingContentText,
+   MessageContentComplex,
+   ReasoningContentText,
+ } from '@/types/stream';
  import type { TokenCounter } from '@/types/run';
  import { ContentTypes, Providers } from '@/common';

@@ -8,27 +16,36 @@ export type PruneMessagesFactoryParams = {
    maxTokens: number;
    startIndex: number;
    tokenCounter: TokenCounter;
-   indexTokenCountMap: Record<string, number>;
+   indexTokenCountMap: Record<string, number | undefined>;
    thinkingEnabled?: boolean;
  };
  export type PruneMessagesParams = {
    messages: BaseMessage[];
    usageMetadata?: Partial<UsageMetadata>;
    startType?: ReturnType<BaseMessage['getType']>;
- }
+ };

- function isIndexInContext(arrayA: unknown[], arrayB: unknown[], targetIndex: number): boolean {
+ function isIndexInContext(
+   arrayA: unknown[],
+   arrayB: unknown[],
+   targetIndex: number
+ ): boolean {
    const startingIndexInA = arrayA.length - arrayB.length;
    return targetIndex >= startingIndexInA;
  }

- function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText | ReasoningContentText): AIMessage {
+ function addThinkingBlock(
+   message: AIMessage,
+   thinkingBlock: ThinkingContentText | ReasoningContentText
+ ): AIMessage {
    const content: MessageContentComplex[] = Array.isArray(message.content)
-     ? message.content as MessageContentComplex[]
-     : [{
-       type: ContentTypes.TEXT,
-       text: message.content,
-     }];
+     ? (message.content as MessageContentComplex[])
+     : [
+         {
+           type: ContentTypes.TEXT,
+           text: message.content,
+         },
+       ];
    /** Edge case, the message already has the thinking block */
    if (content[0].type === thinkingBlock.type) {
      return message;
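Note: widening indexTokenCountMap to Record<string, number | undefined> is the substantive change in this hunk; a plain Record<string, number> types a missing index as a present number, which is what let the unguarded reads fixed below compile. A minimal standalone sketch of the difference (the map values are invented):

const loose: Record<string, number> = { 0: 42 };
const strict: Record<string, number | undefined> = { 0: 42 };

// Typed as `number` even though key 99 is absent; yields NaN at runtime:
const unsafeSum = loose[99] + 1;

// The widened type forces the `?? 0` fallbacks added throughout this diff:
const safeSum = (strict[99] ?? 0) + 1;

console.log(unsafeSum, safeSum); // NaN 1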
@@ -36,7 +53,7 @@ function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText
    content.unshift(thinkingBlock);
    return new AIMessage({
      ...message,
-     content
+     content,
    });
  }
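Note: several hunks below revolve around addThinkingBlock, whose behavior is unchanged here: re-attach a thinking block as the first element of an assistant message's content, unless one already leads it. A standalone sketch of that logic (prependThinking and the simplified block shape are invented stand-ins for the real types):

type Block = { type: string; [key: string]: unknown };

// Mirrors the hunk above: skip if a thinking block already leads the
// content, otherwise unshift it so the reasoning block comes first.
function prependThinking(content: Block[], thinkingBlock: Block): Block[] {
  if (content[0]?.type === thinkingBlock.type) {
    return content;
  }
  content.unshift(thinkingBlock);
  return content;
}

console.log(
  prependThinking([{ type: 'text', text: 'Final answer.' }], {
    type: 'thinking',
    thinking: 'Step by step…',
  }).map((b) => b.type)
); // [ 'thinking', 'text' ]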
@@ -46,7 +63,9 @@ function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText
   * @param usage The usage metadata object containing token information
   * @returns An object containing the total input and output tokens
   */
- export function calculateTotalTokens(usage: Partial<UsageMetadata>): UsageMetadata {
+ export function calculateTotalTokens(
+   usage: Partial<UsageMetadata>
+ ): UsageMetadata {
    const baseInputTokens = Number(usage.input_tokens) || 0;
    const cacheCreation = Number(usage.input_token_details?.cache_creation) || 0;
    const cacheRead = Number(usage.input_token_details?.cache_read) || 0;
@@ -57,7 +76,7 @@ export function calculateTotalTokens(usage: Partial<UsageMetadata>): UsageMetada
    return {
      input_tokens: totalInputTokens,
      output_tokens: totalOutputTokens,
-     total_tokens: totalInputTokens + totalOutputTokens
+     total_tokens: totalInputTokens + totalOutputTokens,
    };
  }
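Worked example of the arithmetic reformatted above: calculateTotalTokens folds both prompt-cache buckets into the input side before totaling. The numbers below are invented, and the output side is assumed to pass through unchanged (those lines are elided from the hunks):

// input = base input + cache_creation + cache_read; total = input + output.
const usage = {
  input_tokens: 1000,
  input_token_details: { cache_creation: 200, cache_read: 300 },
  output_tokens: 150,
};
const totalInputTokens =
  (Number(usage.input_tokens) || 0) +
  (Number(usage.input_token_details?.cache_creation) || 0) +
  (Number(usage.input_token_details?.cache_read) || 0);
console.log({
  input_tokens: totalInputTokens, // 1500
  output_tokens: usage.output_tokens, // 150
  total_tokens: totalInputTokens + usage.output_tokens, // 1650
});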
@@ -97,8 +116,10 @@ export function getMessagesWithinTokenLimit({
    // Every reply is primed with <|start|>assistant<|message|>, so we
    // start with 3 tokens for the label after all messages have been counted.
    let currentTokenCount = 3;
-   const instructions = _messages[0]?.getType() === 'system' ? _messages[0] : undefined;
-   const instructionsTokenCount = instructions != null ? indexTokenCountMap[0] ?? 0 : 0;
+   const instructions =
+     _messages[0]?.getType() === 'system' ? _messages[0] : undefined;
+   const instructionsTokenCount =
+     instructions != null ? (indexTokenCountMap[0] ?? 0) : 0;
    const initialContextTokens = maxContextTokens - instructionsTokenCount;
    let remainingContextTokens = initialContextTokens;
    let startType = _startType;
@@ -120,13 +141,19 @@ export function getMessagesWithinTokenLimit({
    if (_thinkingStartIndex > -1) {
      const thinkingMessageContent = messages[_thinkingStartIndex]?.content;
      if (Array.isArray(thinkingMessageContent)) {
-       thinkingBlock = thinkingMessageContent.find((content) => content.type === reasoningType) as ThinkingContentText | undefined;
+       thinkingBlock = thinkingMessageContent.find(
+         (content) => content.type === reasoningType
+       ) as ThinkingContentText | undefined;
      }
    }

    if (currentTokenCount < remainingContextTokens) {
      let currentIndex = messages.length;
-     while (messages.length > 0 && currentTokenCount < remainingContextTokens && currentIndex > endIndex) {
+     while (
+       messages.length > 0 &&
+       currentTokenCount < remainingContextTokens &&
+       currentIndex > endIndex
+     ) {
        currentIndex--;
        if (messages.length === 1 && instructions) {
          break;
@@ -134,25 +161,42 @@ export function getMessagesWithinTokenLimit({
        const poppedMessage = messages.pop();
        if (!poppedMessage) continue;
        const messageType = poppedMessage.getType();
-       if (thinkingEnabled === true && thinkingEndIndex === -1 && (currentIndex === (originalLength - 1)) && (messageType === 'ai' || messageType === 'tool')) {
+       if (
+         thinkingEnabled === true &&
+         thinkingEndIndex === -1 &&
+         currentIndex === originalLength - 1 &&
+         (messageType === 'ai' || messageType === 'tool')
+       ) {
          thinkingEndIndex = currentIndex;
        }
-       if (thinkingEndIndex > -1 && !thinkingBlock && thinkingStartIndex < 0 && messageType === 'ai' && Array.isArray(poppedMessage.content)) {
-         thinkingBlock = (poppedMessage.content.find((content) => content.type === reasoningType)) as ThinkingContentText | undefined;
+       if (
+         thinkingEndIndex > -1 &&
+         !thinkingBlock &&
+         thinkingStartIndex < 0 &&
+         messageType === 'ai' &&
+         Array.isArray(poppedMessage.content)
+       ) {
+         thinkingBlock = poppedMessage.content.find(
+           (content) => content.type === reasoningType
+         ) as ThinkingContentText | undefined;
          thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
        }
        /** False start, the latest message was not part of a multi-assistant/tool sequence of messages */
        if (
-         thinkingEndIndex > -1
-         && currentIndex === (thinkingEndIndex - 1)
-         && (messageType !== 'ai' && messageType !== 'tool')
+         thinkingEndIndex > -1 &&
+         currentIndex === thinkingEndIndex - 1 &&
+         messageType !== 'ai' &&
+         messageType !== 'tool'
        ) {
          thinkingEndIndex = -1;
        }

        const tokenCount = indexTokenCountMap[currentIndex] ?? 0;

-       if (prunedMemory.length === 0 && ((currentTokenCount + tokenCount) <= remainingContextTokens)) {
+       if (
+         prunedMemory.length === 0 &&
+         currentTokenCount + tokenCount <= remainingContextTokens
+       ) {
          context.push(poppedMessage);
          currentTokenCount += tokenCount;
        } else {
@@ -174,7 +218,11 @@ export function getMessagesWithinTokenLimit({
      let totalTokens = 0;
      for (let i = context.length - 1; i >= 0; i--) {
        const currentType = context[i]?.getType() ?? '';
-       if (Array.isArray(startType) ? startType.includes(currentType) : currentType === startType) {
+       if (
+         Array.isArray(startType)
+           ? startType.includes(currentType)
+           : currentType === startType
+       ) {
          requiredTypeIndex = i + 1;
          break;
        }
@@ -205,18 +253,27 @@ export function getMessagesWithinTokenLimit({
      result.thinkingStartIndex = thinkingStartIndex;
    }

-   if (prunedMemory.length === 0 || thinkingEndIndex < 0 || (thinkingStartIndex > -1 && isIndexInContext(_messages, context, thinkingStartIndex))) {
+   if (
+     prunedMemory.length === 0 ||
+     thinkingEndIndex < 0 ||
+     (thinkingStartIndex > -1 &&
+       isIndexInContext(_messages, context, thinkingStartIndex))
+   ) {
      // we reverse at this step to ensure the context is in the correct order for the model, and we need to work backwards
      result.context = context.reverse() as BaseMessage[];
      return result;
    }

    if (thinkingEndIndex > -1 && thinkingStartIndex < 0) {
-     throw new Error('The payload is malformed. There is a thinking sequence but no "AI" messages with thinking blocks.');
+     throw new Error(
+       'The payload is malformed. There is a thinking sequence but no "AI" messages with thinking blocks.'
+     );
    }

    if (!thinkingBlock) {
-     throw new Error('The payload is malformed. There is a thinking sequence but no thinking block found.');
+     throw new Error(
+       'The payload is malformed. There is a thinking sequence but no thinking block found.'
+     );
    }

    // Since we have a thinking sequence, we need to find the last assistant message
@@ -235,13 +292,20 @@ export function getMessagesWithinTokenLimit({
    }

    if (assistantIndex === -1) {
-     throw new Error('The payload is malformed. There is a thinking sequence but no "AI" messages to append thinking blocks to.');
+     throw new Error(
+       'The payload is malformed. There is a thinking sequence but no "AI" messages to append thinking blocks to.'
+     );
    }

    thinkingStartIndex = originalLength - 1 - assistantIndex;
-   const thinkingTokenCount = tokenCounter(new AIMessage({ content: [thinkingBlock] }));
+   const thinkingTokenCount = tokenCounter(
+     new AIMessage({ content: [thinkingBlock] })
+   );
    const newRemainingCount = remainingContextTokens - thinkingTokenCount;
-   const newMessage = addThinkingBlock(context[assistantIndex] as AIMessage, thinkingBlock);
+   const newMessage = addThinkingBlock(
+     context[assistantIndex] as AIMessage,
+     thinkingBlock
+   );
    context[assistantIndex] = newMessage;
    if (newRemainingCount > 0) {
      result.context = context.reverse() as BaseMessage[];
@@ -250,18 +314,23 @@ export function getMessagesWithinTokenLimit({

    const thinkingMessage: AIMessage = context[assistantIndex] as AIMessage;
    // now we need to an additional round of pruning but making the thinking block fit
-   const newThinkingMessageTokenCount = (indexTokenCountMap[thinkingStartIndex] ?? 0) + thinkingTokenCount;
+   const newThinkingMessageTokenCount =
+     (indexTokenCountMap[thinkingStartIndex] ?? 0) + thinkingTokenCount;
    remainingContextTokens = initialContextTokens - newThinkingMessageTokenCount;
    currentTokenCount = 3;
    let newContext: BaseMessage[] = [];
    const secondRoundMessages = [..._messages];
    let currentIndex = secondRoundMessages.length;
-   while (secondRoundMessages.length > 0 && currentTokenCount < remainingContextTokens && currentIndex > thinkingStartIndex) {
+   while (
+     secondRoundMessages.length > 0 &&
+     currentTokenCount < remainingContextTokens &&
+     currentIndex > thinkingStartIndex
+   ) {
      currentIndex--;
      const poppedMessage = secondRoundMessages.pop();
      if (!poppedMessage) continue;
      const tokenCount = indexTokenCountMap[currentIndex] ?? 0;
-     if ((currentTokenCount + tokenCount) <= remainingContextTokens) {
+     if (currentTokenCount + tokenCount <= remainingContextTokens) {
        newContext.push(poppedMessage);
        currentTokenCount += tokenCount;
      } else {
282
351
  let totalTokens = 0;
283
352
  for (let i = newContext.length - 1; i >= 0; i--) {
284
353
  const currentType = newContext[i]?.getType() ?? '';
285
- if (Array.isArray(startType) ? startType.includes(currentType) : currentType === startType) {
354
+ if (
355
+ Array.isArray(startType)
356
+ ? startType.includes(currentType)
357
+ : currentType === startType
358
+ ) {
286
359
  requiredTypeIndex = i + 1;
287
360
  break;
288
361
  }
@@ -320,23 +393,28 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
    const indexTokenCountMap = { ...factoryParams.indexTokenCountMap };
    let lastTurnStartIndex = factoryParams.startIndex;
    let lastCutOffIndex = 0;
-   let totalTokens = (Object.values(indexTokenCountMap)).reduce((a, b) => a + b, 0);
+   let totalTokens = Object.values(indexTokenCountMap).reduce(
+     (a = 0, b = 0) => a + b,
+     0
+   ) as number;
    let runThinkingStartIndex = -1;
    return function pruneMessages(params: PruneMessagesParams): {
      context: BaseMessage[];
-     indexTokenCountMap: Record<string, number>;
+     indexTokenCountMap: Record<string, number | undefined>;
    } {
      let currentUsage: UsageMetadata | undefined;
-     if (params.usageMetadata && (
-       checkValidNumber(params.usageMetadata.input_tokens)
-       || (
-         checkValidNumber(params.usageMetadata.input_token_details)
-         && (
-           checkValidNumber(params.usageMetadata.input_token_details.cache_creation)
-           || checkValidNumber(params.usageMetadata.input_token_details.cache_read)
-         )
-       )
-     ) && checkValidNumber(params.usageMetadata.output_tokens)) {
+     if (
+       params.usageMetadata &&
+       (checkValidNumber(params.usageMetadata.input_tokens) ||
+         (checkValidNumber(params.usageMetadata.input_token_details) &&
+           (checkValidNumber(
+             params.usageMetadata.input_token_details.cache_creation
+           ) ||
+             checkValidNumber(
+               params.usageMetadata.input_token_details.cache_read
+             )))) &&
+       checkValidNumber(params.usageMetadata.output_tokens)
+     ) {
        currentUsage = calculateTotalTokens(params.usageMetadata);
        totalTokens = currentUsage.total_tokens;
      }
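Note: the reduce rewrite above is not just formatting. With the map widened to Record<string, number | undefined>, Object.values(...) yields (number | undefined)[], and the defaulted parameters (a = 0, b = 0) substitute 0 for any undefined entry so the running sum never becomes NaN. A standalone sketch of the same pattern (map contents invented):

const indexTokenCountMap: Record<string, number | undefined> = {
  0: 52,
  1: undefined, // a message not yet counted
  2: 120,
};
// Default parameters kick in only when a value is `undefined`:
const totalTokens = Object.values(indexTokenCountMap).reduce(
  (a = 0, b = 0) => a + b,
  0
) as number;
console.log(totalTokens); // 172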
@@ -344,16 +422,18 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
      const newOutputs = new Set<number>();
      for (let i = lastTurnStartIndex; i < params.messages.length; i++) {
        const message = params.messages[i];
-       // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
-       if (i === lastTurnStartIndex && indexTokenCountMap[i] === undefined && currentUsage) {
+       if (
+         i === lastTurnStartIndex &&
+         indexTokenCountMap[i] === undefined &&
+         currentUsage
+       ) {
          indexTokenCountMap[i] = currentUsage.output_tokens;
-       // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
        } else if (indexTokenCountMap[i] === undefined) {
          indexTokenCountMap[i] = factoryParams.tokenCounter(message);
          if (currentUsage) {
            newOutputs.add(i);
          }
-         totalTokens += indexTokenCountMap[i];
+         totalTokens += indexTokenCountMap[i] ?? 0;
        }
      }

@@ -364,7 +444,7 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
      if (currentUsage) {
        let totalIndexTokens = 0;
        if (params.messages[0].getType() === 'system') {
-         totalIndexTokens += indexTokenCountMap[0];
+         totalIndexTokens += indexTokenCountMap[0] ?? 0;
        }
        for (let i = lastCutOffIndex; i < params.messages.length; i++) {
          if (i === 0 && params.messages[0].getType() === 'system') {
@@ -373,24 +453,31 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
          if (newOutputs.has(i)) {
            continue;
          }
-         totalIndexTokens += indexTokenCountMap[i];
+         totalIndexTokens += indexTokenCountMap[i] ?? 0;
        }

        // Calculate ratio based only on messages that remain in the context
        const ratio = currentUsage.total_tokens / totalIndexTokens;
-       const isRatioSafe = ratio >= 1/3 && ratio <= 2.5;
+       const isRatioSafe = ratio >= 1 / 3 && ratio <= 2.5;

        // Apply the ratio adjustment only to messages at or after lastCutOffIndex, and only if the ratio is safe
        if (isRatioSafe) {
-         if (params.messages[0].getType() === 'system' && lastCutOffIndex !== 0) {
-           indexTokenCountMap[0] = Math.round(indexTokenCountMap[0] * ratio);
+         if (
+           params.messages[0].getType() === 'system' &&
+           lastCutOffIndex !== 0
+         ) {
+           indexTokenCountMap[0] = Math.round(
+             (indexTokenCountMap[0] ?? 0) * ratio
+           );
          }

          for (let i = lastCutOffIndex; i < params.messages.length; i++) {
            if (newOutputs.has(i)) {
              continue;
            }
-           indexTokenCountMap[i] = Math.round(indexTokenCountMap[i] * ratio);
+           indexTokenCountMap[i] = Math.round(
+             (indexTokenCountMap[i] ?? 0) * ratio
+           );
          }
        }
      }
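Worked example of the ratio calibration above: locally estimated per-message counts are rescaled to match provider-reported usage, but only when the correction factor is plausible (between 1/3 and 2.5). All numbers below are invented:

const estimates: Record<string, number | undefined> = { 0: 100, 1: 200, 2: 100 };
const reportedTotalTokens = 500; // provider-reported total_tokens
const totalIndexTokens = 400; // sum of the estimates above

const ratio = reportedTotalTokens / totalIndexTokens; // 1.25
const isRatioSafe = ratio >= 1 / 3 && ratio <= 2.5; // true

if (isRatioSafe) {
  for (const i of Object.keys(estimates)) {
    estimates[i] = Math.round((estimates[i] ?? 0) * ratio);
  }
}
console.log(estimates); // { '0': 125, '1': 250, '2': 125 }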
@@ -407,12 +494,22 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
      startType: params.startType,
      thinkingEnabled: factoryParams.thinkingEnabled,
      tokenCounter: factoryParams.tokenCounter,
-     reasoningType: factoryParams.provider === Providers.BEDROCK ? ContentTypes.REASONING_CONTENT : ContentTypes.THINKING,
-     thinkingStartIndex: factoryParams.thinkingEnabled === true ? runThinkingStartIndex : undefined,
+     reasoningType:
+       factoryParams.provider === Providers.BEDROCK
+         ? ContentTypes.REASONING_CONTENT
+         : ContentTypes.THINKING,
+     thinkingStartIndex:
+       factoryParams.thinkingEnabled === true
+         ? runThinkingStartIndex
+         : undefined,
    });
    runThinkingStartIndex = thinkingStartIndex ?? -1;
    /** The index is the first value of `context`, index relative to `params.messages` */
-   lastCutOffIndex = Math.max(params.messages.length - (context.length - (context[0]?.getType() === 'system' ? 1 : 0)), 0);
+   lastCutOffIndex = Math.max(
+     params.messages.length -
+       (context.length - (context[0]?.getType() === 'system' ? 1 : 0)),
+     0
+   );

    return { context, indexTokenCountMap };
  };
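For orientation, a usage sketch of the factory this file exports. The parameter and return shapes come from the hunks above; the import specifier, the stand-in token counter, and the omission of fields elided from the diff (e.g. provider) are assumptions, not the library's documented API:

import { HumanMessage, AIMessage } from '@langchain/core/messages';
import type { BaseMessage } from '@langchain/core/messages';
import { createPruneMessages } from '@librechat/agents'; // assumed specifier

// Crude stand-in counter (~4 characters per token); not the library's own.
const tokenCounter = (message: BaseMessage): number =>
  Math.ceil(JSON.stringify(message.content).length / 4);

const pruneMessages = createPruneMessages({
  maxTokens: 4096,
  startIndex: 0,
  tokenCounter,
  indexTokenCountMap: {}, // counts are filled in lazily by pruneMessages
  thinkingEnabled: false,
});

const { context, indexTokenCountMap } = pruneMessages({
  messages: [
    new HumanMessage('Summarize the conversation so far.'),
    new AIMessage('Here is a summary…'),
  ],
});
console.log(context.length, indexTokenCountMap);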