@librechat/agents 3.1.98 → 3.1.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -364,7 +364,7 @@ function addThinkingBlock(message, thinkingBlock) {
364
364
  },
365
365
  ];
366
366
  /** Edge case, the message already has the thinking block */
367
- if (content[0].type === thinkingBlock.type) {
367
+ if (content[0]?.type === thinkingBlock.type) {
368
368
  return message;
369
369
  }
370
370
  content.unshift(thinkingBlock);
@@ -397,6 +397,26 @@ function calculateTotalTokens(usage) {
397
397
  total_tokens: totalInputTokens + totalOutputTokens,
398
398
  };
399
399
  }
400
/**
 * Locates a reasoning block in assistant content. Reasoning blocks carry
 * provider-specific `type` tags: Anthropic emits `thinking`, while Bedrock and
 * OpenAI-compatible reasoning providers (DeepSeek-R1, DashScope/Qwen-thinking)
 * emit `reasoning_content`. DeepSeek/Qwen route through the `THINKING` default
 * even though their blocks are `reasoning_content` and aren't normalized
 * upstream, so for the `THINKING` case we also accept `reasoning_content` — this
 * is what fixes issue #191.
 *
 * The broadening is intentionally one-directional. A Bedrock run
 * (`REASONING_CONTENT`) must NOT match an Anthropic `thinking` block: the
 * Bedrock input converter rejects `thinking` blocks outright
 * (`src/llm/bedrock/utils/message_inputs.ts`), so reattaching one to a
 * surviving message would make the request fail before it is sent.
 *
 * Missing (`null`/`undefined`) entries in `content` are skipped instead of
 * dereferenced, so a sparse or partially-built content array cannot make the
 * lookup itself throw.
 *
 * @param {Array<object | null | undefined>} content - Content parts of a single message.
 * @param {string} reasoningType - The `type` tag the current run expects for reasoning blocks.
 * @returns {object | undefined} The first matching content part, or `undefined` when none matches.
 */
function findReasoningBlock(content, reasoningType) {
  // Loop-invariant: only a `THINKING` run additionally accepts `reasoning_content`.
  const alsoAcceptReasoningContent = reasoningType === ContentTypes.THINKING;
  return content.find((part) => {
    // A missing entry can never be a reasoning block; skip it rather than throw.
    if (part == null) {
      return false;
    }
    return (
      part.type === reasoningType ||
      (alsoAcceptReasoningContent && part.type === ContentTypes.REASONING_CONTENT)
    );
  });
}
400
420
  /**
401
421
  * Processes an array of messages and returns a context of messages that fit within a specified token limit.
402
422
  * It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
@@ -429,7 +449,7 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
429
449
  if (_thinkingStartIndex > -1) {
430
450
  const thinkingMessageContent = messages[_thinkingStartIndex]?.content;
431
451
  if (Array.isArray(thinkingMessageContent)) {
432
- thinkingBlock = thinkingMessageContent.find((content) => content.type === reasoningType);
452
+ thinkingBlock = findReasoningBlock(thinkingMessageContent, reasoningType);
433
453
  }
434
454
  }
435
455
  if (currentTokenCount < remainingContextTokens) {
@@ -456,7 +476,7 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
456
476
  thinkingStartIndex < 0 &&
457
477
  messageType === 'ai' &&
458
478
  Array.isArray(poppedMessage.content)) {
459
- thinkingBlock = poppedMessage.content.find((content) => content.type === reasoningType);
479
+ thinkingBlock = findReasoningBlock(poppedMessage.content, reasoningType);
460
480
  thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
461
481
  }
462
482
  /**
@@ -540,11 +560,28 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
540
560
  result.context = context.reverse();
541
561
  return result;
542
562
  }
543
- if (thinkingEndIndex > -1 && thinkingStartIndex < 0) {
544
- throw new Error('The payload is malformed. There is a thinking sequence but no "AI" messages with thinking blocks.');
545
- }
546
- if (!thinkingBlock) {
547
- throw new Error('The payload is malformed. There is a thinking sequence but no thinking block found.');
563
+ /**
564
+ * A trailing reasoning sequence was detected but its block could not be
565
+ * located in the surviving context. Rather than throw — which permanently
566
+ * bricks the conversation, re-firing on every retry of the same thread (see
567
+ * issue #191) — return the partially-pruned context and let the provider
568
+ * surface a real, recoverable error if the payload is genuinely malformed.
569
+ * Strict providers (Anthropic) reject it cleanly; lenient ones (DeepSeek,
570
+ * Qwen) proceed. The pruner cannot know which applies, so it must not be the
571
+ * one to make the failure fatal.
572
+ */
573
+ if ((thinkingEndIndex > -1 && thinkingStartIndex < 0) || !thinkingBlock) {
574
+ /**
575
+ * No block was located, so any `thinkingStartIndex` set above came from a
576
+ * stale carried-over index pointing at a block-less message. Drop it:
577
+ * `createPruneMessages` persists the returned index as
578
+ * `runThinkingStartIndex`, and a stale value would suppress the trailing
579
+ * scan (`thinkingStartIndex < 0`) on later turns, causing a real reasoning
580
+ * block to be missed and never reattached.
581
+ */
582
+ delete result.thinkingStartIndex;
583
+ result.context = context.reverse();
584
+ return result;
548
585
  }
549
586
  let assistantIndex = -1;
550
587
  for (let i = 0; i < context.length; i++) {