@librechat/agents 3.1.98 → 3.1.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -366,7 +366,7 @@ function addThinkingBlock(message, thinkingBlock) {
366
366
  },
367
367
  ];
368
368
  /** Edge case, the message already has the thinking block */
369
- if (content[0].type === thinkingBlock.type) {
369
+ if (content[0]?.type === thinkingBlock.type) {
370
370
  return message;
371
371
  }
372
372
  content.unshift(thinkingBlock);
@@ -399,6 +399,26 @@ function calculateTotalTokens(usage) {
399
399
  total_tokens: totalInputTokens + totalOutputTokens,
400
400
  };
401
401
  }
402
+ /**
403
+ * Locates a reasoning block in assistant content. Reasoning blocks carry
404
+ * provider-specific `type` tags: Anthropic emits `thinking`, while Bedrock and
405
+ * OpenAI-compatible reasoning providers (DeepSeek-R1, DashScope/Qwen-thinking)
406
+ * emit `reasoning_content`. DeepSeek/Qwen route through the `THINKING` default
407
+ * even though their blocks are `reasoning_content` and aren't normalized
408
+ * upstream, so for the `THINKING` case we also accept `reasoning_content` — this
409
+ * is what fixes issue #191.
410
+ *
411
+ * The broadening is intentionally one-directional. A Bedrock run
412
+ * (`REASONING_CONTENT`) must NOT match an Anthropic `thinking` block: the
413
+ * Bedrock input converter rejects `thinking` blocks outright
414
+ * (`src/llm/bedrock/utils/message_inputs.ts`), so reattaching one to a
415
+ * surviving message would make the request fail before it is sent.
416
+ */
417
+ function findReasoningBlock(content, reasoningType) {
418
+ return content.find((part) => part.type === reasoningType ||
419
+ (reasoningType === _enum.ContentTypes.THINKING &&
420
+ part.type === _enum.ContentTypes.REASONING_CONTENT));
421
+ }
402
422
  /**
403
423
  * Processes an array of messages and returns a context of messages that fit within a specified token limit.
404
424
  * It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
@@ -431,7 +451,7 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
431
451
  if (_thinkingStartIndex > -1) {
432
452
  const thinkingMessageContent = messages$1[_thinkingStartIndex]?.content;
433
453
  if (Array.isArray(thinkingMessageContent)) {
434
- thinkingBlock = thinkingMessageContent.find((content) => content.type === reasoningType);
454
+ thinkingBlock = findReasoningBlock(thinkingMessageContent, reasoningType);
435
455
  }
436
456
  }
437
457
  if (currentTokenCount < remainingContextTokens) {
@@ -458,7 +478,7 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
458
478
  thinkingStartIndex < 0 &&
459
479
  messageType === 'ai' &&
460
480
  Array.isArray(poppedMessage.content)) {
461
- thinkingBlock = poppedMessage.content.find((content) => content.type === reasoningType);
481
+ thinkingBlock = findReasoningBlock(poppedMessage.content, reasoningType);
462
482
  thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
463
483
  }
464
484
  /**
@@ -542,11 +562,28 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
542
562
  result.context = context.reverse();
543
563
  return result;
544
564
  }
545
- if (thinkingEndIndex > -1 && thinkingStartIndex < 0) {
546
- throw new Error('The payload is malformed. There is a thinking sequence but no "AI" messages with thinking blocks.');
547
- }
548
- if (!thinkingBlock) {
549
- throw new Error('The payload is malformed. There is a thinking sequence but no thinking block found.');
565
+ /**
566
+ * A trailing reasoning sequence was detected but its block could not be
567
+ * located in the surviving context. Rather than throw — which permanently
568
+ * bricks the conversation, re-firing on every retry of the same thread (see
569
+ * issue #191) — return the partially-pruned context and let the provider
570
+ * surface a real, recoverable error if the payload is genuinely malformed.
571
+ * Strict providers (Anthropic) reject it cleanly; lenient ones (DeepSeek,
572
+ * Qwen) proceed. The pruner cannot know which applies, so it must not be the
573
+ * one to make the failure fatal.
574
+ */
575
+ if ((thinkingEndIndex > -1 && thinkingStartIndex < 0) || !thinkingBlock) {
576
+ /**
577
+ * No block was located, so any `thinkingStartIndex` set above came from a
578
+ * stale carried-over index pointing at a block-less message. Drop it:
579
+ * `createPruneMessages` persists the returned index as
580
+ * `runThinkingStartIndex`, and a stale value would suppress the trailing
581
+ * scan (`thinkingStartIndex < 0`) on later turns, causing a real reasoning
582
+ * block to be missed and never reattached.
583
+ */
584
+ delete result.thinkingStartIndex;
585
+ result.context = context.reverse();
586
+ return result;
550
587
  }
551
588
  let assistantIndex = -1;
552
589
  for (let i = 0; i < context.length; i++) {