@librechat/agents 3.1.98 → 3.1.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -366,7 +366,7 @@ function addThinkingBlock(message, thinkingBlock) {
|
|
|
366
366
|
},
|
|
367
367
|
];
|
|
368
368
|
/** Edge case, the message already has the thinking block */
|
|
369
|
-
if (content[0]
|
|
369
|
+
if (content[0]?.type === thinkingBlock.type) {
|
|
370
370
|
return message;
|
|
371
371
|
}
|
|
372
372
|
content.unshift(thinkingBlock);
|
|
@@ -399,6 +399,26 @@ function calculateTotalTokens(usage) {
|
|
|
399
399
|
total_tokens: totalInputTokens + totalOutputTokens,
|
|
400
400
|
};
|
|
401
401
|
}
|
|
402
|
+
/**
 * Locates a reasoning block in assistant content. Reasoning blocks carry
 * provider-specific `type` tags: Anthropic emits `thinking`, while Bedrock and
 * OpenAI-compatible reasoning providers (DeepSeek-R1, DashScope/Qwen-thinking)
 * emit `reasoning_content`. DeepSeek/Qwen route through the `THINKING` default
 * even though their blocks are `reasoning_content` and aren't normalized
 * upstream, so for the `THINKING` case we also accept `reasoning_content` — this
 * is what fixes issue #191.
 *
 * The broadening is intentionally one-directional. A Bedrock run
 * (`REASONING_CONTENT`) must NOT match an Anthropic `thinking` block: the
 * Bedrock input converter rejects `thinking` blocks outright
 * (`src/llm/bedrock/utils/message_inputs.ts`), so reattaching one to a
 * surviving message would make the request fail before it is sent.
 *
 * Entries that are `null` or `undefined` are skipped instead of being
 * dereferenced, so a malformed content array results in "no block found"
 * rather than a TypeError thrown from the pruner.
 *
 * @param {Array<*>} content - Content parts of a single message.
 * @param {string} reasoningType - The `ContentTypes` tag the current run uses
 *   for reasoning blocks.
 * @returns {*} The first matching content part, or `undefined` when no part
 *   matches.
 */
function findReasoningBlock(content, reasoningType) {
    return content.find((part) => part != null &&
        (part.type === reasoningType ||
            // One-directional fallback: only a THINKING run may also pick up
            // a `reasoning_content` block, never the other way around.
            (reasoningType === _enum.ContentTypes.THINKING &&
                part.type === _enum.ContentTypes.REASONING_CONTENT)));
}
|
|
402
422
|
/**
|
|
403
423
|
* Processes an array of messages and returns a context of messages that fit within a specified token limit.
|
|
404
424
|
* It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
|
|
@@ -431,7 +451,7 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
|
|
|
431
451
|
if (_thinkingStartIndex > -1) {
|
|
432
452
|
const thinkingMessageContent = messages$1[_thinkingStartIndex]?.content;
|
|
433
453
|
if (Array.isArray(thinkingMessageContent)) {
|
|
434
|
-
thinkingBlock = thinkingMessageContent
|
|
454
|
+
thinkingBlock = findReasoningBlock(thinkingMessageContent, reasoningType);
|
|
435
455
|
}
|
|
436
456
|
}
|
|
437
457
|
if (currentTokenCount < remainingContextTokens) {
|
|
@@ -458,7 +478,7 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
|
|
|
458
478
|
thinkingStartIndex < 0 &&
|
|
459
479
|
messageType === 'ai' &&
|
|
460
480
|
Array.isArray(poppedMessage.content)) {
|
|
461
|
-
thinkingBlock = poppedMessage.content
|
|
481
|
+
thinkingBlock = findReasoningBlock(poppedMessage.content, reasoningType);
|
|
462
482
|
thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
|
|
463
483
|
}
|
|
464
484
|
/**
|
|
@@ -542,11 +562,28 @@ function getMessagesWithinTokenLimit({ messages: _messages, maxContextTokens, in
|
|
|
542
562
|
result.context = context.reverse();
|
|
543
563
|
return result;
|
|
544
564
|
}
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
565
|
+
/**
|
|
566
|
+
* A trailing reasoning sequence was detected but its block could not be
|
|
567
|
+
* located in the surviving context. Rather than throw — which permanently
|
|
568
|
+
* bricks the conversation, re-firing on every retry of the same thread (see
|
|
569
|
+
* issue #191) — return the partially-pruned context and let the provider
|
|
570
|
+
* surface a real, recoverable error if the payload is genuinely malformed.
|
|
571
|
+
* Strict providers (Anthropic) reject it cleanly; lenient ones (DeepSeek,
|
|
572
|
+
* Qwen) proceed. The pruner cannot know which applies, so it must not be the
|
|
573
|
+
* one to make the failure fatal.
|
|
574
|
+
*/
|
|
575
|
+
if ((thinkingEndIndex > -1 && thinkingStartIndex < 0) || !thinkingBlock) {
|
|
576
|
+
/**
|
|
577
|
+
* No block was located, so any `thinkingStartIndex` set above came from a
|
|
578
|
+
* stale carried-over index pointing at a block-less message. Drop it:
|
|
579
|
+
* `createPruneMessages` persists the returned index as
|
|
580
|
+
* `runThinkingStartIndex`, and a stale value would suppress the trailing
|
|
581
|
+
* scan (`thinkingStartIndex < 0`) on later turns, causing a real reasoning
|
|
582
|
+
* block to be missed and never reattached.
|
|
583
|
+
*/
|
|
584
|
+
delete result.thinkingStartIndex;
|
|
585
|
+
result.context = context.reverse();
|
|
586
|
+
return result;
|
|
550
587
|
}
|
|
551
588
|
let assistantIndex = -1;
|
|
552
589
|
for (let i = 0; i < context.length; i++) {
|