@librechat/agents 3.1.98 → 3.1.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/messages/prune.ts
CHANGED
|
@@ -563,7 +563,7 @@ function addThinkingBlock(
|
|
|
563
563
|
},
|
|
564
564
|
];
|
|
565
565
|
/** Edge case, the message already has the thinking block */
|
|
566
|
-
if (content[0].type === thinkingBlock.type) {
|
|
566
|
+
if (content[0]?.type === thinkingBlock.type) {
|
|
567
567
|
return message;
|
|
568
568
|
}
|
|
569
569
|
content.unshift(thinkingBlock);
|
|
@@ -608,6 +608,33 @@ export type PruningResult = {
|
|
|
608
608
|
thinkingStartIndex?: number;
|
|
609
609
|
};
|
|
610
610
|
|
|
611
|
+
/**
|
|
612
|
+
* Locates a reasoning block in assistant content. Reasoning blocks carry
|
|
613
|
+
* provider-specific `type` tags: Anthropic emits `thinking`, while Bedrock and
|
|
614
|
+
* OpenAI-compatible reasoning providers (DeepSeek-R1, DashScope/Qwen-thinking)
|
|
615
|
+
* emit `reasoning_content`. DeepSeek/Qwen route through the `THINKING` default
|
|
616
|
+
* even though their blocks are `reasoning_content` and aren't normalized
|
|
617
|
+
* upstream, so for the `THINKING` case we also accept `reasoning_content` — this
|
|
618
|
+
* is what fixes issue #191.
|
|
619
|
+
*
|
|
620
|
+
* The broadening is intentionally one-directional. A Bedrock run
|
|
621
|
+
* (`REASONING_CONTENT`) must NOT match an Anthropic `thinking` block: the
|
|
622
|
+
* Bedrock input converter rejects `thinking` blocks outright
|
|
623
|
+
* (`src/llm/bedrock/utils/message_inputs.ts`), so reattaching one to a
|
|
624
|
+
* surviving message would make the request fail before it is sent.
|
|
625
|
+
*/
|
|
626
|
+
function findReasoningBlock(
|
|
627
|
+
content: MessageContentComplex[],
|
|
628
|
+
reasoningType: ContentTypes
|
|
629
|
+
): ThinkingContentText | ReasoningContentText | undefined {
|
|
630
|
+
return content.find(
|
|
631
|
+
(part) =>
|
|
632
|
+
part.type === reasoningType ||
|
|
633
|
+
(reasoningType === ContentTypes.THINKING &&
|
|
634
|
+
part.type === ContentTypes.REASONING_CONTENT)
|
|
635
|
+
) as ThinkingContentText | ReasoningContentText | undefined;
|
|
636
|
+
}
|
|
637
|
+
|
|
611
638
|
/**
|
|
612
639
|
* Processes an array of messages and returns a context of messages that fit within a specified token limit.
|
|
613
640
|
* It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
|
|
@@ -670,9 +697,7 @@ export function getMessagesWithinTokenLimit({
|
|
|
670
697
|
if (_thinkingStartIndex > -1) {
|
|
671
698
|
const thinkingMessageContent = messages[_thinkingStartIndex]?.content;
|
|
672
699
|
if (Array.isArray(thinkingMessageContent)) {
|
|
673
|
-
thinkingBlock = thinkingMessageContent.find(
|
|
674
|
-
(content) => content.type === reasoningType
|
|
675
|
-
) as ThinkingContentText | undefined;
|
|
700
|
+
thinkingBlock = findReasoningBlock(thinkingMessageContent, reasoningType);
|
|
676
701
|
}
|
|
677
702
|
}
|
|
678
703
|
|
|
@@ -705,9 +730,10 @@ export function getMessagesWithinTokenLimit({
|
|
|
705
730
|
messageType === 'ai' &&
|
|
706
731
|
Array.isArray(poppedMessage.content)
|
|
707
732
|
) {
|
|
708
|
-
thinkingBlock =
|
|
709
|
-
|
|
710
|
-
|
|
733
|
+
thinkingBlock = findReasoningBlock(
|
|
734
|
+
poppedMessage.content,
|
|
735
|
+
reasoningType
|
|
736
|
+
);
|
|
711
737
|
thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
|
|
712
738
|
}
|
|
713
739
|
/**
|
|
@@ -811,16 +837,28 @@ export function getMessagesWithinTokenLimit({
|
|
|
811
837
|
return result;
|
|
812
838
|
}
|
|
813
839
|
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
840
|
+
/**
|
|
841
|
+
* A trailing reasoning sequence was detected but its block could not be
|
|
842
|
+
* located in the surviving context. Rather than throw — which permanently
|
|
843
|
+
* bricks the conversation, re-firing on every retry of the same thread (see
|
|
844
|
+
* issue #191) — return the partially-pruned context and let the provider
|
|
845
|
+
* surface a real, recoverable error if the payload is genuinely malformed.
|
|
846
|
+
* Strict providers (Anthropic) reject it cleanly; lenient ones (DeepSeek,
|
|
847
|
+
* Qwen) proceed. The pruner cannot know which applies, so it must not be the
|
|
848
|
+
* one to make the failure fatal.
|
|
849
|
+
*/
|
|
850
|
+
if ((thinkingEndIndex > -1 && thinkingStartIndex < 0) || !thinkingBlock) {
|
|
851
|
+
/**
|
|
852
|
+
* No block was located, so any `thinkingStartIndex` set above came from a
|
|
853
|
+
* stale carried-over index pointing at a block-less message. Drop it:
|
|
854
|
+
* `createPruneMessages` persists the returned index as
|
|
855
|
+
* `runThinkingStartIndex`, and a stale value would suppress the trailing
|
|
856
|
+
* scan (`thinkingStartIndex < 0`) on later turns, causing a real reasoning
|
|
857
|
+
* block to be missed and never reattached.
|
|
858
|
+
*/
|
|
859
|
+
delete result.thinkingStartIndex;
|
|
860
|
+
result.context = context.reverse() as BaseMessage[];
|
|
861
|
+
return result;
|
|
824
862
|
}
|
|
825
863
|
|
|
826
864
|
let assistantIndex = -1;
|
package/src/specs/prune.test.ts
CHANGED
|
@@ -2382,3 +2382,196 @@ describe('thinking enabled — tail tool_use without a thinking block (issue #11
|
|
|
2382
2382
|
expect(result.thinkingStartIndex).toBeGreaterThanOrEqual(0);
|
|
2383
2383
|
});
|
|
2384
2384
|
});
|
|
2385
|
+
|
|
2386
|
+
describe('thinking enabled — non-Anthropic reasoning_content blocks (issue #191)', () => {
|
|
2387
|
+
it('locates a trailing reasoning_content block even when reasoningType defaults to THINKING (DeepSeek/Qwen)', () => {
|
|
2388
|
+
// DeepSeek-R1 and DashScope/Qwen-thinking route through the non-Bedrock
|
|
2389
|
+
// branch, so the caller passes reasoningType: THINKING — but their blocks
|
|
2390
|
+
// are tagged `reasoning_content` and are not normalized upstream. With a
|
|
2391
|
+
// system prompt at index 0 and an all-AI/tool tail, the consume loop never
|
|
2392
|
+
// pops a human to clear thinkingEndIndex (the issue #116 escape hatch), so
|
|
2393
|
+
// searching only for `thinking` missed the present block and threw a fatal
|
|
2394
|
+
// that permanently bricked the thread. The pruner must find the block by
|
|
2395
|
+
// its actual shape instead.
|
|
2396
|
+
const tokenCounter = createTestTokenCounter();
|
|
2397
|
+
const messages: BaseMessage[] = [
|
|
2398
|
+
new SystemMessage('you are a helpful assistant'),
|
|
2399
|
+
new AIMessage({
|
|
2400
|
+
content: [
|
|
2401
|
+
{
|
|
2402
|
+
type: ContentTypes.REASONING_CONTENT,
|
|
2403
|
+
reasoningText: {
|
|
2404
|
+
text: 'I will fetch the doc',
|
|
2405
|
+
signature: 'sig-new',
|
|
2406
|
+
},
|
|
2407
|
+
},
|
|
2408
|
+
{
|
|
2409
|
+
type: 'tool_use',
|
|
2410
|
+
id: 'tc_get_doc',
|
|
2411
|
+
name: 'get_doc_content',
|
|
2412
|
+
input: { docId: 'abc' },
|
|
2413
|
+
},
|
|
2414
|
+
],
|
|
2415
|
+
tool_calls: [
|
|
2416
|
+
{
|
|
2417
|
+
id: 'tc_get_doc',
|
|
2418
|
+
name: 'get_doc_content',
|
|
2419
|
+
args: { docId: 'abc' },
|
|
2420
|
+
type: 'tool_call',
|
|
2421
|
+
},
|
|
2422
|
+
],
|
|
2423
|
+
}),
|
|
2424
|
+
new ToolMessage({
|
|
2425
|
+
content: 'c'.repeat(6000),
|
|
2426
|
+
tool_call_id: 'tc_get_doc',
|
|
2427
|
+
name: 'get_doc_content',
|
|
2428
|
+
}),
|
|
2429
|
+
];
|
|
2430
|
+
|
|
2431
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2432
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2433
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2434
|
+
}
|
|
2435
|
+
|
|
2436
|
+
let result: ReturnType<typeof realGetMessagesWithinTokenLimit> | undefined;
|
|
2437
|
+
expect(() => {
|
|
2438
|
+
result = realGetMessagesWithinTokenLimit({
|
|
2439
|
+
messages,
|
|
2440
|
+
maxContextTokens: 200,
|
|
2441
|
+
indexTokenCountMap,
|
|
2442
|
+
thinkingEnabled: true,
|
|
2443
|
+
tokenCounter,
|
|
2444
|
+
reasoningType: ContentTypes.THINKING,
|
|
2445
|
+
});
|
|
2446
|
+
}).not.toThrow();
|
|
2447
|
+
|
|
2448
|
+
// thinkingStartIndex is only set when the reasoning block is actually
|
|
2449
|
+
// located — isolating the find fix (B) from the graceful-degradation
|
|
2450
|
+
// safety net (C), which would swallow the throw without finding anything.
|
|
2451
|
+
expect(result!.thinkingStartIndex).toBeGreaterThanOrEqual(0);
|
|
2452
|
+
});
|
|
2453
|
+
|
|
2454
|
+
it('does not throw when a carried-over thinking sequence has no locatable block', () => {
|
|
2455
|
+
// Models a stale runThinkingStartIndex carry-over pointing at an assistant
|
|
2456
|
+
// message that has no reasoning block. The pruner cannot find a block, but
|
|
2457
|
+
// a trailing AI/tool sequence keeps thinkingEndIndex set, so it used to
|
|
2458
|
+
// reach the fatal "no thinking block found" throw. Defense in depth: a
|
|
2459
|
+
// misconfiguration upstream of the pruner must not be able to brick the
|
|
2460
|
+
// thread — degrade to the partially-pruned context instead.
|
|
2461
|
+
const tokenCounter = createTestTokenCounter();
|
|
2462
|
+
const messages: BaseMessage[] = [
|
|
2463
|
+
new HumanMessage('h'.repeat(100)),
|
|
2464
|
+
new AIMessage({
|
|
2465
|
+
content: [{ type: 'text', text: 'a reply with no reasoning block' }],
|
|
2466
|
+
}),
|
|
2467
|
+
new HumanMessage('please read the doc'),
|
|
2468
|
+
new AIMessage({
|
|
2469
|
+
content: [
|
|
2470
|
+
{
|
|
2471
|
+
type: 'tool_use',
|
|
2472
|
+
id: 'tc_get_doc',
|
|
2473
|
+
name: 'get_doc_content',
|
|
2474
|
+
input: { docId: 'abc' },
|
|
2475
|
+
},
|
|
2476
|
+
],
|
|
2477
|
+
tool_calls: [
|
|
2478
|
+
{
|
|
2479
|
+
id: 'tc_get_doc',
|
|
2480
|
+
name: 'get_doc_content',
|
|
2481
|
+
args: { docId: 'abc' },
|
|
2482
|
+
type: 'tool_call',
|
|
2483
|
+
},
|
|
2484
|
+
],
|
|
2485
|
+
}),
|
|
2486
|
+
new ToolMessage({
|
|
2487
|
+
content: 'x'.repeat(150),
|
|
2488
|
+
tool_call_id: 'tc_get_doc',
|
|
2489
|
+
name: 'get_doc_content',
|
|
2490
|
+
}),
|
|
2491
|
+
];
|
|
2492
|
+
|
|
2493
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2494
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2495
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2496
|
+
}
|
|
2497
|
+
|
|
2498
|
+
let result: ReturnType<typeof realGetMessagesWithinTokenLimit> | undefined;
|
|
2499
|
+
expect(() => {
|
|
2500
|
+
result = realGetMessagesWithinTokenLimit({
|
|
2501
|
+
messages,
|
|
2502
|
+
maxContextTokens: 200,
|
|
2503
|
+
indexTokenCountMap,
|
|
2504
|
+
thinkingEnabled: true,
|
|
2505
|
+
tokenCounter,
|
|
2506
|
+
thinkingStartIndex: 1,
|
|
2507
|
+
reasoningType: ContentTypes.THINKING,
|
|
2508
|
+
});
|
|
2509
|
+
}).not.toThrow();
|
|
2510
|
+
|
|
2511
|
+
expect(result!.context.length).toBeGreaterThan(0);
|
|
2512
|
+
expect(result!.messagesToRefine.length).toBeGreaterThan(0);
|
|
2513
|
+
// The stale carried-over index must NOT be propagated: createPruneMessages
|
|
2514
|
+
// persists it as runThinkingStartIndex, and a stale value would suppress
|
|
2515
|
+
// the trailing scan on later turns and miss a real reasoning block.
|
|
2516
|
+
expect(result!.thinkingStartIndex).toBeUndefined();
|
|
2517
|
+
});
|
|
2518
|
+
|
|
2519
|
+
it('does not match an Anthropic thinking block for a Bedrock (reasoning_content) run', () => {
|
|
2520
|
+
// The cross-type fallback is one-directional: REASONING_CONTENT (Bedrock)
|
|
2521
|
+
// must not match a `thinking` block, since the Bedrock input converter
|
|
2522
|
+
// rejects `thinking` blocks and reattaching one would break the request.
|
|
2523
|
+
const tokenCounter = createTestTokenCounter();
|
|
2524
|
+
const messages: BaseMessage[] = [
|
|
2525
|
+
new SystemMessage('you are a helpful assistant'),
|
|
2526
|
+
new AIMessage({
|
|
2527
|
+
content: [
|
|
2528
|
+
{
|
|
2529
|
+
type: ContentTypes.THINKING,
|
|
2530
|
+
thinking: 'inherited Anthropic-style reasoning',
|
|
2531
|
+
signature: 'sig-anthropic',
|
|
2532
|
+
},
|
|
2533
|
+
{
|
|
2534
|
+
type: 'tool_use',
|
|
2535
|
+
id: 'tc_get_doc',
|
|
2536
|
+
name: 'get_doc_content',
|
|
2537
|
+
input: { docId: 'abc' },
|
|
2538
|
+
},
|
|
2539
|
+
],
|
|
2540
|
+
tool_calls: [
|
|
2541
|
+
{
|
|
2542
|
+
id: 'tc_get_doc',
|
|
2543
|
+
name: 'get_doc_content',
|
|
2544
|
+
args: { docId: 'abc' },
|
|
2545
|
+
type: 'tool_call',
|
|
2546
|
+
},
|
|
2547
|
+
],
|
|
2548
|
+
}),
|
|
2549
|
+
new ToolMessage({
|
|
2550
|
+
content: 'c'.repeat(6000),
|
|
2551
|
+
tool_call_id: 'tc_get_doc',
|
|
2552
|
+
name: 'get_doc_content',
|
|
2553
|
+
}),
|
|
2554
|
+
];
|
|
2555
|
+
|
|
2556
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
2557
|
+
for (let i = 0; i < messages.length; i++) {
|
|
2558
|
+
indexTokenCountMap[i] = tokenCounter(messages[i]);
|
|
2559
|
+
}
|
|
2560
|
+
|
|
2561
|
+
let result: ReturnType<typeof realGetMessagesWithinTokenLimit> | undefined;
|
|
2562
|
+
expect(() => {
|
|
2563
|
+
result = realGetMessagesWithinTokenLimit({
|
|
2564
|
+
messages,
|
|
2565
|
+
maxContextTokens: 200,
|
|
2566
|
+
indexTokenCountMap,
|
|
2567
|
+
thinkingEnabled: true,
|
|
2568
|
+
tokenCounter,
|
|
2569
|
+
reasoningType: ContentTypes.REASONING_CONTENT,
|
|
2570
|
+
});
|
|
2571
|
+
}).not.toThrow();
|
|
2572
|
+
|
|
2573
|
+
// The thinking block is intentionally not located for a Bedrock run, so no
|
|
2574
|
+
// index is reported and nothing gets reattached.
|
|
2575
|
+
expect(result!.thinkingStartIndex).toBeUndefined();
|
|
2576
|
+
});
|
|
2577
|
+
});
|