@librechat/agents 2.3.7 → 2.3.9
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/dist/cjs/graphs/Graph.cjs +5 -4
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +36 -13
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +5 -4
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +37 -14
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/types/messages/prune.d.ts +14 -10
- package/dist/types/types/llm.d.ts +8 -0
- package/package.json +1 -1
- package/src/graphs/Graph.ts +13 -4
- package/src/messages/prune.ts +59 -23
- package/src/specs/prune.test.ts +182 -1
- package/src/specs/token-distribution-edge-case.test.ts +4 -5
- package/src/types/llm.ts +9 -2
package/src/messages/prune.ts
CHANGED
@@ -1,8 +1,10 @@
 import { AIMessage, BaseMessage, UsageMetadata } from '@langchain/core/messages';
-import type { ThinkingContentText, MessageContentComplex } from '@/types/stream';
+import type { ThinkingContentText, MessageContentComplex, ReasoningContentText } from '@/types/stream';
 import type { TokenCounter } from '@/types/run';
-import { ContentTypes } from '@/common';
+import { ContentTypes, Providers } from '@/common';
+
 export type PruneMessagesFactoryParams = {
+  provider?: Providers;
   maxTokens: number;
   startIndex: number;
   tokenCounter: TokenCounter;
@@ -20,7 +22,7 @@ function isIndexInContext(arrayA: unknown[], arrayB: unknown[], targetIndex: num
   return targetIndex >= startingIndexInA;
 }
 
-function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText): MessageContentComplex[] {
+function addThinkingBlock(message: AIMessage, thinkingBlock: ThinkingContentText | ReasoningContentText): MessageContentComplex[] {
   const content: MessageContentComplex[] = Array.isArray(message.content)
     ? message.content as MessageContentComplex[]
     : [{
@@ -52,6 +54,13 @@ export function calculateTotalTokens(usage: Partial<UsageMetadata>): UsageMetada
   };
 }
 
+export type PruningResult = {
+  context: BaseMessage[];
+  remainingContextTokens: number;
+  messagesToRefine: BaseMessage[];
+  thinkingStartIndex?: number;
+};
+
 /**
  * Processes an array of messages and returns a context of messages that fit within a specified token limit.
  * It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
@@ -65,20 +74,19 @@ export function getMessagesWithinTokenLimit({
   indexTokenCountMap,
   startType: _startType,
   thinkingEnabled,
-  /** We may need to use this when recalculating */
   tokenCounter,
+  thinkingStartIndex: _thinkingStartIndex = -1,
+  reasoningType = ContentTypes.THINKING,
 }: {
   messages: BaseMessage[];
   maxContextTokens: number;
   indexTokenCountMap: Record<string, number | undefined>;
-
-  startType?: string;
+  startType?: string | string[];
   thinkingEnabled?: boolean;
-
-
-
-
-} {
+  tokenCounter: TokenCounter;
+  thinkingStartIndex?: number;
+  reasoningType?: ContentTypes.THINKING | ContentTypes.REASONING_CONTENT;
+}): PruningResult {
   // Every reply is primed with <|start|>assistant<|message|>, so we
   // start with 3 tokens for the label after all messages have been counted.
   let currentTokenCount = 3;
@@ -96,12 +104,19 @@ export function getMessagesWithinTokenLimit({
    * */
   let context: Array<BaseMessage | undefined> = [];
 
-  let thinkingStartIndex =
+  let thinkingStartIndex = _thinkingStartIndex;
   let thinkingEndIndex = -1;
-  let thinkingBlock: ThinkingContentText | undefined;
+  let thinkingBlock: ThinkingContentText | ReasoningContentText | undefined;
   const endIndex = instructions != null ? 1 : 0;
   const prunedMemory: BaseMessage[] = [];
 
+  if (_thinkingStartIndex > -1) {
+    const thinkingMessageContent = _messages[_thinkingStartIndex]?.content;
+    if (Array.isArray(thinkingMessageContent)) {
+      thinkingBlock = thinkingMessageContent.find((content) => content.type === reasoningType) as ThinkingContentText | undefined;
+    }
+  }
+
   if (currentTokenCount < remainingContextTokens) {
     let currentIndex = messages.length;
     while (messages.length > 0 && currentTokenCount < remainingContextTokens && currentIndex > endIndex) {
@@ -116,7 +131,7 @@ export function getMessagesWithinTokenLimit({
         thinkingEndIndex = currentIndex;
       }
       if (thinkingEndIndex > -1 && !thinkingBlock && thinkingStartIndex < 0 && messageType === 'ai' && Array.isArray(poppedMessage.content)) {
-        thinkingBlock = (poppedMessage.content.find((content) => content.type ===
+        thinkingBlock = (poppedMessage.content.find((content) => content.type === reasoningType)) as ThinkingContentText | undefined;
         thinkingStartIndex = thinkingBlock != null ? currentIndex : -1;
       }
       /** False start, the latest message was not part of a multi-assistant/tool sequence of messages */
@@ -135,22 +150,34 @@ export function getMessagesWithinTokenLimit({
         currentTokenCount += tokenCount;
       } else {
         prunedMemory.push(poppedMessage);
-        if (thinkingEndIndex > -1) {
+        if (thinkingEndIndex > -1 && thinkingStartIndex < 0) {
          continue;
         }
         break;
       }
     }
 
-    if (
-      startType = 'ai';
+    if (context[context.length - 1]?.getType() === 'tool') {
+      startType = ['ai', 'human'];
     }
 
-    if (startType != null && startType && context.length > 0) {
-
+    if (startType != null && startType.length > 0 && context.length > 0) {
+      let requiredTypeIndex = -1;
+
+      let totalTokens = 0;
+      for (let i = context.length - 1; i >= 0; i--) {
+        const currentType = context[i]?.getType() ?? '';
+        if (Array.isArray(startType) ? startType.includes(currentType) : currentType === startType) {
+          requiredTypeIndex = i + 1;
+          break;
+        }
+        const originalIndex = originalLength - 1 - i;
+        totalTokens += indexTokenCountMap[originalIndex] ?? 0;
+      }
 
       if (requiredTypeIndex > 0) {
-
+        currentTokenCount -= totalTokens;
+        context = context.slice(0, requiredTypeIndex);
       }
     }
   }
@@ -161,12 +188,16 @@ export function getMessagesWithinTokenLimit({
   }
 
   remainingContextTokens -= currentTokenCount;
-  const result = {
+  const result: PruningResult = {
    remainingContextTokens,
    context: [] as BaseMessage[],
    messagesToRefine: prunedMemory,
   };
 
+  if (thinkingStartIndex > -1) {
+    result.thinkingStartIndex = thinkingStartIndex;
+  }
+
   if (prunedMemory.length === 0 || thinkingEndIndex < 0 || (thinkingStartIndex > -1 && isIndexInContext(_messages, context, thinkingStartIndex))) {
     // we reverse at this step to ensure the context is in the correct order for the model, and we need to work backwards
     result.context = context.reverse() as BaseMessage[];
@@ -270,6 +301,7 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
   let lastTurnStartIndex = factoryParams.startIndex;
   let lastCutOffIndex = 0;
   let totalTokens = (Object.values(indexTokenCountMap)).reduce((a, b) => a + b, 0);
+  let runThinkingStartIndex = -1;
   return function pruneMessages(params: PruneMessagesParams): {
     context: BaseMessage[];
     indexTokenCountMap: Record<string, number>;
@@ -339,15 +371,19 @@ export function createPruneMessages(factoryParams: PruneMessagesFactoryParams) {
      return { context: params.messages, indexTokenCountMap };
    }
 
-    const { context } = getMessagesWithinTokenLimit({
+    const { context, thinkingStartIndex } = getMessagesWithinTokenLimit({
      maxContextTokens: factoryParams.maxTokens,
      messages: params.messages,
      indexTokenCountMap,
      startType: params.startType,
      thinkingEnabled: factoryParams.thinkingEnabled,
      tokenCounter: factoryParams.tokenCounter,
+      reasoningType: factoryParams.provider === Providers.BEDROCK ? ContentTypes.REASONING_CONTENT : ContentTypes.THINKING,
+      thinkingStartIndex: factoryParams.thinkingEnabled === true ? runThinkingStartIndex : undefined,
    });
-
+    runThinkingStartIndex = thinkingStartIndex ?? -1;
+    /** The index is the first value of `context`, index relative to `params.messages` */
+    lastCutOffIndex = Math.max(params.messages.length - (context.length - (context[0]?.getType() === 'system' ? 1 : 0)), 0);
 
    return { context, indexTokenCountMap };
   };
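
Taken together, the prune.ts changes let the factory know which provider it is pruning for: with `provider: Providers.BEDROCK` the pruner preserves `reasoning_content` blocks, otherwise it preserves `thinking` blocks, and the `thinkingStartIndex` returned in the new `PruningResult` is carried into the next prune call via `runThinkingStartIndex`. A minimal usage sketch follows; the import paths, message fixtures, and the placeholder token counter are illustrative assumptions, not part of this diff:

```ts
import { SystemMessage, HumanMessage, AIMessage } from '@langchain/core/messages';
// Hypothetical entry point; the package's public export surface is not shown in this diff:
import { createPruneMessages, Providers } from '@librechat/agents';

// Placeholder token counter for the sketch; the real TokenCounter signature lives in '@/types/run'.
const tokenCounter = (text: string): number => Math.ceil(text.length / 4);

const pruneMessages = createPruneMessages({
  provider: Providers.BEDROCK, // new option: prune around 'reasoning_content' instead of 'thinking'
  maxTokens: 4096,
  startIndex: 0,
  thinkingEnabled: true,
  tokenCounter,
  indexTokenCountMap: {},
});

// Each call reuses the factory's internal state, including the carried thinkingStartIndex.
const { context } = pruneMessages({
  messages: [
    new SystemMessage('You are a helpful assistant.'),
    new HumanMessage('Summarize the last report.'),
    new AIMessage('Here is the summary.'),
  ],
});
```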
package/src/specs/prune.test.ts
CHANGED
@@ -1,7 +1,7 @@
 // src/specs/prune.test.ts
 import { config } from 'dotenv';
 config();
-import { HumanMessage, AIMessage, SystemMessage, BaseMessage } from '@langchain/core/messages';
+import { HumanMessage, AIMessage, SystemMessage, BaseMessage, ToolMessage } from '@langchain/core/messages';
 import type { RunnableConfig } from '@langchain/core/runnables';
 import type { UsageMetadata } from '@langchain/core/messages';
 import type * as t from '@/types';
@@ -512,6 +512,187 @@ describe('Prune Messages Tests', () => {
   });
 });
 
+describe('Tool Message Handling', () => {
+  it('should ensure context does not start with a tool message by finding an AI message', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages = [
+      new SystemMessage('System instruction'),
+      new AIMessage('AI message 1'),
+      new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
+      new AIMessage('AI message 2'),
+      new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' })
+    ];
+
+    const indexTokenCountMap = {
+      0: 17, // System instruction
+      1: 12, // AI message 1
+      2: 13, // Tool result 1
+      3: 12, // AI message 2
+      4: 13 // Tool result 2
+    };
+
+    // Create a pruneMessages function with a token limit that will only include the last few messages
+    const pruneMessages = createPruneMessages({
+      maxTokens: 58, // Only enough for system + last 3 messages + 3, but should not include a parent-less tool message
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+
+    const result = pruneMessages({ messages });
+
+    // The context should include the system message, AI message 2, and Tool result 2
+    // It should NOT start with Tool result 2 alone
+    expect(result.context.length).toBe(3);
+    expect(result.context[0]).toBe(messages[0]); // System message
+    expect(result.context[1]).toBe(messages[3]); // AI message 2
+    expect(result.context[2]).toBe(messages[4]); // Tool result 2
+  });
+
+  it('should ensure context does not start with a tool message by finding a human message', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages = [
+      new SystemMessage('System instruction'),
+      new HumanMessage('Human message 1'),
+      new AIMessage('AI message 1'),
+      new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
+      new HumanMessage('Human message 2'),
+      new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' })
+    ];
+
+    const indexTokenCountMap = {
+      0: 17, // System instruction
+      1: 15, // Human message 1
+      2: 12, // AI message 1
+      3: 13, // Tool result 1
+      4: 15, // Human message 2
+      5: 13 // Tool result 2
+    };
+
+    // Create a pruneMessages function with a token limit that will only include the last few messages
+    const pruneMessages = createPruneMessages({
+      maxTokens: 48, // Only enough for system + last 2 messages
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+
+    const result = pruneMessages({ messages });
+
+    // The context should include the system message, Human message 2, and Tool result 2
+    // It should NOT start with Tool result 2 alone
+    expect(result.context.length).toBe(3);
+    expect(result.context[0]).toBe(messages[0]); // System message
+    expect(result.context[1]).toBe(messages[4]); // Human message 2
+    expect(result.context[2]).toBe(messages[5]); // Tool result 2
+  });
+
+  it('should handle the case where a tool message is followed by an AI message', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages = [
+      new SystemMessage('System instruction'),
+      new HumanMessage('Human message'),
+      new AIMessage('AI message with tool use'),
+      new ToolMessage({ content: 'Tool result', tool_call_id: 'tool1' }),
+      new AIMessage('AI message after tool')
+    ];
+
+    const indexTokenCountMap = {
+      0: 17, // System instruction
+      1: 13, // Human message
+      2: 22, // AI message with tool use
+      3: 11, // Tool result
+      4: 19 // AI message after tool
+    };
+
+    const pruneMessages = createPruneMessages({
+      maxTokens: 50,
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.context.length).toBe(2);
+    expect(result.context[0]).toBe(messages[0]); // System message
+    expect(result.context[1]).toBe(messages[4]); // AI message after tool
+  });
+
+  it('should handle the case where a tool message is followed by a human message', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages = [
+      new SystemMessage('System instruction'),
+      new HumanMessage('Human message 1'),
+      new AIMessage('AI message with tool use'),
+      new ToolMessage({ content: 'Tool result', tool_call_id: 'tool1' }),
+      new HumanMessage('Human message 2')
+    ];
+
+    const indexTokenCountMap = {
+      0: 17, // System instruction
+      1: 15, // Human message 1
+      2: 22, // AI message with tool use
+      3: 11, // Tool result
+      4: 15 // Human message 2
+    };
+
+    const pruneMessages = createPruneMessages({
+      maxTokens: 46,
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.context.length).toBe(2);
+    expect(result.context[0]).toBe(messages[0]); // System message
+    expect(result.context[1]).toBe(messages[4]); // Human message 2
+  });
+
+  it('should handle complex sequence with multiple tool messages', () => {
+    const tokenCounter = createTestTokenCounter();
+    const messages = [
+      new SystemMessage('System instruction'),
+      new HumanMessage('Human message 1'),
+      new AIMessage('AI message 1 with tool use'),
+      new ToolMessage({ content: 'Tool result 1', tool_call_id: 'tool1' }),
+      new AIMessage('AI message 2 with tool use'),
+      new ToolMessage({ content: 'Tool result 2', tool_call_id: 'tool2' }),
+      new AIMessage('AI message 3 with tool use'),
+      new ToolMessage({ content: 'Tool result 3', tool_call_id: 'tool3' })
+    ];
+
+    const indexTokenCountMap = {
+      0: 17, // System instruction
+      1: 15, // Human message 1
+      2: 26, // AI message 1 with tool use
+      3: 13, // Tool result 1
+      4: 26, // AI message 2 with tool use
+      5: 13, // Tool result 2
+      6: 26, // AI message 3 with tool use
+      7: 13 // Tool result 3
+    };
+
+    const pruneMessages = createPruneMessages({
+      maxTokens: 111,
+      startIndex: 0,
+      tokenCounter,
+      indexTokenCountMap: { ...indexTokenCountMap }
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.context.length).toBe(5);
+    expect(result.context[0]).toBe(messages[0]); // System message
+    expect(result.context[1]).toBe(messages[4]); // AI message 2 with tool use
+    expect(result.context[2]).toBe(messages[5]); // Tool result 2
+    expect(result.context[3]).toBe(messages[6]); // AI message 3 with tool use
+    expect(result.context[4]).toBe(messages[7]); // Tool result 3
+  });
+});
+
 describe('Integration with Run', () => {
   it('should initialize Run with custom token counter and process messages', async () => {
     const provider = Providers.OPENAI;
package/src/specs/token-distribution-edge-case.test.ts
CHANGED
@@ -235,9 +235,9 @@ describe('Token Distribution Edge Case Tests', () => {
    });
 
    // Add two more messages
+    messages.push(new HumanMessage('Message 4'));
    const extendedMessages = [
      ...messages,
-      new HumanMessage('Message 4'),
      new AIMessage('Response 4')
    ];
 
@@ -257,6 +257,7 @@ describe('Token Distribution Edge Case Tests', () => {
    // The context should include the system message and some of the latest messages
    expect(thirdResult.context.length).toBeGreaterThan(0);
    expect(thirdResult.context[0].content).toBe('System instruction');
+    expect(thirdResult.context[1].content).toBe('Response 4');
 
    // Find which messages are in the final context
    const contextMessageIndices = thirdResult.context.map(msg => {
@@ -282,14 +283,12 @@ describe('Token Distribution Edge Case Tests', () => {
    // Verify that messages not in the context have their original token counts or previously adjusted values
    for (let i = 0; i < extendedMessages.length; i++) {
      if (!contextMessageIndices.includes(i)) {
-        // This message is not in the context, so its token count should not have been adjusted in the last operation
        const expectedValue = i < messages.length
          ? (secondResult.indexTokenCountMap[i] || indexTokenCountMap[i])
-          : (indexTokenCountMap as Record<string, number | undefined>)[i] ??
+          : (indexTokenCountMap as Record<string, number | undefined>)[i] ?? 0;
 
-        // For defined values, we can check that they're close to what we expect
        const difference = Math.abs((thirdResult.indexTokenCountMap[i] || 0) - expectedValue);
-        expect(difference).
+        expect(difference).toBe(0);
      }
    }
  });
package/src/types/llm.ts
CHANGED
@@ -35,16 +35,23 @@ export type AzureClientOptions = (Partial<OpenAIChatInput> & Partial<AzureOpenAI
 } & BaseChatModelParams & {
   configuration?: OAIClientOptions;
 });
-
+export type ThinkingConfig = AnthropicInput['thinking'];
 export type ChatOpenAIToolType = BindToolsInput | OpenAIClient.ChatCompletionTool;
 export type CommonToolType = StructuredTool | ChatOpenAIToolType;
-
+export type AnthropicReasoning = {
+  thinking?: ThinkingConfig | boolean;
+  thinkingBudget?: number;
+};
 export type OpenAIClientOptions = ChatOpenAIFields;
 export type OllamaClientOptions = ChatOllamaInput;
 export type AnthropicClientOptions = AnthropicInput;
 export type MistralAIClientOptions = ChatMistralAIInput;
 export type VertexAIClientOptions = ChatVertexAIInput;
 export type BedrockClientOptions = BedrockChatFields;
+export type BedrockAnthropicInput = ChatBedrockConverseInput & {
+  additionalModelRequestFields?: ChatBedrockConverseInput['additionalModelRequestFields'] &
+    AnthropicReasoning;
+};
 export type BedrockConverseClientOptions = ChatBedrockConverseInput;
 export type GoogleClientOptions = GoogleGenerativeAIChatInput;
 export type DeepSeekClientOptions = ChatDeepSeekCallOptions;