@librechat/agents 3.1.57 → 3.1.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +326 -62
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +13 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +7 -27
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +303 -222
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +4 -4
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +6 -2
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/init.cjs +60 -0
- package/dist/cjs/llm/init.cjs.map +1 -0
- package/dist/cjs/llm/invoke.cjs +90 -0
- package/dist/cjs/llm/invoke.cjs.map +1 -0
- package/dist/cjs/llm/openai/index.cjs +2 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/request.cjs +41 -0
- package/dist/cjs/llm/request.cjs.map +1 -0
- package/dist/cjs/main.cjs +40 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +76 -89
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/contextPruning.cjs +156 -0
- package/dist/cjs/messages/contextPruning.cjs.map +1 -0
- package/dist/cjs/messages/contextPruningSettings.cjs +53 -0
- package/dist/cjs/messages/contextPruningSettings.cjs.map +1 -0
- package/dist/cjs/messages/core.cjs +23 -37
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +156 -11
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +1161 -49
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/messages/reducer.cjs +87 -0
- package/dist/cjs/messages/reducer.cjs.map +1 -0
- package/dist/cjs/run.cjs +81 -42
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/stream.cjs +54 -7
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/summarization/index.cjs +75 -0
- package/dist/cjs/summarization/index.cjs.map +1 -0
- package/dist/cjs/summarization/node.cjs +663 -0
- package/dist/cjs/summarization/node.cjs.map +1 -0
- package/dist/cjs/tools/ToolNode.cjs +16 -8
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +2 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/utils/errors.cjs +115 -0
- package/dist/cjs/utils/errors.cjs.map +1 -0
- package/dist/cjs/utils/events.cjs +17 -0
- package/dist/cjs/utils/events.cjs.map +1 -1
- package/dist/cjs/utils/handlers.cjs +16 -0
- package/dist/cjs/utils/handlers.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +10 -0
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs +247 -14
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/truncation.cjs +107 -0
- package/dist/cjs/utils/truncation.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +325 -61
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +13 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +8 -28
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +307 -226
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +4 -4
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +6 -2
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/init.mjs +58 -0
- package/dist/esm/llm/init.mjs.map +1 -0
- package/dist/esm/llm/invoke.mjs +87 -0
- package/dist/esm/llm/invoke.mjs.map +1 -0
- package/dist/esm/llm/openai/index.mjs +2 -0
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/request.mjs +38 -0
- package/dist/esm/llm/request.mjs.map +1 -0
- package/dist/esm/main.mjs +13 -3
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +76 -89
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/contextPruning.mjs +154 -0
- package/dist/esm/messages/contextPruning.mjs.map +1 -0
- package/dist/esm/messages/contextPruningSettings.mjs +50 -0
- package/dist/esm/messages/contextPruningSettings.mjs.map +1 -0
- package/dist/esm/messages/core.mjs +23 -37
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +156 -11
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +1158 -52
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/messages/reducer.mjs +83 -0
- package/dist/esm/messages/reducer.mjs.map +1 -0
- package/dist/esm/run.mjs +82 -43
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/stream.mjs +54 -7
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/summarization/index.mjs +73 -0
- package/dist/esm/summarization/index.mjs.map +1 -0
- package/dist/esm/summarization/node.mjs +659 -0
- package/dist/esm/summarization/node.mjs.map +1 -0
- package/dist/esm/tools/ToolNode.mjs +16 -8
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +2 -0
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/utils/errors.mjs +111 -0
- package/dist/esm/utils/errors.mjs.map +1 -0
- package/dist/esm/utils/events.mjs +17 -1
- package/dist/esm/utils/events.mjs.map +1 -1
- package/dist/esm/utils/handlers.mjs +16 -0
- package/dist/esm/utils/handlers.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +10 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs +245 -15
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/truncation.mjs +102 -0
- package/dist/esm/utils/truncation.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +124 -6
- package/dist/types/common/enum.d.ts +14 -1
- package/dist/types/graphs/Graph.d.ts +22 -27
- package/dist/types/index.d.ts +5 -0
- package/dist/types/llm/init.d.ts +18 -0
- package/dist/types/llm/invoke.d.ts +48 -0
- package/dist/types/llm/request.d.ts +14 -0
- package/dist/types/messages/contextPruning.d.ts +42 -0
- package/dist/types/messages/contextPruningSettings.d.ts +44 -0
- package/dist/types/messages/core.d.ts +1 -1
- package/dist/types/messages/format.d.ts +17 -1
- package/dist/types/messages/index.d.ts +3 -0
- package/dist/types/messages/prune.d.ts +162 -1
- package/dist/types/messages/reducer.d.ts +18 -0
- package/dist/types/run.d.ts +12 -1
- package/dist/types/summarization/index.d.ts +20 -0
- package/dist/types/summarization/node.d.ts +29 -0
- package/dist/types/tools/ToolNode.d.ts +3 -1
- package/dist/types/types/graph.d.ts +44 -6
- package/dist/types/types/index.d.ts +1 -0
- package/dist/types/types/run.d.ts +30 -0
- package/dist/types/types/stream.d.ts +31 -4
- package/dist/types/types/summarize.d.ts +47 -0
- package/dist/types/types/tools.d.ts +7 -0
- package/dist/types/utils/errors.d.ts +28 -0
- package/dist/types/utils/events.d.ts +13 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/llm.d.ts +4 -0
- package/dist/types/utils/tokens.d.ts +14 -1
- package/dist/types/utils/truncation.d.ts +49 -0
- package/package.json +3 -3
- package/src/agents/AgentContext.ts +388 -58
- package/src/agents/__tests__/AgentContext.test.ts +265 -5
- package/src/common/enum.ts +13 -0
- package/src/events.ts +9 -39
- package/src/graphs/Graph.ts +468 -331
- package/src/index.ts +7 -0
- package/src/llm/anthropic/llm.spec.ts +3 -3
- package/src/llm/anthropic/utils/message_inputs.ts +6 -4
- package/src/llm/bedrock/llm.spec.ts +1 -1
- package/src/llm/bedrock/utils/message_inputs.ts +6 -2
- package/src/llm/init.ts +63 -0
- package/src/llm/invoke.ts +144 -0
- package/src/llm/request.ts +55 -0
- package/src/messages/__tests__/observationMasking.test.ts +221 -0
- package/src/messages/cache.ts +77 -102
- package/src/messages/contextPruning.ts +191 -0
- package/src/messages/contextPruningSettings.ts +90 -0
- package/src/messages/core.ts +32 -53
- package/src/messages/ensureThinkingBlock.test.ts +39 -39
- package/src/messages/format.ts +227 -15
- package/src/messages/formatAgentMessages.test.ts +511 -1
- package/src/messages/index.ts +3 -0
- package/src/messages/prune.ts +1548 -62
- package/src/messages/reducer.ts +22 -0
- package/src/run.ts +104 -51
- package/src/scripts/bedrock-merge-test.ts +1 -1
- package/src/scripts/test-thinking-handoff-bedrock.ts +1 -1
- package/src/scripts/test-thinking-handoff.ts +1 -1
- package/src/scripts/thinking-bedrock.ts +1 -1
- package/src/scripts/thinking.ts +1 -1
- package/src/specs/anthropic.simple.test.ts +1 -1
- package/src/specs/multi-agent-summarization.test.ts +396 -0
- package/src/specs/prune.test.ts +1196 -23
- package/src/specs/summarization-unit.test.ts +868 -0
- package/src/specs/summarization.test.ts +3827 -0
- package/src/specs/summarize-prune.test.ts +376 -0
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/specs/thinking-prune.test.ts +7 -4
- package/src/specs/token-accounting-e2e.test.ts +1034 -0
- package/src/specs/token-accounting-pipeline.test.ts +882 -0
- package/src/specs/token-distribution-edge-case.test.ts +25 -26
- package/src/splitStream.test.ts +42 -33
- package/src/stream.ts +64 -11
- package/src/summarization/__tests__/aggregator.test.ts +153 -0
- package/src/summarization/__tests__/node.test.ts +708 -0
- package/src/summarization/__tests__/trigger.test.ts +50 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/node.ts +982 -0
- package/src/tools/ToolNode.ts +25 -3
- package/src/types/graph.ts +62 -7
- package/src/types/index.ts +1 -0
- package/src/types/run.ts +32 -0
- package/src/types/stream.ts +45 -5
- package/src/types/summarize.ts +58 -0
- package/src/types/tools.ts +7 -0
- package/src/utils/errors.ts +117 -0
- package/src/utils/events.ts +31 -0
- package/src/utils/handlers.ts +18 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/llm.ts +12 -0
- package/src/utils/tokens.ts +336 -18
- package/src/utils/truncation.ts +124 -0
- package/src/scripts/image.ts +0 -180
package/src/specs/summarize-prune.test.ts (new file)
@@ -0,0 +1,376 @@
+import {
+  HumanMessage,
+  AIMessage,
+  SystemMessage,
+} from '@langchain/core/messages';
+import type { UsageMetadata } from '@langchain/core/messages';
+import { createPruneMessages } from '@/messages/prune';
+import { Providers, ContentTypes } from '@/common';
+
+function tokenCounter(msg: { content: unknown }): number {
+  const content =
+    typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
+  return Math.ceil(content.length / 4);
+}
+
+describe('Prune + Summarize Integration', () => {
+  it('should return messagesToRefine when messages exceed token limit', () => {
+    const messages = [
+      new SystemMessage('You are a helpful assistant.'),
+      new HumanMessage('First question'),
+      new AIMessage('First answer'),
+      new HumanMessage('Second question'),
+      new AIMessage('Second answer'),
+      new HumanMessage('Third question'),
+      new AIMessage(
+        'Third answer that is quite long to push things over the limit'
+      ),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const totalTokens = Object.values(indexTokenCountMap).reduce(
+      (a = 0, b = 0) => a! + b!,
+      0
+    ) as number;
+    const maxTokens = Math.floor(totalTokens * 0.6);
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.messagesToRefine).toBeDefined();
+    expect(result.messagesToRefine!.length).toBeGreaterThan(0);
+    expect(result.remainingContextTokens).toBeDefined();
+    expect(typeof result.remainingContextTokens).toBe('number');
+    expect(result.context.length).toBeLessThan(messages.length);
+  });
+
+  it('should return empty messagesToRefine when all messages fit', () => {
+    const messages = [new HumanMessage('Hi'), new AIMessage('Hello')];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 10000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.messagesToRefine).toBeDefined();
+    expect(result.messagesToRefine).toHaveLength(0);
+    expect(result.remainingContextTokens).toBeGreaterThan(0);
+    expect(result.context).toEqual(messages);
+  });
+
+  it('should preserve system message in context even when pruning', () => {
+    const sysMsg = new SystemMessage(
+      'Instructions for the assistant to follow carefully'
+    );
+    const messages = [
+      sysMsg,
+      new HumanMessage(
+        'This is the first message in our conversation and it is fairly long'
+      ),
+      new AIMessage(
+        'This is the first response and it is also fairly long with details'
+      ),
+      new HumanMessage(
+        'This is the second message with more context and questions'
+      ),
+      new AIMessage(
+        'This is the second response which is even more detailed and verbose'
+      ),
+      new HumanMessage('Third message in the conversation chain'),
+      new AIMessage('Third response with additional lengthy explanations'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = tokenCounter(messages[i]);
+    }
+
+    const totalTokens = Object.values(indexTokenCountMap).reduce(
+      (a = 0, b = 0) => a! + b!,
+      0
+    ) as number;
+    const maxTokens = Math.floor(totalTokens * 0.35);
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const result = pruneMessages({ messages });
+
+    expect(result.context[0]).toBe(sysMsg);
+    expect(result.messagesToRefine!.length).toBeGreaterThan(0);
+  });
+
+  it('should not include summary content type in pruned messages passed to providers', () => {
+    const summaryBlock = {
+      type: ContentTypes.SUMMARY,
+      text: 'Summary of prior conversation',
+    };
+    expect(summaryBlock.type).toBe('summary');
+    expect(Object.values(ContentTypes)).toContain('summary');
+  });
+});
+
+describe('pruneMessages ratio-based token grounding', () => {
+  it('should adjust indexTokenCountMap entries proportionally when usageMetadata is provided', () => {
+    const messages = [
+      new SystemMessage('Be concise.'),
+      new HumanMessage('What is 2+2?'),
+      new AIMessage('The answer is 4.'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 10,
+      1: 20,
+      2: 30,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 5000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 50,
+      output_tokens: 40,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    // Map stays in raw tiktoken space — calibrationRatio captures the multiplier.
+    const originalTotal = 10 + 20 + 30;
+    const expectedRatio = 50 / originalTotal;
+
+    expect(result.indexTokenCountMap[0]).toBe(10);
+    expect(result.indexTokenCountMap[1]).toBe(20);
+    expect(result.indexTokenCountMap[2]).toBe(30);
+    expect(result.calibrationRatio).toBeCloseTo(expectedRatio, 2);
+  });
+
+  it('should NOT adjust when ratio falls outside safe bounds (< 1/3)', () => {
+    const messages = [
+      new HumanMessage('What is 2+2?'),
+      new AIMessage('The answer is 4.'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 100,
+      1: 200,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 50000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 5,
+      output_tokens: 5,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    expect(result.indexTokenCountMap[0]).toBe(100);
+    expect(result.indexTokenCountMap[1]).toBe(200);
+  });
+
+  it('should NOT adjust when ratio falls outside safe bounds (> 2.5)', () => {
+    const messages = [new HumanMessage('Hi'), new AIMessage('Hello')];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 5,
+      1: 5,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 50000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 100,
+      output_tokens: 100,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    expect(result.indexTokenCountMap[0]).toBe(5);
+    expect(result.indexTokenCountMap[1]).toBe(5);
+  });
+
+  it('should include cache_read and cache_creation in ratio total', () => {
+    const messages = [
+      new SystemMessage('Instructions'),
+      new HumanMessage('Hello'),
+      new AIMessage('Hi there!'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 10,
+      1: 20,
+      2: 30,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.ANTHROPIC,
+      maxTokens: 5000,
+      startIndex: messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    // Anthropic: cache_read (15) + cache_creation (10) = 25 > input_tokens (30)?
+    // No, 25 < 30, so NOT additive. totalInput = 30.
+    // providerMessageTokens = 30 - 0 (no instruction overhead) = 30.
+    // ratio = 30 / 60 = 0.5 — safe (>= 1/3, <= 2.5).
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 30,
+      output_tokens: 20,
+      input_token_details: {
+        cache_read: 15,
+        cache_creation: 10,
+      },
+    };
+
+    const originalTotal = 10 + 20 + 30;
+    const expectedRatio = 30 / originalTotal;
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    // Map stays raw — calibrationRatio captures the multiplier
+    expect(result.indexTokenCountMap[0]).toBe(10);
+    expect(result.indexTokenCountMap[1]).toBe(20);
+    expect(result.indexTokenCountMap[2]).toBe(30);
+    expect(result.calibrationRatio).toBeCloseTo(expectedRatio, 2);
+  });
+
+  it('should assign output_tokens to the first new message at startIndex', () => {
+    const messages = [
+      new HumanMessage('What is 2+2?'),
+      new AIMessage('The answer is 4.'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 15,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 5000,
+      startIndex: 1,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const usageMetadata: Partial<UsageMetadata> = {
+      input_tokens: 20,
+      output_tokens: 25,
+    };
+
+    const result = pruneMessages({ messages, usageMetadata });
+
+    expect(result.indexTokenCountMap[1]).toBeDefined();
+    expect(result.indexTokenCountMap[1] as number).toBeGreaterThan(0);
+
+    // index[1] is the AI response at startIndex — assigned output_tokens (25).
+    // Calibration: providerMessageTokens = input_tokens (20) - overhead (0) = 20.
+    // messageTokenSum = index[0] (15) + index[1] is newOutput so excluded = 15.
+    // ratio = 20 / 15 = 1.33 — safe.
+    const preRatioIndex0 = 15;
+    const ratio = 20 / preRatioIndex0;
+    const isRatioSafe = ratio >= 1 / 3 && ratio <= 2.5;
+
+    // Map stays raw regardless of ratio safety
+    expect(result.indexTokenCountMap[0]).toBe(preRatioIndex0);
+    if (isRatioSafe) {
+      expect(result.calibrationRatio).toBeCloseTo(ratio, 1);
+    }
+  });
+
+  it('should ground tokens correctly across multiple pruneMessages calls', () => {
+    const turn1Messages = [
+      new SystemMessage('Be concise.'),
+      new HumanMessage('What is 2+2?'),
+    ];
+
+    const indexTokenCountMap: Record<string, number | undefined> = {
+      0: 10,
+      1: 20,
+    };
+
+    const pruneMessages = createPruneMessages({
+      provider: Providers.OPENAI,
+      maxTokens: 5000,
+      startIndex: turn1Messages.length,
+      tokenCounter,
+      indexTokenCountMap,
+    });
+
+    const turn1Result = pruneMessages({
+      messages: turn1Messages,
+    });
+
+    expect(turn1Result.indexTokenCountMap[0]).toBe(10);
+    expect(turn1Result.indexTokenCountMap[1]).toBe(20);
+
+    const turn2Messages = [
+      ...turn1Messages,
+      new AIMessage('4'),
+      new HumanMessage('And 3+3?'),
+    ];
+
+    const turn2Usage: Partial<UsageMetadata> = {
+      input_tokens: 25,
+      output_tokens: 10,
+    };
+
+    const turn2Result = pruneMessages({
+      messages: turn2Messages,
+      usageMetadata: turn2Usage,
+    });
+
+    expect(turn2Result.indexTokenCountMap[2]).toBeDefined();
+    expect(turn2Result.indexTokenCountMap[2] as number).toBeGreaterThan(0);
+    expect(turn2Result.indexTokenCountMap[3]).toBeDefined();
+    expect(turn2Result.indexTokenCountMap[3] as number).toBeGreaterThan(0);
+
+    for (let i = 0; i < turn2Messages.length; i++) {
+      expect(turn2Result.indexTokenCountMap[i]).toBeDefined();
+      expect(turn2Result.indexTokenCountMap[i] as number).toBeGreaterThan(0);
+    }
+  });
+});
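The calibration-ratio comments embedded in the spec above compress a fair amount of arithmetic, so here is a minimal standalone sketch of that computation under the assumptions the tests state. The helper name `computeCalibrationRatio` and its exact shape are illustrative, not the package's API (the real logic sits behind `createPruneMessages` in `src/messages/prune.ts` and may differ); the 1/3 to 2.5 safety bounds and the cache-token handling are taken directly from the test comments.

```typescript
import type { UsageMetadata } from '@langchain/core/messages';

/**
 * Sketch of the ratio-based calibration the spec above exercises.
 * `indexTokenCountMap` holds locally counted tokens for the messages that were
 * already present before the latest completion (the spec excludes the newly
 * assigned output message from this sum).
 */
function computeCalibrationRatio(
  indexTokenCountMap: Record<string, number | undefined>,
  usageMetadata: Partial<UsageMetadata>,
): number | undefined {
  const details = usageMetadata.input_token_details;
  const cacheTotal = (details?.cache_read ?? 0) + (details?.cache_creation ?? 0);
  const inputTokens = usageMetadata.input_tokens ?? 0;

  // Per the spec comments: cache tokens only add to the total when they exceed
  // the reported input_tokens; otherwise they are assumed to already be included.
  const totalInput =
    cacheTotal > inputTokens ? inputTokens + cacheTotal : inputTokens;

  const messageTokenSum = Object.values(indexTokenCountMap).reduce<number>(
    (sum, count) => sum + (count ?? 0),
    0,
  );
  if (messageTokenSum === 0) {
    return undefined;
  }

  const ratio = totalInput / messageTokenSum;
  // Safety bounds from the spec: calibration applies only when 1/3 <= ratio <= 2.5;
  // outside that window the raw local counts are kept as-is.
  return ratio >= 1 / 3 && ratio <= 2.5 ? ratio : undefined;
}

// Worked example matching the Anthropic cache test above:
// cache_read 15 + cache_creation 10 = 25 < input_tokens 30, so totalInput = 30;
// messageTokenSum = 10 + 20 + 30 = 60, ratio = 30 / 60 = 0.5 (within bounds).
```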
package/src/specs/thinking-handoff.test.ts
@@ -61,7 +61,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       agentId: 'specialist',
       provider: Providers.ANTHROPIC,
       clientOptions: {
-        modelName: 'claude-
+        modelName: 'claude-sonnet-4-5-20250929',
         apiKey: 'test-key',
         thinking: {
           type: 'enabled',
@@ -150,7 +150,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       agentId: 'agent_b',
       provider: Providers.ANTHROPIC,
       clientOptions: {
-        modelName: 'claude-
+        modelName: 'claude-sonnet-4-5-20250929',
         apiKey: 'test-key',
         thinking: {
           type: 'enabled',
@@ -195,7 +195,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       provider: Providers.BEDROCK,
       clientOptions: {
         region: 'us-east-1',
-        model: 'anthropic.claude-
+        model: 'us.anthropic.claude-haiku-4-5-20251001-v1:0',
         // No thinking config
       },
       instructions: 'You are a coordinator',
@@ -206,7 +206,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       provider: Providers.BEDROCK,
       clientOptions: {
         region: 'us-east-1',
-        model: 'us.anthropic.claude-
+        model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
         additionalModelRequestFields: {
           thinking: {
             type: 'enabled',
@@ -280,7 +280,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       provider: Providers.BEDROCK,
       clientOptions: {
         region: 'us-east-1',
-        model: 'us.anthropic.claude-
+        model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
         additionalModelRequestFields: {
           thinking: {
             type: 'enabled',
@@ -343,7 +343,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       provider: Providers.BEDROCK,
       clientOptions: {
         region: 'us-east-1',
-        model: 'us.anthropic.claude-
+        model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
         additionalModelRequestFields: {
           thinking: {
             type: 'enabled',
@@ -434,7 +434,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       agentId: 'reviewer',
       provider: Providers.ANTHROPIC,
       clientOptions: {
-        modelName: 'claude-
+        modelName: 'claude-sonnet-4-5-20250929',
         apiKey: 'test-key',
         thinking: {
           type: 'enabled',
@@ -508,7 +508,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       agentId: 'agent_a',
       provider: Providers.ANTHROPIC,
       clientOptions: {
-        modelName: 'claude-
+        modelName: 'claude-sonnet-4-5-20250929',
         apiKey: 'test-key',
         thinking: {
           type: 'enabled',
@@ -522,7 +522,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       agentId: 'agent_b',
       provider: Providers.ANTHROPIC,
       clientOptions: {
-        modelName: 'claude-
+        modelName: 'claude-sonnet-4-5-20250929',
        apiKey: 'test-key',
         thinking: {
           type: 'enabled',
@@ -587,7 +587,7 @@ describe('Thinking-Enabled Agent Handoff Tests', () => {
       agentId: 'agent_b',
       provider: Providers.ANTHROPIC,
       clientOptions: {
-        modelName: 'claude-
+        modelName: 'claude-sonnet-4-5-20250929',
         apiKey: 'test-key',
         thinking: {
           type: 'enabled',
package/src/specs/thinking-prune.test.ts
@@ -741,6 +741,7 @@ describe('Prune Messages with Thinking Mode Tests', () => {
       tokenCounter,
       indexTokenCountMap: { ...indexTokenCountMap },
       thinkingEnabled: true,
+      reserveRatio: 0,
     });

     // Prune messages
@@ -767,7 +768,7 @@ describe('Prune Messages with Thinking Mode Tests', () => {
     );
   });

-  it('should
+  it('should gracefully degrade when aggressive pruning removes all AI messages', () => {
     const tokenCounter = createTestTokenCounter();

     const assistantMessageWithThinking = new AIMessage({
@@ -817,8 +818,10 @@ describe('Prune Messages with Thinking Mode Tests', () => {
       thinkingEnabled: true,
     });

-
-
-    );
+    // Should not throw — gracefully degrades by skipping thinking block reattachment
+    expect(() => pruneMessages({ messages })).not.toThrow();
+    const result = pruneMessages({ messages });
+    // Context should contain at most the system message (no AI messages survived)
+    expect(result.context.length).toBeLessThanOrEqual(2);
   });
 });
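Taken together, the specs above suggest the shape of the new pruning API: `createPruneMessages` is configured once per run, and each call returns the pruned `context` plus the overflow `messagesToRefine` that a summarization step can condense. The sketch below strings those pieces together; the option and result field names come from the test usage, while `summarizeOverflow` is a hypothetical stand-in for the new `summarization` module, whose real API is not shown in this diff.

```typescript
import type { BaseMessage, SystemMessage } from '@langchain/core/messages';
import { createPruneMessages } from '@/messages/prune';
import { Providers } from '@/common';

// Hypothetical stand-in for the package's summarization step; the actual
// module under package/src/summarization is not shown in this diff.
declare function summarizeOverflow(messages: BaseMessage[]): Promise<SystemMessage>;

async function buildContext(
  messages: BaseMessage[],
  tokenCounter: (msg: BaseMessage) => number,
): Promise<BaseMessage[]> {
  const indexTokenCountMap: Record<string, number | undefined> = {};
  messages.forEach((msg, i) => {
    indexTokenCountMap[i] = tokenCounter(msg);
  });

  // Options mirror the spec usage: provider, token budget, where "new"
  // messages start, a local token counter, and per-message counts.
  const pruneMessages = createPruneMessages({
    provider: Providers.OPENAI,
    maxTokens: 4096,
    startIndex: messages.length,
    tokenCounter,
    indexTokenCountMap,
  });

  const { context, messagesToRefine } = pruneMessages({ messages });
  if (messagesToRefine && messagesToRefine.length > 0) {
    // One possible way to recombine: condense the overflow and place the
    // summary ahead of the kept context. How the package itself re-injects
    // summaries is not shown in this diff.
    const summary = await summarizeOverflow(messagesToRefine);
    return [summary, ...context];
  }
  return context;
}
```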