@librechat/agents 2.4.13 → 2.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +17 -11
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs +65 -35
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +17 -11
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs +65 -35
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +1 -1
- package/dist/types/messages/prune.d.ts +2 -2
- package/package.json +1 -1
- package/src/graphs/Graph.ts +19 -12
- package/src/messages/prune.ts +158 -61
- package/src/specs/prune.test.ts +93 -76
- package/src/specs/token-distribution-edge-case.test.ts +73 -52
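The hunks below are from package/src/specs/token-distribution-edge-case.test.ts. For orientation, this is the shape of the API those tests exercise, sketched in TypeScript and inferred from the diff itself; the public export path and the exact option semantics are assumptions, not confirmed by the package:

import {
  HumanMessage,
  AIMessage,
  SystemMessage,
  BaseMessage,
} from '@langchain/core/messages';
import type { UsageMetadata } from '@langchain/core/messages';
// The tests import from '@/messages/prune'; a public export from the
// package root is assumed here.
import { createPruneMessages } from '@librechat/agents';

// Character-count token counter, mirroring the simple counter in the tests.
const tokenCounter = (message: BaseMessage): number =>
  typeof message.content === 'string' ? message.content.length : 0;

const messages = [
  new SystemMessage('System instruction'),
  new HumanMessage('Message 1'),
  new AIMessage('Response 1'),
];

// The factory binds a token budget, a counter, and per-index token counts.
const pruneMessages = createPruneMessages({
  maxTokens: 100,
  startIndex: 0,
  tokenCounter,
  indexTokenCountMap: { 0: 17, 1: 9, 2: 10 }, // values as used in the tests
});

// First call establishes the cutoff index.
const first = pruneMessages({ messages });

// A later call can pass the provider's usage metadata so the per-index
// counts are redistributed across the messages kept in the context.
const usageMetadata: Partial<UsageMetadata> = {
  input_tokens: 20,
  output_tokens: 10,
  total_tokens: 30,
};
const second = pruneMessages({ messages, usageMetadata });
console.log(first.context.length, second.indexTokenCountMap);

The test file diff follows.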
@@ -1,5 +1,10 @@
 // src/specs/token-distribution-edge-case.test.ts
-import { HumanMessage, AIMessage, SystemMessage, BaseMessage } from '@langchain/core/messages';
+import {
+  HumanMessage,
+  AIMessage,
+  SystemMessage,
+  BaseMessage,
+} from '@langchain/core/messages';
 import type { UsageMetadata } from '@langchain/core/messages';
 import type * as t from '@/types';
 import { createPruneMessages } from '@/messages/prune';
@@ -9,7 +14,10 @@ const createTestTokenCounter = (): t.TokenCounter => {
   // This simple token counter just counts characters as tokens for predictable testing
   return (message: BaseMessage): number => {
     // Use type assertion to help TypeScript understand the type
-    const content = message.content as string | Array<t.MessageContentComplex | string> | undefined;
+    const content = message.content as
+      | string
+      | Array<t.MessageContentComplex | string>
+      | undefined;
 
     // Handle string content
     if (typeof content === 'string') {
@@ -46,19 +54,19 @@ describe('Token Distribution Edge Case Tests', () => {
     // Create messages
     const messages = [
       new SystemMessage('System instruction'), // Will always be included
-      new HumanMessage('Message 1'),
-      new AIMessage('Response 1'),
-      new HumanMessage('Message 2'),
-      new AIMessage('Response 2')
+      new HumanMessage('Message 1'), // Will be pruned
+      new AIMessage('Response 1'), // Will be pruned
+      new HumanMessage('Message 2'), // Will remain
+      new AIMessage('Response 2'), // Will remain
     ];
 
     // Calculate initial token counts for each message
     const indexTokenCountMap: Record<string, number> = {
       0: 17, // "System instruction"
-      1: 9,
+      1: 9, // "Message 1"
       2: 10, // "Response 1"
-      3: 9,
-      4: 10
+      3: 9, // "Message 2"
+      4: 10, // "Response 2"
     };
 
     // Set a token limit that will force pruning of the first two messages after the system message
@@ -66,7 +74,7 @@ describe('Token Distribution Edge Case Tests', () => {
       maxTokens: 40, // Only enough for system message + last two messages
       startIndex: 0,
       tokenCounter,
-      indexTokenCountMap: { ...indexTokenCountMap }
+      indexTokenCountMap: { ...indexTokenCountMap },
     });
 
     // First call to establish lastCutOffIndex
@@ -82,13 +90,13 @@ describe('Token Distribution Edge Case Tests', () => {
     const usageMetadata: Partial<UsageMetadata> = {
       input_tokens: 30,
       output_tokens: 20,
-      total_tokens: 50 // Different from the sum of our initial token counts
+      total_tokens: 50, // Different from the sum of our initial token counts
     };
 
     // Call pruneMessages again with the usage metadata
     const result = pruneMessages({
       messages,
-      usageMetadata
+      usageMetadata,
     });
 
     // The token distribution should only affect messages that remain in the context
@@ -109,14 +117,16 @@ describe('Token Distribution Edge Case Tests', () => {
 
     // Verify that the sum of tokens for messages in the context is close to the total_tokens from usageMetadata
     // There might be small rounding differences or implementation details that affect the exact sum
-    const totalContextTokens = result.indexTokenCountMap[0] + result.indexTokenCountMap[3] + result.indexTokenCountMap[4];
+    const totalContextTokens =
+      (result.indexTokenCountMap[0] ?? 0) +
+      (result.indexTokenCountMap[3] ?? 0) +
+      (result.indexTokenCountMap[4] ?? 0);
     expect(totalContextTokens).toBeGreaterThan(0);
 
     // The key thing we're testing is that the token distribution happens for messages in the context
     // and that the sum is reasonably close to the expected total
     const tokenDifference = Math.abs(totalContextTokens - 50);
     expect(tokenDifference).toBeLessThan(20); // Allow for some difference due to implementation details
-
   });
 
   it('should handle the case when all messages fit within the token limit', () => {
@@ -127,14 +137,14 @@ describe('Token Distribution Edge Case Tests', () => {
     const messages = [
       new SystemMessage('System instruction'),
       new HumanMessage('Message 1'),
-      new AIMessage('Response 1')
+      new AIMessage('Response 1'),
     ];
 
     // Calculate initial token counts for each message
     const indexTokenCountMap: Record<string, number> = {
       0: 17, // "System instruction"
-      1: 9,
-      2: 10
+      1: 9, // "Message 1"
+      2: 10, // "Response 1"
     };
 
     // Set a token limit that will allow all messages to fit
@@ -142,7 +152,7 @@ describe('Token Distribution Edge Case Tests', () => {
       maxTokens: 100,
       startIndex: 0,
       tokenCounter,
-      indexTokenCountMap: { ...indexTokenCountMap }
+      indexTokenCountMap: { ...indexTokenCountMap },
     });
 
     // First call to establish lastCutOffIndex (should be 0 since no pruning occurs)
@@ -155,26 +165,36 @@ describe('Token Distribution Edge Case Tests', () => {
     const usageMetadata: Partial<UsageMetadata> = {
       input_tokens: 20,
       output_tokens: 10,
-      total_tokens: 30 // Different from the sum of our initial token counts
+      total_tokens: 30, // Different from the sum of our initial token counts
     };
 
     // Call pruneMessages again with the usage metadata
     const result = pruneMessages({
       messages,
-      usageMetadata
+      usageMetadata,
     });
 
     // Since all messages fit, all token counts should be adjusted
-    const initialTotalTokens = indexTokenCountMap[0] + indexTokenCountMap[1] + indexTokenCountMap[2];
+    const initialTotalTokens =
+      indexTokenCountMap[0] + indexTokenCountMap[1] + indexTokenCountMap[2];
     const expectedRatio = 30 / initialTotalTokens;
 
     // Check that all token counts were adjusted
-    expect(result.indexTokenCountMap[0]).toBe(Math.round(indexTokenCountMap[0] * expectedRatio));
-    expect(result.indexTokenCountMap[1]).toBe(Math.round(indexTokenCountMap[1] * expectedRatio));
-    expect(result.indexTokenCountMap[2]).toBe(Math.round(indexTokenCountMap[2] * expectedRatio));
+    expect(result.indexTokenCountMap[0]).toBe(
+      Math.round(indexTokenCountMap[0] * expectedRatio)
+    );
+    expect(result.indexTokenCountMap[1]).toBe(
+      Math.round(indexTokenCountMap[1] * expectedRatio)
+    );
+    expect(result.indexTokenCountMap[2]).toBe(
+      Math.round(indexTokenCountMap[2] * expectedRatio)
+    );
 
     // Verify that the sum of all tokens equals the total_tokens from usageMetadata
-    const totalTokens = result.indexTokenCountMap[0] + result.indexTokenCountMap[1] + result.indexTokenCountMap[2];
+    const totalTokens =
+      (result.indexTokenCountMap[0] ?? 0) +
+      (result.indexTokenCountMap[1] ?? 0) +
+      (result.indexTokenCountMap[2] ?? 0);
     expect(totalTokens).toBe(30);
   });
 
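The assertions in the hunk above pin down the redistribution rule: when usageMetadata reports a total that differs from the sum of the stored counts, each retained count is scaled by total_tokens divided by that sum, then rounded. A standalone sketch of the arithmetic, as a hypothetical helper rather than the package's implementation:

// Hypothetical helper; illustrates the proportional rescaling the test expects.
function redistribute(
  counts: Record<string, number>,
  totalTokens: number
): Record<string, number> {
  const initialTotal = Object.values(counts).reduce((sum, n) => sum + n, 0);
  const ratio = totalTokens / initialTotal;
  const rescaled: Record<string, number> = {};
  for (const [index, count] of Object.entries(counts)) {
    rescaled[index] = Math.round(count * ratio);
  }
  return rescaled;
}

// With the test's numbers: 17 + 9 + 10 = 36 and total_tokens = 30, the ratio
// is 30 / 36, so { 0: 17, 1: 9, 2: 10 } becomes { 0: 14, 1: 8, 2: 8 },
// which happens to sum to exactly 30.

Rounding does not always preserve the total exactly, which is why the first test only checks the context sum to within a tolerance.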
@@ -185,23 +205,23 @@ describe('Token Distribution Edge Case Tests', () => {
     // Create a longer sequence of messages
     const messages = [
       new SystemMessage('System instruction'), // Will always be included
-      new HumanMessage('Message 1'),
-      new AIMessage('Response 1'),
-      new HumanMessage('Message 2'),
-      new AIMessage('Response 2'),
-      new HumanMessage('Message 3'),
-      new AIMessage('Response 3')
+      new HumanMessage('Message 1'), // Will be pruned in first round
+      new AIMessage('Response 1'), // Will be pruned in first round
+      new HumanMessage('Message 2'), // Will be pruned in second round
+      new AIMessage('Response 2'), // Will be pruned in second round
+      new HumanMessage('Message 3'), // Will remain
+      new AIMessage('Response 3'), // Will remain
     ];
 
     // Calculate initial token counts for each message
     const indexTokenCountMap: Record<string, number> = {
       0: 17, // "System instruction"
-      1: 9,
+      1: 9, // "Message 1"
       2: 10, // "Response 1"
-      3: 9,
+      3: 9, // "Message 2"
       4: 10, // "Response 2"
-      5: 9,
-      6: 10
+      5: 9, // "Message 3"
+      6: 10, // "Response 3"
     };
 
     // Set a token limit that will force pruning
@@ -209,7 +229,7 @@ describe('Token Distribution Edge Case Tests', () => {
       maxTokens: 40, // Only enough for system message + last two messages
       startIndex: 0,
       tokenCounter,
-      indexTokenCountMap: { ...indexTokenCountMap }
+      indexTokenCountMap: { ...indexTokenCountMap },
     });
 
     // First pruning operation
@@ -225,33 +245,30 @@ describe('Token Distribution Edge Case Tests', () => {
     const firstUsageMetadata: Partial<UsageMetadata> = {
       input_tokens: 30,
       output_tokens: 20,
-      total_tokens: 50
+      total_tokens: 50,
     };
 
     // Apply first usage metadata
     const secondResult = pruneMessages({
       messages,
-      usageMetadata: firstUsageMetadata
+      usageMetadata: firstUsageMetadata,
     });
 
     // Add two more messages
     messages.push(new HumanMessage('Message 4'));
-    const extendedMessages = [
-      ...messages,
-      new AIMessage('Response 4')
-    ];
+    const extendedMessages = [...messages, new AIMessage('Response 4')];
 
     // Second usage metadata update
     const secondUsageMetadata: Partial<UsageMetadata> = {
       input_tokens: 30,
       output_tokens: 20,
-      total_tokens: 50
+      total_tokens: 50,
     };
 
     // Apply second usage metadata with extended messages
     const thirdResult = pruneMessages({
       messages: extendedMessages,
-      usageMetadata: secondUsageMetadata
+      usageMetadata: secondUsageMetadata,
     });
 
     // The context should include the system message and some of the latest messages
@@ -260,15 +277,15 @@ describe('Token Distribution Edge Case Tests', () => {
     expect(thirdResult.context[1].content).toBe('Response 4');
 
     // Find which messages are in the final context
-    const contextMessageIndices = thirdResult.context.map(msg => {
+    const contextMessageIndices = thirdResult.context.map((msg) => {
       // Find the index of this message in the original array
-      return extendedMessages.findIndex(m => m.content === msg.content);
+      return extendedMessages.findIndex((m) => m.content === msg.content);
     });
 
     // Get the sum of token counts for messages in the context
     let totalContextTokens = 0;
     for (const idx of contextMessageIndices) {
-      totalContextTokens += thirdResult.indexTokenCountMap[idx];
+      totalContextTokens += thirdResult.indexTokenCountMap[idx] ?? 0;
     }
 
     // Verify that the sum of tokens for messages in the context is close to the total_tokens from usageMetadata
@@ -283,11 +300,15 @@ describe('Token Distribution Edge Case Tests', () => {
     // Verify that messages not in the context have their original token counts or previously adjusted values
     for (let i = 0; i < extendedMessages.length; i++) {
       if (!contextMessageIndices.includes(i)) {
-        const expectedValue =
-          i < messages.length
-            ? secondResult.indexTokenCountMap[i] || indexTokenCountMap[i]
-            : indexTokenCountMap[i];
-        const difference = Math.abs(thirdResult.indexTokenCountMap[i] - expectedValue);
+        const expectedValue =
+          i < messages.length
+            ? (secondResult.indexTokenCountMap[i] ?? 0) || indexTokenCountMap[i]
+            : ((indexTokenCountMap as Record<string, number | undefined>)[i] ??
+              0);
+
+        const difference = Math.abs(
+          (thirdResult.indexTokenCountMap[i] ?? 0) - expectedValue
+        );
         expect(difference).toBe(0);
       }
     }
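Aside from the formatting pass (trailing commas, wrapped calls, parenthesized arrow parameters), the one substantive pattern recurring in the test changes is the "?? 0" guard on indexed reads. That guard is what stricter index-access checking requires, for example TypeScript's noUncheckedIndexedAccess flag; the diff does not show the compiler settings, so the motivation is an assumption:

// Illustration, assuming "noUncheckedIndexedAccess": true in tsconfig.json.
const counts: Record<string, number> = { 0: 17 };
const safe: number = counts[1] ?? 0; // OK: a missing key falls back to 0
// const unsafe: number = counts[1]; // Error: type 'number | undefined' is
//                                   // not assignable to type 'number'.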