@aj-archipelago/cortex 1.3.32 → 1.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +1 -1
  2. package/lib/encodeCache.js +22 -10
  3. package/lib/pathwayTools.js +10 -3
  4. package/lib/requestExecutor.js +1 -1
  5. package/lib/util.js +136 -1
  6. package/package.json +2 -2
  7. package/pathways/system/entity/memory/sys_memory_manager.js +2 -1
  8. package/pathways/system/entity/sys_entity_continue.js +10 -2
  9. package/pathways/system/entity/sys_entity_start.js +12 -10
  10. package/pathways/system/entity/sys_router_tool.js +2 -2
  11. package/server/chunker.js +23 -3
  12. package/server/pathwayResolver.js +2 -5
  13. package/server/plugins/claude3VertexPlugin.js +2 -3
  14. package/server/plugins/cohereGeneratePlugin.js +1 -1
  15. package/server/plugins/gemini15ChatPlugin.js +1 -1
  16. package/server/plugins/geminiChatPlugin.js +1 -1
  17. package/server/plugins/localModelPlugin.js +1 -1
  18. package/server/plugins/modelPlugin.js +332 -77
  19. package/server/plugins/openAiChatPlugin.js +1 -1
  20. package/server/plugins/openAiCompletionPlugin.js +1 -1
  21. package/server/plugins/palmChatPlugin.js +1 -1
  22. package/server/plugins/palmCodeCompletionPlugin.js +1 -1
  23. package/server/plugins/palmCompletionPlugin.js +1 -1
  24. package/tests/chunkfunction.test.js +9 -6
  25. package/tests/claude3VertexPlugin.test.js +81 -3
  26. package/tests/data/largecontent.txt +1 -0
  27. package/tests/data/mixedcontent.txt +1 -0
  28. package/tests/encodeCache.test.js +47 -14
  29. package/tests/modelPlugin.test.js +21 -0
  30. package/tests/multimodal_conversion.test.js +1 -1
  31. package/tests/subscription.test.js +7 -1
  32. package/tests/tokenHandlingTests.test.js +587 -0
  33. package/tests/truncateMessages.test.js +404 -46
  34. package/tests/util.test.js +146 -0
@@ -0,0 +1,587 @@
1
+ import test from 'ava';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { encode } from '../lib/encodeCache.js';
5
+ import { getFirstNToken } from '../server/chunker.js';
6
+ import Claude3VertexPlugin from '../server/plugins/claude3VertexPlugin.js';
7
+ import ModelPlugin from '../server/plugins/modelPlugin.js';
8
+ import { mockPathwayResolverMessages } from './mocks.js';
9
+
10
// Shared mock pathway/model pair consumed by every plugin constructed below.
const { pathway, model } = mockPathwayResolverMessages;
11
+
12
// Helper function to load test data from files
/**
 * Loads a fixture file from `tests/data` relative to the working directory.
 *
 * @param {string} filename - Fixture file name inside tests/data.
 * @returns {string} File contents as UTF-8, or a small synthetic fallback
 *   string (`'a '` repeated 1000 times) when the file cannot be read, so the
 *   suite still runs in trimmed checkouts.
 *
 * NOTE(review): callers pass 'largeContent.txt' / 'mixedContent.txt' while the
 * package file list shows lowercase 'largecontent.txt' / 'mixedcontent.txt';
 * on case-sensitive filesystems that would always hit the fallback — confirm.
 */
function loadTestData(filename) {
    try {
        const filePath = path.join(process.cwd(), 'tests', 'data', filename);
        return fs.readFileSync(filePath, 'utf8');
    } catch (error) {
        // BUG FIX: previously logged the literal "$(unknown)" — wrong sigil and
        // wrong identifier — instead of interpolating the actual filename.
        console.error(`Error loading test data file ${filename}:`, error);
        // Return a smaller fallback test string if file loading fails
        return 'a '.repeat(1000);
    }
}
23
+
24
// Test the token count estimation accuracy
test('token count estimation accuracy', async (t) => {
    const plugin = new Claude3VertexPlugin(pathway, model);

    // Load large content from file (or use fallback if file doesn't exist)
    const largeContent = loadTestData('largeContent.txt');

    // Estimate produced by the plugin's sampling-based counter.
    const estimatedTokens = plugin.safeGetEncodedLength(largeContent);

    // Ground truth: encode a bounded prefix directly and scale linearly —
    // the fixture content is uniform, so projection is a fair comparison.
    const sampleSize = Math.min(100000, largeContent.length);
    const sample = largeContent.substring(0, sampleSize);
    const sampleTokens = encode(sample).length;
    const projectedRatio = largeContent.length / sample.length;
    const actualTokensEstimate = Math.ceil(sampleTokens * projectedRatio);

    // Log the results for analysis
    console.log(`Token count estimation test:
    - Content length: ${largeContent.length} characters
    - Estimated tokens (using sampling): ${estimatedTokens}
    - Projected actual tokens: ${actualTokensEstimate}
    - Difference: ${estimatedTokens - actualTokensEstimate} tokens
    - Overestimation percentage: ${((estimatedTokens - actualTokensEstimate) / actualTokensEstimate * 100).toFixed(2)}%`);

    // The estimator is designed to overestimate, never underestimate.
    t.true(estimatedTokens >= actualTokensEstimate,
        'Token count should overestimate to ensure we never exceed token limits');

    // Highly repetitive fixture data inflates the estimate, so allow up to 75%.
    const overestimationPercentage = (estimatedTokens - actualTokensEstimate) / actualTokensEstimate * 100;
    t.true(overestimationPercentage <= 75,
        `Overestimation should be reasonable (got ${overestimationPercentage.toFixed(2)}%, max allowed 75%)`);
});
61
+
62
// Test safeGetEncodedLength with different content types
test('safeGetEncodedLength with various content types', async (t) => {
    const plugin = new ModelPlugin(pathway, model);

    // Mixed-format fixture: segments separated by '===' markers.
    const mixedContent = loadTestData('mixedContent.txt');
    const chunks = mixedContent.split('===').filter(chunk => chunk.trim().length > 0);

    for (const [i, chunk] of chunks.entries()) {
        if (chunk.length < 1000) continue; // Skip very short chunks

        // Actual tokens via the direct encoder vs. the plugin's estimate.
        const directTokenCount = encode(chunk).length;
        const estimatedTokenCount = plugin.safeGetEncodedLength(chunk);

        // Log results for analysis
        console.log(`Content type #${i + 1} (${chunk.substring(0, 30)}...):
    - Length: ${chunk.length} characters
    - Direct token count: ${directTokenCount}
    - Estimated token count: ${estimatedTokenCount}
    - Overestimation: ${((estimatedTokenCount - directTokenCount) / directTokenCount * 100).toFixed(2)}%`);

        // Every content type must be overestimated, never underestimated.
        t.true(estimatedTokenCount >= directTokenCount,
            `Token count for content type #${i + 1} should overestimate`);
    }
});
94
+
95
// Test truncateMessagesToTargetLength function
test('truncateMessagesToTargetLength preserves recent content', async (t) => {
    const plugin = new Claude3VertexPlugin(pathway, model);

    const largeContent = loadTestData('largeContent.txt');

    // Conversation with a large middle message to force truncation there.
    const messages = [
        { role: 'system', content: 'System message' },
        { role: 'user', content: 'Short first user message' },
        { role: 'assistant', content: 'Short assistant response' },
        { role: 'user', content: largeContent.substring(0, 50000) }, // Large middle message
        { role: 'assistant', content: 'Another short reply' },
        { role: 'user', content: 'Final important question that should be preserved' }
    ];

    // Set a target token length that forces truncation
    const targetTokenLength = 10000;
    const truncatedMessages = plugin.truncateMessagesToTargetLength(messages, targetTokenLength);

    // The system message must survive truncation.
    t.true(truncatedMessages[0].role === 'system', 'System message should be preserved');

    // The most recent user message must survive truncation.
    const lastUserMessageIndex = truncatedMessages
        .map(m => m.role)
        .lastIndexOf('user');

    t.true(lastUserMessageIndex >= 0, 'At least one user message should be preserved');

    if (lastUserMessageIndex >= 0) {
        const lastUserMessage = truncatedMessages[lastUserMessageIndex];
        t.true(lastUserMessage.content.includes('Final important question'),
            'The last user message should be preserved');
    }

    // After truncation the whole conversation fits in the target budget.
    const totalTokens = plugin.safeGetEncodedLength(
        plugin.messagesToChatML(truncatedMessages, false)
    );

    t.true(totalTokens <= targetTokenLength,
        `Total token count (${totalTokens}) should be less than or equal to target (${targetTokenLength})`);
});
142
+
143
// Test getFirstNToken function
test('getFirstNToken truncates text accurately', (t) => {
    const largeContent = loadTestData('largeContent.txt');

    // Exercise a spread of budgets, including one larger than the content.
    const tokenCounts = [100, 500, 1000, 100000];

    for (const tokenCount of tokenCounts) {
        const truncatedText = getFirstNToken(largeContent, tokenCount);
        const actualTokenCount = encode(truncatedText).length;

        // Log results
        console.log(`getFirstNToken with limit ${tokenCount}:
    - Truncated text length: ${truncatedText.length} characters
    - Actual token count: ${actualTokenCount}`);

        // Hard upper bound: never exceed the requested budget.
        t.true(actualTokenCount <= tokenCount,
            `Truncated text should have at most ${tokenCount} tokens (got ${actualTokenCount})`);

        // For small token counts like 100, the optimization algorithm in getFirstNToken may result
        // in a larger gap to avoid encoding the whole text. We'll be more lenient with the small token count test.
        const minAcceptableTokens = tokenCount < 200 ? tokenCount - 20 : tokenCount - 5;
        t.true(actualTokenCount >= minAcceptableTokens || actualTokenCount === encode(largeContent).length,
            `Truncated text should have close to ${tokenCount} tokens when possible`);
    }
});
174
+
175
// Comprehensive tests for getFirstNToken across content categories
// (NOTE(review): test names say "getFirstNTokenSingle" but the public
// getFirstNToken export is what is exercised — confirm intended target.)
test('getFirstNTokenSingle handles various text types and lengths', (t) => {
    // Case table: entries with an `expected` value are checked exactly;
    // the rest are only checked against the generic invariants below.
    const testCases = [
        { name: 'empty text', text: '', maxTokens: 100, expected: '' },
        {
            name: 'short text under limit',
            text: 'This is a short text that should not be truncated.',
            maxTokens: 100,
            expected: 'This is a short text that should not be truncated.'
        },
        {
            name: 'text with special characters',
            text: 'Text with special chars: !@#$%^&*()_+{}[]|\\:;"<>,.?/~`',
            maxTokens: 50,
            expected: 'Text with special chars: !@#$%^&*()_+{}[]|\\:;"<>,.?/~`'
        },
        {
            name: 'text with unicode characters',
            text: 'Text with unicode: 你好世界 🌍 🌎 🌏',
            maxTokens: 50,
            expected: 'Text with unicode: 你好世界 🌍 🌎 🌏'
        },
        { name: 'text with repeated words', text: 'word '.repeat(1000), maxTokens: 100 },
        { name: 'text with long words', text: 'supercalifragilisticexpialidocious '.repeat(100), maxTokens: 100 }
    ];

    for (const testCase of testCases) {
        const result = getFirstNToken(testCase.text, testCase.maxTokens);

        // Log test case details
        console.log(`Testing ${testCase.name}:
    - Input length: ${testCase.text.length}
    - Result length: ${result.length}
    - Token count: ${encode(result).length}
    - Max tokens: ${testCase.maxTokens}`);

        // Invariant: never exceed the token budget.
        t.true(encode(result).length <= testCase.maxTokens,
            `${testCase.name}: Result should not exceed max tokens`);

        // Invariant: non-empty results end cleanly, on a complete word.
        if (result.length > 0) {
            t.false(result.endsWith(' '),
                `${testCase.name}: Result should not end with a space`);

            const lastWord = result.split(' ').pop();
            t.true(testCase.text.includes(lastWord),
                `${testCase.name}: Last word should be complete`);
        }

        // Exact-match check for cases that declare an expected output.
        if (testCase.expected) {
            t.is(result, testCase.expected,
                `${testCase.name}: Result should match expected output`);
        }
    }
});
247
+
248
test('getFirstNTokenSingle handles edge cases and boundary conditions', (t) => {
    // Tiny budget: a single token.
    const smallResult = getFirstNToken('This is a test sentence.', 1);
    t.true(encode(smallResult).length <= 1,
        'Should handle very small token counts');

    // Large budget against large input.
    const largeResult = getFirstNToken('word '.repeat(10000), 5000);
    t.true(encode(largeResult).length <= 5000,
        'Should handle very large token counts');

    // Zero and negative budgets both yield the empty string.
    t.is(getFirstNToken('any text', 0), '',
        'Should return empty string for zero tokens');
    t.is(getFirstNToken('any text', -1), '',
        'Should return empty string for negative tokens');

    // Nullish inputs are tolerated and yield the empty string.
    t.is(getFirstNToken(null, 100), '',
        'Should handle null text');
    t.is(getFirstNToken(undefined, 100), '',
        'Should handle undefined text');

    // Whitespace-only inputs reduce to nothing after trimming.
    t.is(getFirstNToken('   ', 100).trim(), '',
        'Should handle text with only spaces');
    t.is(getFirstNToken('\n\n\n', 100).trim(), '',
        'Should handle text with only newlines');
});
290
+
291
test('getFirstNTokenSingle maintains text quality and readability', (t) => {
    // Paragraph-structured text: truncation should not bisect a paragraph.
    const paragraphText = `First paragraph with some content.

Second paragraph with different content.

Third paragraph with more content.`;

    const paragraphResult = getFirstNToken(paragraphText, 50);

    for (const para of paragraphResult.split('\n\n')) {
        t.true(paragraphText.includes(para),
            'Should preserve complete paragraphs when possible');
    }

    // Numbered-list text: truncation should not bisect a list item.
    const listText = `1. First item
2. Second item
3. Third item`;

    const listResult = getFirstNToken(listText, 30);

    for (const item of listResult.split('\n')) {
        if (item.trim()) {
            t.true(listText.includes(item),
                'Should preserve complete list items when possible');
        }
    }

    // Code text: the truncated result must be a prefix substring of the source.
    const codeText = `function test() {
  console.log("Hello");
  return true;
}`;

    const codeResult = getFirstNToken(codeText, 40);

    t.true(codeText.includes(codeResult),
        'Should preserve complete code blocks when possible');
});
334
+
335
// Test getFirstNToken with the large content fixture
test('getFirstNTokenSingle handles large content file', (t) => {
    const largeContent = loadTestData('largeContent.txt');

    // Budgets spanning two orders of magnitude.
    const tokenCounts = [100, 500, 1000, 2000];

    for (const tokenCount of tokenCounts) {
        const result = getFirstNToken(largeContent, tokenCount);
        const actualTokenCount = encode(result).length;

        // Log test details
        console.log(`Testing large content with ${tokenCount} tokens:
    - Input length: ${largeContent.length} characters
    - Result length: ${result.length} characters
    - Actual token count: ${actualTokenCount}
    - Token ratio: ${(actualTokenCount / result.length).toFixed(3)} tokens/char`);

        // Hard upper bound on the budget.
        t.true(actualTokenCount <= tokenCount,
            `Result should not exceed ${tokenCount} tokens (got ${actualTokenCount})`);

        // Soft lower bound: use at least 80% of the budget (or the whole input).
        const minAcceptableTokens = tokenCount * 0.8;
        t.true(actualTokenCount >= minAcceptableTokens || actualTokenCount === encode(largeContent).length,
            `Should get close to ${tokenCount} tokens when possible (got ${actualTokenCount})`);

        // Clean word boundary at the end of the result.
        if (result.length > 0) {
            t.false(result.endsWith(' '),
                'Result should not end with a space');

            const lastWord = result.split(' ').pop();
            t.true(largeContent.includes(lastWord),
                'Last word should be complete');
        }
    }
});
375
+
376
// Test handling of very large messages that exceed token limits
test('handles messages exceeding token limit', async (t) => {
    const plugin = new Claude3VertexPlugin(pathway, model);

    // Enable the plugin's automatic token-length management for this test.
    plugin.promptParameters.manageTokenLength = true;

    const largeContent = loadTestData('largeContent.txt');

    // Minimal stand-in for the project's Prompt class: just carries messages.
    class TestPrompt {
        constructor(props) {
            this.messages = props.messages;
        }
    }

    const customPrompt = new TestPrompt({
        messages: [
            { role: 'system', content: 'System message' },
            { role: 'user', content: largeContent },
            { role: 'assistant', content: 'Assistant response' },
            { role: 'user', content: 'Final question?' }
        ]
    });

    const parameters = { stream: false };

    // Call getRequestParameters with our custom prompt
    const requestParameters = await plugin.getRequestParameters(
        'Help me',
        parameters,
        customPrompt
    );

    t.truthy(requestParameters);
    t.truthy(requestParameters.messages);

    // Claude/Vertex lifts the system message into a dedicated field.
    t.is(requestParameters.system, 'System message');

    // Sum the text lengths across (possibly multi-part) message contents.
    const totalContentLength = requestParameters.messages.reduce((total, msg) => {
        if (Array.isArray(msg.content)) {
            return total + msg.content.reduce((sum, item) => {
                return sum + (item.text ? item.text.length : 0);
            }, 0);
        }
        return total;
    }, 0);

    // Truncation must have shrunk the oversized conversation.
    t.true(totalContentLength < largeContent.length, 'Message content should be truncated');

    // The most recent user turn must survive truncation.
    const lastUserMessage = requestParameters.messages[requestParameters.messages.length - 1];
    t.is(lastUserMessage.role, 'user');
    t.true(Array.isArray(lastUserMessage.content) &&
        lastUserMessage.content.some(c => c.type === 'text' && c.text && c.text.includes('Final question')),
        'Final user message should be preserved');

    // And the result must fit the model's prompt-token budget.
    const totalTokens = plugin.safeGetEncodedLength(
        plugin.messagesToChatML(requestParameters.messages, false)
    );
    const maxTokens = plugin.getModelMaxPromptTokens();
    t.true(totalTokens <= maxTokens,
        `Total token count (${totalTokens}) should be within model limit (${maxTokens})`);
});
450
+
451
// Test truncateMessagesToTargetLength with very long content
test('truncateMessagesToTargetLength handles very long content', async (t) => {
    const plugin = new Claude3VertexPlugin(pathway, model);

    const largeContent = loadTestData('largeContent.txt');

    // Two oversized user turns sandwiched between short replies.
    const messages = [
        { role: 'system', content: 'System message' },
        { role: 'user', content: largeContent }, // Very long message
        { role: 'assistant', content: 'Short response' },
        { role: 'user', content: largeContent }, // Another very long message
        { role: 'assistant', content: 'Another short response' },
        { role: 'user', content: 'Final important question that should be preserved' }
    ];

    // Truncate against the model's own prompt-token budget.
    const maxTokens = plugin.getModelMaxPromptTokens();
    const truncatedMessages = plugin.truncateMessagesToTargetLength(messages, maxTokens);

    // Log initial and final message counts
    console.log(`Truncation test:
    - Initial message count: ${messages.length}
    - Final message count: ${truncatedMessages.length}
    - Target token limit: ${maxTokens}`);

    t.truthy(truncatedMessages, 'Should return truncated messages');
    t.true(truncatedMessages.length > 0, 'Should have at least one message');

    // The most recent user turn must survive truncation.
    const lastUserMessage = truncatedMessages[truncatedMessages.length - 1];
    t.is(lastUserMessage.role, 'user');
    t.true(lastUserMessage.content.includes('Final important question'),
        'Final user message should be preserved');

    // Within budget after truncation.
    const totalTokens = plugin.countMessagesTokens(truncatedMessages);
    t.true(totalTokens <= maxTokens,
        `Total token count (${totalTokens}) should be within model limit (${maxTokens})`);

    // But not overly aggressive: keep at least 80% of the available budget.
    const minAcceptableTokens = maxTokens * 0.8;
    t.true(totalTokens >= minAcceptableTokens,
        `Should use a reasonable amount of available tokens (got ${totalTokens}, expected at least ${minAcceptableTokens})`);

    // Relative message order is untouched.
    const originalOrder = messages.map(m => m.role);
    const truncatedOrder = truncatedMessages.map(m => m.role);
    t.deepEqual(truncatedOrder, originalOrder.slice(0, truncatedOrder.length),
        'Message order should be preserved');

    // Truncation never leaves an empty message behind.
    for (const msg of truncatedMessages) {
        t.true(msg.content && msg.content.length > 0,
            'No messages should be empty');
    }

    // Log detailed message sizes
    console.log('Message sizes after truncation:');
    truncatedMessages.forEach((msg, i) => {
        const msgTokens = plugin.safeGetEncodedLength(
            plugin.messagesToChatML([msg], false)
        );
        console.log(`  ${msg.role} message ${i + 1}: ${msgTokens} tokens`);
    });
});
521
+
522
// Test truncateMessagesToTargetLength with very long content for per message token length
test('truncateMessagesToTargetLength handles very long content for per message token length', async (t) => {
    const plugin = new Claude3VertexPlugin(pathway, model);

    const largeContent = loadTestData('largeContent.txt');

    // Oversized user turns in all three supported content shapes:
    // plain string, array of {type,text} objects, and array of strings.
    const messages = [
        { role: 'system', content: 'System message' },
        { role: 'user', content: largeContent }, // Very long message as a single string
        { role: 'assistant', content: 'Short response' },
        { role: 'user', content: [{type: 'text', text: largeContent}, {type: 'text', text: largeContent}] }, // Another very long message as an array of text objects
        { role: 'assistant', content: 'Another short response' },
        { role: 'user', content: [largeContent, largeContent] }, // Another very long message as an array of strings
        { role: 'assistant', content: 'A third short response' },
        { role: 'user', content: 'Final important question that should be preserved' }
    ];

    const maxTokens = plugin.getModelMaxPromptTokens();

    // Null overall target, 1000-token per-message cap.
    const truncatedMessages = plugin.truncateMessagesToTargetLength(messages, null, 1000);

    // Log initial and final message counts
    console.log(`Truncation test:
    - Initial message count: ${messages.length}
    - Final message count: ${truncatedMessages.length}
    - Target token limit: 1000`);

    t.truthy(truncatedMessages, 'Should return truncated messages');
    t.true(truncatedMessages.length > 0, 'Should have at least one message');

    // The most recent user turn must survive truncation.
    const lastUserMessage = truncatedMessages[truncatedMessages.length - 1];
    t.is(lastUserMessage.role, 'user');
    t.true(lastUserMessage.content.includes('Final important question'),
        'Final user message should be preserved');

    // The whole conversation still fits the model budget.
    const totalTokens = plugin.countMessagesTokens(truncatedMessages);
    t.true(totalTokens <= maxTokens,
        `Total token count (${totalTokens}) should be within model limit (${maxTokens})`);

    // Relative message order is untouched.
    const originalOrder = messages.map(m => m.role);
    const truncatedOrder = truncatedMessages.map(m => m.role);
    t.deepEqual(truncatedOrder, originalOrder.slice(0, truncatedOrder.length),
        'Message order should be preserved');

    // Truncation never leaves an empty message behind.
    for (const msg of truncatedMessages) {
        t.true(msg.content && msg.content.length > 0,
            'No messages should be empty');
    }

    // Every individual message respects the per-message cap (small slack
    // of 10 tokens allowed for boundary effects).
    console.log('Message sizes after truncation:');
    truncatedMessages.forEach((msg, i) => {
        const msgTokens = plugin.countMessagesTokens([msg]);
        t.true(msgTokens <= 1010, `Message ${i + 1} tokens (${msgTokens}) should be near target limit (1000)`);
        console.log(`  ${msg.role} message ${i + 1}: ${msgTokens} tokens`);
    });
});