@aj-archipelago/cortex 1.3.22 → 1.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -271,12 +271,18 @@ test('Pathological cases', async (t) => {
271
271
 
272
272
  t.is(geminiSystem15.parts[0].text, 'You are a helpful assistant.');
273
273
  t.is(geminiSystem15.parts[1].text, 'You are also very knowledgeable.');
274
+
274
275
  t.is(geminiMessages15.length, 3);
276
+ // First user message combines "Hello" and "Another greeting"
275
277
  t.is(geminiMessages15[0].role, 'user');
276
278
  t.is(geminiMessages15[0].parts[0].text, 'Hello');
277
279
  t.is(geminiMessages15[0].parts[1].text, 'Another greeting');
280
+
281
+ // Assistant message "Hi there!"
278
282
  t.is(geminiMessages15[1].role, 'assistant');
279
283
  t.is(geminiMessages15[1].parts[0].text, 'Hi there!');
284
+
285
+ // Final user message combines "How are you?", image content, and "Another question"
280
286
  t.is(geminiMessages15[2].role, 'user');
281
287
  t.is(geminiMessages15[2].parts[0].text, 'How are you?');
282
288
  t.is(geminiMessages15[2].parts[1].text, 'What\'s this?');
@@ -310,6 +316,79 @@ test('Empty message array', async (t) => {
310
316
  t.is(geminiMessages15.length, 0);
311
317
  });
312
318
 
319
// Gemini 1.5 conversion when message content is a bare string or an array of
// bare strings: each turn is expected to come back with exactly one text part.
test('Simple string array content', async (t) => {
  const { gemini15 } = createPlugins();

  const initialText = "Initial message";
  const quotedLyric = "\"Searchin' for my lost shaker of salt...\"\n";
  const followUpText = "Here's another simple string in an array";

  const conversation = [
    { role: 'user', content: initialText },
    { role: 'assistant', content: [quotedLyric] },
    { role: 'user', content: [followUpText] },
  ];

  const converted = gemini15.convertMessagesToGemini(conversation).modifiedMessages;

  // [expected role, expected text of the single part], in conversation order.
  const expectedTurns = [
    ['user', initialText],
    ['assistant', quotedLyric],
    ['user', followUpText],
  ];

  t.is(converted.length, 3);
  expectedTurns.forEach(([expectedRole, expectedText], position) => {
    const turn = converted[position];
    t.is(turn.role, expectedRole);
    t.is(turn.parts.length, 1);
    t.is(turn.parts[0].text, expectedText);
  });
});
346
+
347
// Gemini 1.5 conversion when every content item is a JSON-encoded string.
// Asserts that text items come back as text parts and that the gs:// image_url
// item comes back as a fileData part carrying the same URI.
test('String-encoded multimodal content', async (t) => {
  const { gemini15 } = createPlugins();

  const imageUri = 'gs://my-bucket/image.jpg';
  const encodeText = (text) => JSON.stringify({ type: 'text', text });

  const conversation = [
    {
      role: 'user',
      content: [
        encodeText('What is in this image?'),
        JSON.stringify({ type: 'image_url', image_url: { url: imageUri } }),
      ],
    },
    { role: 'assistant', content: [encodeText('I see a cat.')] },
    { role: 'user', content: [encodeText('Is it a big cat?')] },
  ];

  const converted = gemini15.convertMessagesToGemini(conversation).modifiedMessages;

  t.is(converted.length, 3);

  // First user turn: one text part followed by the image reference.
  const question = converted[0];
  t.is(question.role, 'user');
  t.is(question.parts.length, 2);
  t.is(question.parts[0].text, 'What is in this image?');
  t.true('fileData' in question.parts[1]);
  t.is(question.parts[1].fileData.fileUri, imageUri);

  // Assistant reply: a single text part.
  const answer = converted[1];
  t.is(answer.role, 'assistant');
  t.is(answer.parts.length, 1);
  t.is(answer.parts[0].text, 'I see a cat.');

  // Follow-up user turn: a single text part.
  const followUp = converted[2];
  t.is(followUp.role, 'user');
  t.is(followUp.parts.length, 1);
  t.is(followUp.parts[0].text, 'Is it a big cat?');
});
391
+
313
392
  // Test messages with only system messages
314
393
  test('Only system messages', async (t) => {
315
394
  const { openai, claude, gemini, gemini15 } = createPlugins();
@@ -417,3 +496,93 @@ test('Gemini 1.5 image URL edge cases', t => {
417
496
  // Verify we only have one part (the text)
418
497
  t.is(modifiedMessages[0].parts.length, 1, 'Should only have the text part');
419
498
  });
499
+
500
// Claude conversion of a single user message that carries several images.
// Five content items go in (two text, two base64 image_url, one gcs-only image)
// and four are expected out, i.e. the gcs-only entry is expected to be absent
// from the converted content.
test('Multiple images in single Claude message', async (t) => {
  const { claude } = createPlugins();

  const multiImageMessage = [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Compare these images:' },
        { type: 'image_url', image_url: { url: sampleBase64Image } },
        { type: 'text', text: 'with this one:' },
        { type: 'image_url', image_url: { url: sampleBase64Image } },
        { type: 'image_url', gcs: 'gs://cortex-bucket/image.jpg' },
      ],
    },
  ];

  const { modifiedMessages } = await claude.convertMessagesToClaudeVertex(multiImageMessage);

  t.is(modifiedMessages.length, 1);
  t.is(modifiedMessages[0].role, 'user');
  t.is(modifiedMessages[0].content.length, 4);
  t.is(modifiedMessages[0].content[0].text, 'Compare these images:');
  // t.is (rather than t.true on a === comparison) reports the actual value on failure.
  t.is(modifiedMessages[0].content[1].source.type, 'base64');
  t.is(modifiedMessages[0].content[2].text, 'with this one:');
  t.is(modifiedMessages[0].content[3].source.type, 'base64');
});
524
+
525
// Feeds one six-message history (system prompt, a base64 image turn, a gcs image
// turn and plain-text turns) through both the Claude and the Gemini 1.5 converters.
test('Conversation history with mixed image types', async (t) => {
  const { claude, gemini15 } = createPlugins();

  const systemPrompt = 'You are a visual analysis assistant.';
  const firstReply = 'I see a landscape.';
  const secondReply = 'The second image shows a different scene.';
  const closingQuestion = 'Which one do you prefer?';
  const gcsImageUri = 'gs://cortex-bucket/image2.jpg';

  const conversationHistory = [
    { role: 'system', content: systemPrompt },
    {
      role: 'user',
      content: [
        { type: 'text', text: "What's in this image?" },
        { type: 'image_url', image_url: { url: sampleBase64Image } },
      ],
    },
    { role: 'assistant', content: firstReply },
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Compare it with this:' },
        { type: 'image_url', gcs: gcsImageUri },
      ],
    },
    { role: 'assistant', content: secondReply },
    { role: 'user', content: closingQuestion },
  ];

  // Claude: the system prompt is returned separately, leaving five messages.
  const claudeResult = await claude.convertMessagesToClaudeVertex(conversationHistory);
  const claudeMessages = claudeResult.modifiedMessages;

  t.is(claudeResult.system, systemPrompt);
  t.is(claudeMessages.length, 5);
  t.is(claudeMessages[1].content[0].text, firstReply);
  t.is(claudeMessages[3].content[0].text, secondReply);
  t.is(claudeMessages[4].content[0].text, closingQuestion);

  // Gemini 1.5: base64 image expected as inlineData, gcs image as fileData.
  const geminiResult = gemini15.convertMessagesToGemini(conversationHistory);
  const geminiMessages15 = geminiResult.modifiedMessages;

  t.is(geminiResult.system.parts[0].text, systemPrompt);
  t.is(geminiMessages15.length, 5);
  t.true('inlineData' in geminiMessages15[0].parts[1]);
  t.is(geminiMessages15[1].parts[0].text, firstReply);
  t.true('fileData' in geminiMessages15[2].parts[1]);
  t.is(geminiMessages15[2].parts[1].fileData.fileUri, gcsImageUri);
  t.is(geminiMessages15[3].parts[0].text, secondReply);
  t.is(geminiMessages15[4].parts[0].text, closingQuestion);
});
565
+
566
// Sends a message with a very large base64 data URL through both converters.
// Only the leading text part is asserted; what each converter does with the
// image part itself is not checked here.
test('Large image handling', async (t) => {
  const { claude, gemini15 } = createPlugins();

  const promptText = 'Check this large image:';

  // 10 * 1024 * 1024 base64 characters appended to the data-URL prefix.
  const base64Payload = 'A'.repeat(10 * 1024 * 1024);
  const largeSampleImage = 'data:image/jpeg;base64,' + base64Payload;

  const largeImageMessage = [
    {
      role: 'user',
      content: [
        { type: 'text', text: promptText },
        { type: 'image_url', image_url: { url: largeSampleImage } },
      ],
    },
  ];

  // Neither conversion should fail on the oversized image.
  const claudeResult = await claude.convertMessagesToClaudeVertex(largeImageMessage);
  const geminiResult = gemini15.convertMessagesToGemini(largeImageMessage);

  // The text part must still be the first entry in both outputs.
  t.is(claudeResult.modifiedMessages[0].content[0].text, promptText);
  t.is(geminiResult.modifiedMessages[0].parts[0].text, promptText);
});
@@ -0,0 +1,217 @@
1
+ import test from 'ava';
2
+ import { convertSrtToVtt } from '../pathways/transcribe_gemini.js';
3
+
4
// null, an empty string and a blank string must all yield a header-only WebVTT document.
test('should return empty WebVTT for null or empty input', t => {
  const headerOnlyVtt = "WEBVTT\n\n";
  const blankInputs = [null, '', ' '];

  for (const blankInput of blankInputs) {
    t.is(convertSrtToVtt(blankInput), headerOnlyVtt);
  }
});
9
+
10
// A single cue: expects the WEBVTT header, dot-separated milliseconds and a
// blank line after the cue.
test('should convert basic SRT to WebVTT format', t => {
  const srtLines = [
    '1',
    '00:00:01,000 --> 00:00:04,000',
    'Hello world',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:01.000 --> 00:00:04.000',
    'Hello world',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
26
+
27
// Two cues separated by a blank line must both appear, in order, in the output.
test('should convert multiple subtitle entries', t => {
  const srtLines = [
    '1',
    '00:00:01,000 --> 00:00:04,000',
    'First subtitle',
    '',
    '2',
    '00:00:05,000 --> 00:00:08,000',
    'Second subtitle',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:01.000 --> 00:00:04.000',
    'First subtitle',
    '',
    '2',
    '00:00:05.000 --> 00:00:08.000',
    'Second subtitle',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
51
+
52
// CRLF-terminated input must produce LF-only WebVTT output.
test('should handle DOS line endings', t => {
  const crlfSrt = ['1', '00:00:01,000 --> 00:00:04,000', 'Hello world', ''].join('\r\n');
  const lfVtt = ['WEBVTT', '', '1', '00:00:01.000 --> 00:00:04.000', 'Hello world', '', ''].join('\n');

  t.is(convertSrtToVtt(crlfSrt), lfVtt);
});
57
+
58
// A cue whose text spans three lines must keep every text line.
test('should handle multi-line subtitles', t => {
  const srtLines = [
    '1',
    '00:00:01,000 --> 00:00:04,000',
    'First line',
    'Second line',
    'Third line',
    '',
    '2',
    '00:00:05,000 --> 00:00:08,000',
    'Another subtitle',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:01.000 --> 00:00:04.000',
    'First line',
    'Second line',
    'Third line',
    '',
    '2',
    '00:00:05.000 --> 00:00:08.000',
    'Another subtitle',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
86
+
87
// A cue with an unparseable timing line is expected to be left out of the
// output, while the following valid cue is kept with its original index.
test('should handle invalid timestamp formats', t => {
  const srtLines = [
    '1',
    'invalid timestamp',
    'Hello world',
    '',
    '2',
    '00:00:05,000 --> 00:00:08,000',
    'Valid subtitle',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '2',
    '00:00:05.000 --> 00:00:08.000',
    'Valid subtitle',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
107
+
108
// The SRT millisecond separator (comma) must become a dot, with the digits unchanged.
test('should convert comma to dot in timestamps', t => {
  const srtLines = [
    '1',
    '00:00:01,500 --> 00:00:04,750',
    'Test subtitle',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:01.500 --> 00:00:04.750',
    'Test subtitle',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
124
+
125
// Leading and trailing blank lines around the single cue must not affect the output.
// NOTE(review): the padding lines are reproduced as empty lines, as they appear
// in the visible source; confirm they carry no spaces/tabs in the real file.
test('should handle extra whitespace in input', t => {
  const paddedSrtLines = [
    '',
    '',
    '1',
    '00:00:01,000 --> 00:00:04,000',
    'Hello world',
    '',
    '',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:01.000 --> 00:00:04.000',
    'Hello world',
    '',
    '',
  ];

  t.is(convertSrtToVtt(paddedSrtLines.join('\n')), vttLines.join('\n'));
});
143
+
144
// MM:SS,mmm timestamps must be expanded to HH:MM:SS.mmm with a zero hour field.
test('should handle timestamps with only minutes and seconds', t => {
  const srtLines = [
    '1',
    '01:30,000 --> 02:45,500',
    'Short timestamp format',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:01:30.000 --> 00:02:45.500',
    'Short timestamp format',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
160
+
161
// SS.mmm timestamps must be expanded to HH:MM:SS.mmm with zero hour and minute fields.
test('should handle ultra-short timestamps (SS.mmm)', t => {
  const srtLines = [
    '1',
    '03.298 --> 04.578',
    'First line',
    '',
    '2',
    '04.578 --> 06.178',
    'Second line',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:03.298 --> 00:00:04.578',
    'First line',
    '',
    '2',
    '00:00:04.578 --> 00:00:06.178',
    'Second line',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});
185
+
186
// SS.mmm, MM:SS.mmm and HH:MM:SS.mmm cues in one input must all be normalized
// to HH:MM:SS.mmm.
test('should handle mixed timestamp formats', t => {
  const srtLines = [
    '1',
    '03.298 --> 04.578',
    'First line',
    '',
    '2',
    '00:04.578 --> 00:06.178',
    'Second line',
    '',
    '3',
    '00:00:06.178 --> 00:00:07.518',
    'Third line',
  ];

  const vttLines = [
    'WEBVTT',
    '',
    '1',
    '00:00:03.298 --> 00:00:04.578',
    'First line',
    '',
    '2',
    '00:00:04.578 --> 00:00:06.178',
    'Second line',
    '',
    '3',
    '00:00:06.178 --> 00:00:07.518',
    'Third line',
    '',
    '',
  ];

  t.is(convertSrtToVtt(srtLines.join('\n')), vttLines.join('\n'));
});