@aj-archipelago/cortex 1.3.17 → 1.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -494,25 +494,37 @@ async function deleteGCS(blobName) {
494
494
  if (!gcs) throw new Error("Google Cloud Storage is not initialized");
495
495
 
496
496
  try {
497
+ const bucket = gcs.bucket(GCS_BUCKETNAME);
498
+ const deletedFiles = [];
499
+
497
500
  if (process.env.STORAGE_EMULATOR_HOST) {
498
501
  // For fake GCS server, use HTTP API directly
499
- const response = await axios.delete(
500
- `http://localhost:4443/storage/v1/b/${GCS_BUCKETNAME}/o/${encodeURIComponent(blobName)}`,
501
- { validateStatus: status => status === 200 || status === 404 }
502
+ const response = await axios.get(
503
+ `http://localhost:4443/storage/v1/b/${GCS_BUCKETNAME}/o`,
504
+ { params: { prefix: blobName } }
502
505
  );
503
- if (response.status === 200) {
504
- console.log(`Cleaned GCS file: ${blobName}`);
505
- return [blobName];
506
+ if (response.data.items) {
507
+ for (const item of response.data.items) {
508
+ await axios.delete(
509
+ `http://localhost:4443/storage/v1/b/${GCS_BUCKETNAME}/o/${encodeURIComponent(item.name)}`,
510
+ { validateStatus: status => status === 200 || status === 404 }
511
+ );
512
+ deletedFiles.push(item.name);
513
+ }
506
514
  }
507
- return [];
508
515
  } else {
509
516
  // For real GCS, use the SDK
510
- const bucket = gcs.bucket(GCS_BUCKETNAME);
511
- const file = bucket.file(blobName);
512
- await file.delete();
513
- console.log(`Cleaned GCS file: ${blobName}`);
514
- return [blobName];
517
+ const [files] = await bucket.getFiles({ prefix: blobName });
518
+ for (const file of files) {
519
+ await file.delete();
520
+ deletedFiles.push(file.name);
521
+ }
522
+ }
523
+
524
+ if (deletedFiles.length > 0) {
525
+ console.log(`Cleaned GCS files: ${deletedFiles.join(', ')}`);
515
526
  }
527
+ return deletedFiles;
516
528
  } catch (error) {
517
529
  if (error.code !== 404) {
518
530
  console.error(`Error in deleteGCS: ${error}`);
@@ -541,4 +553,15 @@ async function ensureGCSUpload(context, existingFile) {
541
553
  return existingFile;
542
554
  }
543
555
 
544
- export { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs };
556
/**
 * Upload one chunk file to the configured GCS bucket.
 *
 * The object is stored as `<requestId>/<basename of chunkPath>`.
 *
 * @param {string} chunkPath - Local path of the chunk file to upload.
 * @param {string} requestId - Identifier used as the object-name prefix.
 * @returns {Promise<string|null>} The `gs://` URL of the uploaded object, or
 *   `null` when no GCS client is available.
 */
async function uploadChunkToGCS(chunkPath, requestId) {
    if (gcs) {
        const chunkName = path.basename(chunkPath);
        const objectName = `${requestId}/${chunkName}`;
        const targetBucket = gcs.bucket(GCS_BUCKETNAME);

        await targetBucket.upload(chunkPath, { destination: objectName });

        return `gs://${GCS_BUCKETNAME}/${objectName}`;
    }

    // GCS is not configured: nothing to upload.
    return null;
}
566
+
567
+ export { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, uploadChunkToGCS };
@@ -1,5 +1,5 @@
1
1
  import { downloadFile, splitMediaFile } from './fileChunker.js';
2
- import { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, AZURE_STORAGE_CONTAINER_NAME } from './blobHandler.js';
2
+ import { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, AZURE_STORAGE_CONTAINER_NAME, uploadChunkToGCS } from './blobHandler.js';
3
3
  import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
4
4
  import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
5
5
  import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
@@ -104,9 +104,7 @@ async function CortexFileHandler(context, req) {
104
104
  const azureResult = useAzure ? await deleteBlob(deleteRequestId) : await deleteFolder(deleteRequestId);
105
105
  const gcsResult = [];
106
106
  if (gcs) {
107
- for (const blobName of azureResult) {
108
- gcsResult.push(...await deleteGCS(blobName));
109
- }
107
+ gcsResult.push(...await deleteGCS(deleteRequestId));
110
108
  }
111
109
 
112
110
  context.res = {
@@ -393,10 +391,21 @@ async function CortexFileHandler(context, req) {
393
391
  // sequential processing of chunks
394
392
  for (let index = 0; index < chunks.length; index++) {
395
393
  const chunkPath = chunks[index];
396
- const blobName = useAzure ? await saveFileToBlob(chunkPath, requestId) : await moveFileToPublicFolder(chunkPath, requestId);
394
+ let blobName;
395
+ let gcsUrl;
396
+
397
+ if (useAzure) {
398
+ blobName = await saveFileToBlob(chunkPath, requestId);
399
+ } else {
400
+ blobName = await moveFileToPublicFolder(chunkPath, requestId);
401
+ }
402
+
403
+ // If GCS is configured, save to GCS
404
+ gcsUrl = await uploadChunkToGCS(chunkPath, requestId);
405
+
397
406
  const chunkOffset = chunkOffsets[index];
398
- result.push({ uri: blobName, offset: chunkOffset });
399
- console.log(`Saved chunk as: ${blobName}`);
407
+ result.push({ uri: blobName, offset: chunkOffset, gcs: gcsUrl });
408
+ console.log(`Saved chunk as: ${blobName}${gcsUrl ? ` and ${gcsUrl}` : ''}`);
400
409
  await sendProgress();
401
410
  }
402
411
 
@@ -5,6 +5,10 @@ import axios from 'axios';
5
5
  import FormData from 'form-data';
6
6
  import { port, publicFolder, ipAddress } from '../start.js';
7
7
  import { v4 as uuidv4 } from 'uuid';
8
+ import path from 'path';
9
+ import os from 'os';
10
+ import fs from 'fs';
11
+ import { execSync } from 'child_process';
8
12
 
9
13
  // Add these helper functions at the top after imports
10
14
  const baseUrl = `http://localhost:${port}/api/CortexFileHandler`;
@@ -597,8 +601,7 @@ test.serial('should handle hash reuse with Azure storage', async t => {
597
601
  const originalUrl = upload1.data.url;
598
602
 
599
603
  // Check hash exists and returns the correct URL
600
- const hashCheck1 = await axios.get(baseUrl, {
601
- params: { hash: testHash, checkHash: true },
604
+ const hashCheck1 = await axios.get(baseUrl, { hash: testHash, checkHash: true }, {
602
605
  validateStatus: status => true
603
606
  });
604
607
  t.is(hashCheck1.status, 200, 'Hash should exist after first upload');
@@ -634,8 +637,7 @@ test.serial('should handle hash reuse with Azure storage', async t => {
634
637
  await cleanupUploadedFile(t, originalUrl);
635
638
 
636
639
  // Verify hash is now gone
637
- const hashCheckAfterDelete = await axios.get(baseUrl, {
638
- params: { hash: testHash, checkHash: true },
640
+ const hashCheckAfterDelete = await axios.get(baseUrl, { hash: testHash, checkHash: true }, {
639
641
  validateStatus: status => true
640
642
  });
641
643
  t.is(hashCheckAfterDelete.status, 404, 'Hash should be gone after file deletion');
@@ -762,6 +764,144 @@ test.serial('should handle GCS URL format and accessibility', async t => {
762
764
  await cleanupUploadedFile(t, uploadResponse.data.url);
763
765
  });
764
766
 
767
/**
 * Generate a 10-second silent MP3 with ffmpeg, upload it to the file handler
 * endpoint and return the `url` field of the upload response.
 *
 * The temporary directory that holds the generated file is removed in a
 * `finally` block, so it is cleaned up even when ffmpeg or the upload fails.
 *
 * @returns {Promise<*>} `uploadResponse.data.url` (presumably a URL string —
 *   the response shape is defined by the handler, not visible here).
 * @throws Re-throws any error raised while generating or uploading the file,
 *   after logging it.
 */
async function createAndUploadTestFile() {
    // Create a temporary file path
    const tempDir = path.join(os.tmpdir(), uuidv4());
    fs.mkdirSync(tempDir, { recursive: true });
    const tempFile = path.join(tempDir, 'test.mp3');

    try {
        // Generate a real MP3 file using ffmpeg
        execSync(`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t 10 -q:a 9 -acodec libmp3lame "${tempFile}"`, {
            stdio: ['ignore', 'pipe', 'pipe']
        });

        // Upload the real media file
        const form = new FormData();
        form.append('file', fs.createReadStream(tempFile));

        const uploadResponse = await axios.post(baseUrl, form, {
            headers: form.getHeaders(),
            validateStatus: status => true,
            timeout: 5000
        });

        // Wait a short time to ensure file is available
        await new Promise(resolve => setTimeout(resolve, 1000));

        return uploadResponse.data.url;
    } catch (error) {
        console.error('Error creating test file:', error);
        throw error;
    } finally {
        // Clean up temp file on success and on failure alike
        fs.rmSync(tempDir, { recursive: true, force: true });
    }
}
802
+
803
// End-to-end check: chunk an uploaded media file, confirm every chunk is
// reachable through its primary URI and present in GCS, then delete by
// requestId and confirm the chunks are gone from both places.
test.serial('should handle chunking with GCS integration when configured', async t => {
    if (!isGCSConfigured()) {
        t.pass('Skipping test - GCS not configured');
        return;
    }

    // Never reject on HTTP status so assertions can inspect it; cap at 5s.
    const lenientOptions = () => ({ validateStatus: () => true, timeout: 5000 });
    const fetchChunk = (uri) => axios.get(convertToLocalUrl(uri), lenientOptions());

    // Upload a source file to chunk
    const testFileUrl = await createAndUploadTestFile();
    const requestId = uuidv4();

    // Ask the handler (GET) to chunk the uploaded file
    const chunkResponse = await axios.get(baseUrl, {
        params: { uri: testFileUrl, requestId },
        ...lenientOptions()
    });

    t.is(chunkResponse.status, 200, 'Chunked request should succeed');
    t.truthy(chunkResponse.data, 'Response should contain data');
    t.true(Array.isArray(chunkResponse.data), 'Response should be an array');
    t.true(chunkResponse.data.length > 0, 'Should have created at least one chunk');

    // Every chunk must be present in Azure/Local storage and in GCS
    for (const { uri, gcs: gcsUrl } of chunkResponse.data) {
        const primaryResponse = await fetchChunk(uri);
        t.is(primaryResponse.status, 200, `Chunk should be accessible in Azure/Local: ${uri}`);

        t.truthy(gcsUrl, 'Chunk should contain GCS URL');
        t.true(gcsUrl.startsWith('gs://'), 'GCS URL should use gs:// protocol');

        const presentInGcs = await checkGCSFile(gcsUrl);
        t.true(presentInGcs, `Chunk should exist in GCS: ${gcsUrl}`);
    }

    // Remove everything stored for this request
    const deleteResponse = await axios.delete(`${baseUrl}?operation=delete&requestId=${requestId}`);
    t.is(deleteResponse.status, 200, 'Delete should succeed');

    // After deletion no chunk may remain in either storage
    for (const { uri, gcs: gcsUrl } of chunkResponse.data) {
        const primaryResponse = await fetchChunk(uri);
        t.is(primaryResponse.status, 404, `Chunk should not be accessible in Azure/Local after deletion: ${uri}`);

        const presentInGcs = await checkGCSFile(gcsUrl);
        t.false(presentInGcs, `Chunk should not exist in GCS after deletion: ${gcsUrl}`);
    }
});
864
+
865
// Error paths of the chunking endpoint when GCS is configured: a URI that
// points at a nonexistent file, and a request that omits the URI entirely.
test.serial('should handle chunking errors gracefully with GCS', async t => {
    if (!isGCSConfigured()) {
        t.pass('Skipping test - GCS not configured');
        return;
    }

    // Issue a chunking GET that never rejects on HTTP status and times out at 5s.
    const requestChunks = (params) => axios.get(baseUrl, {
        params,
        validateStatus: () => true,
        timeout: 5000
    });

    // Upload a real file so the bogus URL below has a realistic shape
    const validFileUrl = await createAndUploadTestFile();

    // Same URL with the last path segment replaced by a file that does not exist
    const invalidUrl = validFileUrl.replace(/[^/]+$/, 'nonexistent-file.mp3');
    const invalidResponse = await requestChunks({ uri: invalidUrl, requestId: uuidv4() });

    t.is(invalidResponse.status, 500, 'Should reject nonexistent file URL');
    t.true(invalidResponse.data.includes('Error processing media file'), 'Should indicate error processing media file');

    // Request that carries a requestId but no uri
    const noUriResponse = await requestChunks({ requestId: uuidv4() });

    t.is(noUriResponse.status, 400, 'Should reject request with no URI');
    t.is(
        noUriResponse.data,
        'Please pass a uri and requestId on the query string or in the request body',
        'Should return proper error message'
    );
});
904
+
765
905
  // Legacy MediaFileChunker Tests
766
906
  test.serial('should handle file upload through legacy MediaFileChunker endpoint', async t => {
767
907
  const form = new FormData();
@@ -4,19 +4,22 @@ To install dependencies:
4
4
 
5
5
  ```bash
6
6
  bun install
7
+ cd client
8
+ bun install
7
9
  ```
8
10
 
9
11
  To run:
10
12
 
13
+ Set up your .env file with the correct Cortex API key and access to the realtime voice service.
14
+
11
15
  ```bash
12
- cd client
13
- bun run build
14
- cd ..
15
- bun run start
16
+ # In the server directory
17
+ bun run dev
16
18
  ```
17
19
 
18
20
  To run in production:
19
21
 
20
22
  ```bash
23
+ # In the server directory
21
24
  bun run start:prod
22
25
  ```
@@ -566,10 +566,10 @@ export class SocketServer {
566
566
 
567
567
  // Parallelize memory reads
568
568
  const [memorySelf, memoryUser, memoryDirectives, memoryTopics, voiceSample] = await Promise.all([
569
- readMemory(socket.data.userId, socket.data.aiName, "memorySelf", 1),
570
- readMemory(socket.data.userId, socket.data.aiName, "memoryUser", 1),
571
- readMemory(socket.data.userId, socket.data.aiName, "memoryDirectives", 1),
572
- readMemory(socket.data.userId, socket.data.aiName, "memoryTopics", 0, 0, 10),
569
+ readMemory(socket.data.userId, "memorySelf", 1, 0, 0, true),
570
+ readMemory(socket.data.userId, "memoryUser", 1, 0, 0, true),
571
+ readMemory(socket.data.userId, "memoryDirectives", 1, 0, 0, true),
572
+ readMemory(socket.data.userId, "memoryTopics", 0, 0, 10, false),
573
573
  style(socket.data.userId, socket.data.aiName, socket.data.aiStyle, [], "")
574
574
  ]);
575
575
 
@@ -24,8 +24,8 @@ query ManageMemory($contextId: String, $chatHistory: [MultiMessage], $aiName: St
24
24
  `
25
25
 
26
26
  const READ_MEMORY = `
27
- query ReadMemory($contextId: String, $aiName: String, $section: String, $priority: Int, $recentHours: Int, $numResults: Int) {
28
- sys_read_memory(contextId: $contextId, aiName: $aiName, section: $section, priority: $priority, recentHours: $recentHours, numResults: $numResults) {
27
+ query ReadMemory($contextId: String, $section: String, $priority: Int, $recentHours: Int, $numResults: Int) {
28
+ sys_read_memory(contextId: $contextId, section: $section, priority: $priority, recentHours: $recentHours, numResults: $numResults) {
29
29
  result
30
30
  tool
31
31
  warnings
@@ -69,20 +69,20 @@ export async function manageMemory(contextId: string,
69
69
  }
70
70
 
71
71
  export async function readMemory(contextId: string,
72
- aiName: string,
73
72
  section: MemorySection,
74
73
  priority: number = 0,
75
74
  recentHours: number = 0,
76
- numResults: number = 0
75
+ numResults: number = 0,
76
+ stripMetadata: boolean = false
77
77
  ) {
78
78
 
79
79
  const variables: CortexVariables = {
80
80
  section,
81
81
  contextId,
82
- aiName,
83
82
  priority,
84
83
  recentHours,
85
- numResults
84
+ numResults,
85
+ stripMetadata
86
86
  }
87
87
 
88
88
  const res = await getCortexResponse(variables, READ_MEMORY);
@@ -56,6 +56,7 @@ export type CortexVariables = {
56
56
  priority?: number;
57
57
  recentHours?: number;
58
58
  numResults?: number;
59
+ stripMetadata?: boolean;
59
60
  }
60
61
 
61
62
  function truncateBody(body: any): string {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.3.17",
3
+ "version": "1.3.19",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
@@ -40,13 +40,58 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
40
40
  return {
41
41
  isAccessible: true,
42
42
  contentLength,
43
- durationSeconds: durationSeconds || 60
43
+ durationSeconds: durationSeconds || 60,
44
+ isAzureUrl: videoUrl.includes('.blob.core.windows.net')
44
45
  };
45
46
  } catch (error) {
46
47
  throw new Error(`Failed to access video: ${error.message}`);
47
48
  }
48
49
  }
49
50
 
51
+ async uploadToFileHandler(videoUrl) {
52
+ try {
53
+ // Get the file handler URL from config
54
+ const fileHandlerUrl = config.get("whisperMediaApiUrl");
55
+ if (!fileHandlerUrl) {
56
+ throw new Error("File handler URL is not configured");
57
+ }
58
+
59
+ // Start heartbeat progress updates
60
+ const heartbeat = setInterval(() => {
61
+ publishRequestProgress({
62
+ requestId: this.requestId,
63
+ progress: 0,
64
+ info: 'Uploading and processing video...'
65
+ });
66
+ }, 5000);
67
+
68
+ try {
69
+ // Start the fetch request
70
+ const response = await axios.get(fileHandlerUrl, {
71
+ params: {
72
+ requestId: this.requestId,
73
+ fetch: videoUrl
74
+ }
75
+ });
76
+
77
+ if (!response.data?.url) {
78
+ throw new Error("File handler did not return a valid URL");
79
+ }
80
+
81
+ return response.data.url;
82
+ } finally {
83
+ // Always clear the heartbeat interval
84
+ clearInterval(heartbeat);
85
+ }
86
+ } catch (error) {
87
+ logger.error(`Failed to upload video to file handler: ${error.message}`);
88
+ if (error.response?.data) {
89
+ logger.error(`Response data: ${JSON.stringify(error.response.data)}`);
90
+ }
91
+ throw new Error(`Failed to upload video to file handler: ${error.message}`);
92
+ }
93
+ }
94
+
50
95
  async createTranslation(params) {
51
96
  const { videoUrl, sourceLanguage, targetLanguage, voiceKind, translationId } = params;
52
97
 
@@ -131,8 +176,8 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
131
176
  if (AzureVideoTranslatePlugin.lastProcessingRate && this.videoContentLength) {
132
177
  estimatedTotalTime = this.videoContentLength / AzureVideoTranslatePlugin.lastProcessingRate;
133
178
  } else {
134
- // First run: estimate based on 1x calculated video duration
135
- estimatedTotalTime = (this.videoContentLength * 8) / (2.5 * 1024 * 1024);
179
+ // First run: estimate based on 2x calculated video duration
180
+ estimatedTotalTime = 2 * (this.videoContentLength * 8) / (2.5 * 1024 * 1024);
136
181
  }
137
182
 
138
183
  // eslint-disable-next-line no-constant-condition
@@ -248,7 +293,7 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
248
293
 
249
294
  try {
250
295
  const translationId = `cortex-translation-${this.requestId}`;
251
- const videoUrl = requestParameters.sourcevideooraudiofilepath;
296
+ let videoUrl = requestParameters.sourcevideooraudiofilepath;
252
297
  const sourceLanguage = requestParameters.sourcelocale;
253
298
  const targetLanguage = requestParameters.targetlocale;
254
299
  const voiceKind = requestParameters.voicekind || 'PlatformVoice';
@@ -260,6 +305,13 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
260
305
  this.videoContentLength = videoInfo.contentLength;
261
306
  logger.debug(`Video info: ${JSON.stringify(videoInfo, null, 2)}`);
262
307
 
308
+ // If the video is not from Azure storage, upload it to file handler
309
+ if (!videoInfo.isAzureUrl) {
310
+ logger.debug('Video is not from Azure storage, uploading to file handler...');
311
+ videoUrl = await this.uploadToFileHandler(videoUrl);
312
+ logger.debug(`Video uploaded to file handler: ${videoUrl}`);
313
+ }
314
+
263
315
  // Create translation
264
316
  const { operationUrl } = await this.createTranslation({
265
317
  videoUrl, sourceLanguage, targetLanguage, voiceKind, translationId