@aj-archipelago/cortex 1.3.12 → 1.3.14

@@ -11,6 +11,7 @@ import { join } from "path";
  import { Storage } from "@google-cloud/storage";
  import axios from "axios";
  import { publicFolder, port, ipAddress } from "./start.js";
+ // eslint-disable-next-line import/no-extraneous-dependencies
  import mime from "mime-types";

  function isBase64(str) {
@@ -65,6 +66,19 @@ async function gcsUrlExists(url, defaultReturn = false) {
    const bucketName = urlParts[0];
    const fileName = urlParts.slice(1).join('/');

+   if (process.env.STORAGE_EMULATOR_HOST) {
+     try {
+       const response = await axios.get(
+         `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}`,
+         { validateStatus: status => status === 200 || status === 404 }
+       );
+       return response.status === 200;
+     } catch (error) {
+       console.error('Error checking emulator file:', error);
+       return false;
+     }
+   }
+
    const bucket = gcs.bucket(bucketName);
    const file = bucket.file(fileName);

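The new branch lets gcsUrlExists answer existence checks against a local emulator by calling its JSON API directly instead of going through the @google-cloud/storage client. A minimal sketch of how this path might be exercised, assuming a fake-gcs-server instance on its default port; the bucket and object names are placeholders:

// Sketch only: point gcsUrlExists at a local fake-gcs-server emulator.
// 'test-bucket' and the object path are illustrative, not from the package.
process.env.STORAGE_EMULATOR_HOST = 'http://localhost:4443';
const exists = await gcsUrlExists('gs://test-bucket/some/object.txt');
console.log(exists); // true only if the emulator answers 200 for the object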
@@ -77,7 +91,7 @@ async function gcsUrlExists(url, defaultReturn = false) {
    }
  }

- const getBlobClient = async () => {
+ export const getBlobClient = async () => {
    const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
    const containerName = AZURE_STORAGE_CONTAINER_NAME;
    if (!connectionString || !containerName) {
@@ -159,102 +173,104 @@ async function deleteBlob(requestId) {
    return result;
  }

- async function uploadBlob(context, req, saveToLocal = false, filePath=null, hash=null) {
-   return new Promise(async (resolve, reject) => {
-     try {
-       let requestId = uuidv4();
-       let body = {};
-
-       // If filePath is given, we are dealing with local file and not form-data
-       if (filePath) {
-         const file = fs.createReadStream(filePath);
-         const filename = path.basename(filePath);
-         try {
-           const result = await uploadFile(context, requestId, body, saveToLocal, file, filename, resolve, hash);
-           resolve(result);
-         } catch (error) {
-           const err = new Error("Error processing file upload.");
-           err.status = 500;
-           throw err;
-         }
-       } else {
-         // Otherwise, continue working with form-data
-         const busboy = Busboy({ headers: req.headers });
-         let hasFile = false;
-         let errorOccurred = false;
-
-         busboy.on("field", (fieldname, value) => {
-           if (fieldname === "requestId") {
-             requestId = value;
+ function uploadBlob(context, req, saveToLocal = false, filePath=null, hash=null) {
+   return new Promise((resolve, reject) => {
+     (async () => {
+       try {
+         let requestId = uuidv4();
+         let body = {};
+
+         // If filePath is given, we are dealing with local file and not form-data
+         if (filePath) {
+           const file = fs.createReadStream(filePath);
+           const filename = path.basename(filePath);
+           try {
+             const result = await uploadFile(context, requestId, body, saveToLocal, file, filename, resolve, hash);
+             resolve(result);
+           } catch (error) {
+             const err = new Error("Error processing file upload.");
+             err.status = 500;
+             throw err;
            }
-         });
+         } else {
+           // Otherwise, continue working with form-data
+           const busboy = Busboy({ headers: req.headers });
+           let hasFile = false;
+           let errorOccurred = false;
+
+           busboy.on("field", (fieldname, value) => {
+             if (fieldname === "requestId") {
+               requestId = value;
+             }
+           });

-         busboy.on("file", async (fieldname, file, filename) => {
-           if (errorOccurred) return;
-           hasFile = true;
-           uploadFile(context, requestId, body, saveToLocal, file, filename?.filename || filename, resolve, hash).catch(error => {
+           busboy.on("file", async (fieldname, file, filename) => {
+             if (errorOccurred) return;
+             hasFile = true;
+             uploadFile(context, requestId, body, saveToLocal, file, filename?.filename || filename, resolve, hash).catch(_error => {
+               if (errorOccurred) return;
+               errorOccurred = true;
+               const err = new Error("Error processing file upload.");
+               err.status = 500;
+               reject(err);
+             });
+           });
+
+           busboy.on("error", (_error) => {
              if (errorOccurred) return;
              errorOccurred = true;
-             const err = new Error("Error processing file upload.");
-             err.status = 500;
+             const err = new Error("No file provided in request");
+             err.status = 400;
              reject(err);
            });
-         });
-
-         busboy.on("error", (error) => {
-           if (errorOccurred) return;
-           errorOccurred = true;
-           const err = new Error("No file provided in request");
-           err.status = 400;
-           reject(err);
-         });
-
-         busboy.on("finish", () => {
-           if (errorOccurred) return;
-           if (!hasFile) {
+
+           busboy.on("finish", () => {
+             if (errorOccurred) return;
+             if (!hasFile) {
+               errorOccurred = true;
+               const err = new Error("No file provided in request");
+               err.status = 400;
+               reject(err);
+             }
+           });
+
+           // Handle errors from piping the request
+           req.on('error', (error) => {
+             if (errorOccurred) return;
              errorOccurred = true;
+             // Only log unexpected errors
+             if (error.message !== "No file provided in request") {
+               context.log("Error in request stream:", error);
+             }
              const err = new Error("No file provided in request");
              err.status = 400;
              reject(err);
-           }
-         });
-
-         // Handle errors from piping the request
-         req.on('error', (error) => {
-           if (errorOccurred) return;
-           errorOccurred = true;
-           // Only log unexpected errors
-           if (error.message !== "No file provided in request") {
-             context.log("Error in request stream:", error);
-           }
-           const err = new Error("No file provided in request");
-           err.status = 400;
-           reject(err);
-         });
+           });

-         try {
-           req.pipe(busboy);
-         } catch (error) {
-           if (errorOccurred) return;
-           errorOccurred = true;
-           // Only log unexpected errors
-           if (error.message !== "No file provided in request") {
-             context.log("Error piping request to busboy:", error);
+           try {
+             req.pipe(busboy);
+           } catch (error) {
+             if (errorOccurred) return;
+             errorOccurred = true;
+             // Only log unexpected errors
+             if (error.message !== "No file provided in request") {
+               context.log("Error piping request to busboy:", error);
+             }
+             const err = new Error("No file provided in request");
+             err.status = 400;
+             reject(err);
            }
-           const err = new Error("No file provided in request");
-           err.status = 400;
-           reject(err);
          }
+       } catch (error) {
+         // Only log unexpected errors
+         if (error.message !== "No file provided in request") {
+           context.log("Error processing file upload:", error);
+         }
+         const err = new Error(error.message || "Error processing file upload.");
+         err.status = error.status || 500;
+         reject(err);
        }
-     } catch (error) {
-       // Only log unexpected errors
-       if (error.message !== "No file provided in request") {
-         context.log("Error processing file upload:", error);
-       }
-       const err = new Error(error.message || "Error processing file upload.");
-       err.status = error.status || 500;
-       reject(err);
-     }
+     })();
    });
  }

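The structural change in this hunk replaces the `new Promise(async (resolve, reject) => ...)` anti-pattern with a synchronous executor that immediately invokes an async IIFE. A throw that escapes an async executor rejects a promise nobody observes, rather than rejecting the promise being constructed; with the IIFE, the surrounding try/catch routes every throw to reject. A minimal sketch of the pattern, with somethingAsync as a placeholder for any awaited work:

// Sketch of the executor pattern adopted above; somethingAsync is hypothetical.
function doWork() {
  return new Promise((resolve, reject) => {
    (async () => {
      try {
        resolve(await somethingAsync()); // placeholder async call
      } catch (err) {
        reject(err); // with an async executor, this throw would go unobserved
      }
    })();
  });
}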
@@ -511,7 +527,16 @@ async function ensureGCSUpload(context, existingFile) {
    if (!existingFile.gcs && gcs) {
      context.log(`GCS file was missing - uploading.`);
      const encodedFilename = path.basename(existingFile.url.split('?')[0]);
-     existingFile.gcs = await uploadToGCS(context, existingFile.url, encodedFilename);
+
+     // Download the file from Azure/local storage
+     const response = await axios({
+       method: 'get',
+       url: existingFile.url,
+       responseType: 'stream'
+     });
+
+     // Upload the file stream to GCS
+     existingFile.gcs = await uploadToGCS(context, response.data, encodedFilename);
    }
    return existingFile;
  }
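ensureGCSUpload now downloads the primary copy itself and hands uploadToGCS a readable stream instead of the file's URL. An illustrative call with placeholder values, mirroring the shape used above (context is whatever the caller already holds; the URL and filename are hypothetical):

// Illustrative only: placeholder URL and filename.
const response = await axios({ method: 'get', url: 'https://example.com/file.bin', responseType: 'stream' });
const gcsUrl = await uploadToGCS(context, response.data, 'file.bin');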
@@ -18,8 +18,7 @@ const tempDirectories = new Map(); // dir -> { createdAt, requestId }

  // Temp directory cleanup
  async function cleanupTempDirectories() {
-   const tempDir = os.tmpdir();
-
+
    for (const [dir, info] of tempDirectories) {
      try {
        // Cleanup directories older than 1 hour
@@ -1,6 +1,8 @@
  import fs from 'fs';
- import { ACCEPTED_MIME_TYPES } from './constants.js';
+ import { ACCEPTED_MIME_TYPES, isAcceptedMimeType } from './constants.js';
  import path from 'path';
+ import http from 'http';
+ import https from 'https';

  export async function deleteTempPath(path) {
    try {
@@ -65,3 +67,45 @@ export function ensureEncoded(url) {
      return url;
    }
  }
+
+ export async function urlExists(url) {
+   if(!url) return false;
+
+   try {
+     // Basic URL validation
+     const urlObj = new URL(url);
+     if (!['http:', 'https:'].includes(urlObj.protocol)) {
+       throw new Error('Invalid protocol - only HTTP and HTTPS are supported');
+     }
+
+     const httpModule = urlObj.protocol === 'https:' ? https : http;
+
+     return new Promise((resolve) => {
+       const request = httpModule.request(url, { method: 'HEAD' }, function(response) {
+         if (response.statusCode >= 200 && response.statusCode < 400) {
+           const contentType = response.headers['content-type'];
+           const cleanContentType = contentType ? contentType.split(';')[0].trim() : '';
+           // Check if the content type is one we accept
+           if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
+             resolve({ valid: true, contentType: cleanContentType });
+           } else {
+             console.log(`Unsupported content type: ${contentType}`);
+             resolve({ valid: false });
+           }
+         } else {
+           resolve({ valid: false });
+         }
+       });
+
+       request.on('error', function(err) {
+         console.error('URL validation error:', err.message);
+         resolve({ valid: false });
+       });
+
+       request.end();
+     });
+   } catch (error) {
+     console.error('URL validation error:', error.message);
+     return { valid: false };
+   }
+ }
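Note that despite its name, urlExists resolves to an object rather than a boolean (apart from the early bare `false` for a missing argument): `{ valid: true, contentType }` for a reachable URL with an accepted MIME type, `{ valid: false }` otherwise. An illustrative call against the relocated helper; the URL is a placeholder:

// Illustrative usage; the URL is hypothetical.
import { urlExists } from './helper.js';

const check = await urlExists('https://example.com/audio.mp3');
if (check.valid) {
  console.log(`Reachable, content type: ${check.contentType}`);
}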
@@ -1,20 +1,19 @@
  import { downloadFile, splitMediaFile } from './fileChunker.js';
  import { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, AZURE_STORAGE_CONTAINER_NAME } from './blobHandler.js';
  import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
- import { ensureEncoded, ensureFileExtension } from './helper.js';
+ import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
  import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
  import { documentToText, easyChunker } from './docHelper.js';
- import { DOC_EXTENSIONS, isAcceptedMimeType } from './constants.js';
+ import { DOC_EXTENSIONS } from './constants.js';
  import path from 'path';
  import os from 'os';
  import { v4 as uuidv4 } from 'uuid';
  import fs from 'fs';
- import http from 'http';
- import https from 'https';

  const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
+ const useGCS = process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 || process.env.GCP_SERVICE_ACCOUNT_KEY ? true : false;

- console.log(`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${gcs ? ' and Google Cloud Storage' : ''}`);
+ console.log(`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${useGCS ? ' and Google Cloud Storage' : ''}`);

  let isCleanupRunning = false;
  async function cleanupInactive(context) {
@@ -76,48 +75,6 @@ async function cleanupInactive(context) {
    }
  }

- async function urlExists(url) {
-   if(!url) return false;
-
-   try {
-     // Basic URL validation
-     const urlObj = new URL(url);
-     if (!['http:', 'https:'].includes(urlObj.protocol)) {
-       throw new Error('Invalid protocol - only HTTP and HTTPS are supported');
-     }
-
-     const httpModule = urlObj.protocol === 'https:' ? https : http;
-
-     return new Promise((resolve) => {
-       const request = httpModule.request(url, { method: 'HEAD' }, function(response) {
-         if (response.statusCode >= 200 && response.statusCode < 400) {
-           const contentType = response.headers['content-type'];
-           const cleanContentType = contentType ? contentType.split(';')[0].trim() : '';
-           // Check if the content type is one we accept
-           if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
-             resolve({ valid: true, contentType: cleanContentType });
-           } else {
-             console.log(`Unsupported content type: ${contentType}`);
-             resolve({ valid: false });
-           }
-         } else {
-           resolve({ valid: false });
-         }
-       });
-
-       request.on('error', function(err) {
-         console.error('URL validation error:', err.message);
-         resolve({ valid: false });
-       });
-
-       request.end();
-     });
-   } catch (error) {
-     console.error('URL validation error:', error.message);
-     return { valid: false };
-   }
- }
-
  async function CortexFileHandler(context, req) {
    const { uri, requestId, save, hash, checkHash, clearHash, fetch, load, restore } = req.body?.params || req.query;
    const operation = save ? 'save' :
@@ -268,24 +225,55 @@ async function CortexFileHandler(context, req) {

    if(hashResult){
      context.log(`File exists in map: ${hash}`);
-     const exists = await urlExists(hashResult?.url);
+
+     // Check primary storage (Azure/Local) first
+     const primaryExists = await urlExists(hashResult?.url);
+     const gcsExists = gcs ? await gcsUrlExists(hashResult?.gcs) : false;

-     if(!exists.valid){
-       context.log(`File is not in storage. Removing from map: ${hash}`);
+     // If neither storage has the file, remove from map and return not found
+     if (!primaryExists.valid && !gcsExists) {
+       context.log(`File not found in any storage. Removing from map: ${hash}`);
        await removeFromFileStoreMap(hash);
        context.res = {
          status: 404,
-         body: `Hash ${hash} not found`
+         body: `Hash ${hash} not found in storage`
        };
        return;
      }

-     if (gcs) {
-       const gcsExists = await gcsUrlExists(hashResult?.gcs);
-       if(!gcsExists){
-         hashResult = await ensureGCSUpload(context, hashResult);
+     // If primary is missing but GCS exists, restore from GCS
+     if (!primaryExists.valid && gcsExists) {
+       context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
+       try {
+         const res = await CortexFileHandler(context, {
+           method: 'GET',
+           body: { params: { fetch: hashResult.gcs } }
+         });
+         if (res?.body?.url) {
+           hashResult.url = res.body.url;
+         }
+       } catch (error) {
+         console.error('Error restoring from GCS:', error);
        }
      }
+     // If GCS is missing but primary exists, restore to GCS
+     else if (primaryExists.valid && gcs && !gcsExists) {
+       context.log(`GCS file missing, restoring from primary: ${hash}`);
+       const { gcs: _, ...fileInfo } = hashResult; // eslint-disable-line no-unused-vars
+       hashResult = await ensureGCSUpload(context, fileInfo);
+     }
+
+     // Final check to ensure we have at least one valid storage location
+     const finalPrimaryCheck = await urlExists(hashResult?.url);
+     if (!finalPrimaryCheck.valid && !await gcsUrlExists(hashResult?.gcs)) {
+       context.log(`Failed to restore file. Removing from map: ${hash}`);
+       await removeFromFileStoreMap(hash);
+       context.res = {
+         status: 404,
+         body: `Hash ${hash} not found and restoration failed`
+       };
+       return;
+     }

      //update redis timestamp with current time
      await setFileStoreMap(hash, hashResult);
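The rewritten hash check now handles all four combinations of primary and GCS availability. A compact sketch of the decision matrix it implements; the function and parameter names here are illustrative stand-ins for urlExists and gcsUrlExists, not part of the module:

// Sketch of the restore logic above; only the four-way branching is the point.
async function resolveStorageState(entry, checkPrimary, checkGcs) {
  const primary = (await checkPrimary(entry.url)).valid;
  const backup = await checkGcs(entry.gcs);
  if (!primary && !backup) return 'remove';        // drop from map, respond 404
  if (!primary && backup) return 'restoreFromGCS'; // re-fetch via the gcs url
  if (primary && !backup) return 'restoreToGCS';   // re-upload the backup copy
  return 'ok';                                     // both present, refresh timestamp
}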
@@ -1,6 +1,6 @@
  {
    "name": "@aj-archipelago/cortex-file-handler",
-   "version": "1.0.15",
+   "version": "1.0.16",
    "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
    "type": "module",
    "scripts": {
@@ -43,6 +43,7 @@
      "timeout": "1m",
      "nodeArguments": [
        "--experimental-modules"
-     ]
+     ],
+     "serial": true
    }
  }
@@ -18,7 +18,7 @@ node scripts/setup-azure-container.js

  # Run the tests
  echo "Running tests..."
- node -r dotenv/config node_modules/ava/entrypoints/cli.mjs
+ node -r dotenv/config node_modules/ava/entrypoints/cli.mjs "$@"

  # Store test result
  TEST_RESULT=$?
@@ -3,6 +3,7 @@ import express from "express";
  import { fileURLToPath } from 'url';
  import { dirname, join } from 'path';
  import cors from 'cors';
+ import { readFileSync } from 'fs';

  import { publicIpv4 } from 'public-ip';
  const ipAddress = await publicIpv4();
@@ -11,10 +12,22 @@ const app = express();
  const port = process.env.PORT || 7071;
  const publicFolder = join(dirname(fileURLToPath(import.meta.url)), 'files');

+ // Get version from package.json
+ const packageJson = JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), 'package.json'), 'utf8'));
+ const version = packageJson.version;
+
  app.use(cors());
  // Serve static files from the public folder
  app.use('/files', express.static(publicFolder));

+ // Health check endpoint
+ app.get('/health', (req, res) => {
+   res.status(200).json({
+     status: 'healthy',
+     version: version
+   });
+ });
+
  // New primary endpoint
  app.all('/api/CortexFileHandler', async (req, res) => {
    const context = { req, res, log: console.log }
@@ -44,7 +57,7 @@ app.all('/api/MediaFileChunker', async (req, res) => {
  });

  app.listen(port, () => {
-   console.log(`Cortex File Handler running on port ${port} (includes legacy MediaFileChunker endpoint)`);
+   console.log(`Cortex File Handler v${version} running on port ${port} (includes legacy MediaFileChunker endpoint)`);
  });

  export { port, publicFolder, ipAddress };
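The new /health endpoint reports the service version read from package.json, which also now appears in the startup log. A hypothetical smoke test against a locally running instance, assuming the default port configured above and Node 18+ for the built-in fetch:

// Sketch only; port 7071 is the default from start.js.
const res = await fetch('http://localhost:7071/health');
console.log(res.status, await res.json()); // e.g. 200 { status: 'healthy', version: '1.0.16' }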
@@ -0,0 +1,292 @@
+ import test from 'ava';
+ import fs from 'fs';
+ import path from 'path';
+ import { fileURLToPath } from 'url';
+ import { uploadBlob, ensureGCSUpload, gcsUrlExists, deleteGCS, getBlobClient } from '../blobHandler.js';
+ import axios from 'axios';
+ import CortexFileHandler from '../index.js';
+ import { setFileStoreMap } from '../redis.js';
+ import { urlExists } from '../helper.js';
+
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = path.dirname(__filename);
+
+ // Helper function to determine if GCS is configured
+ function isGCSConfigured() {
+   return process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 || process.env.GCP_SERVICE_ACCOUNT_KEY;
+ }
+
+ // Helper function to check file size in GCS
+ async function getGCSFileSize(gcsUrl) {
+   if (!isGCSConfigured()) return null;
+   try {
+     const bucket = gcsUrl.split('/')[2];
+     const filename = gcsUrl.split('/').slice(3).join('/');
+
+     if (process.env.STORAGE_EMULATOR_HOST) {
+       const response = await axios.get(
+         `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucket}/o/${encodeURIComponent(filename)}`,
+         { validateStatus: status => status === 200 || status === 404 }
+       );
+       if (response.status === 200) {
+         return parseInt(response.data.size);
+       }
+       return null;
+     }
+   } catch (error) {
+     return null;
+   }
+ }
+
+ // Helper function to check file size in Azure/HTTP
+ async function getHttpFileSize(url) {
+   try {
+     const response = await axios.head(url);
+     const contentLength = response.headers['content-length'];
+     return contentLength ? parseInt(contentLength) : null;
+   } catch (error) {
+     console.error('Error getting HTTP file size:', error);
+     return null;
+   }
+ }
+
+ test('test GCS backup during initial upload', async t => {
+   if (!isGCSConfigured()) {
+     t.pass('Skipping test - GCS not configured');
+     return;
+   }
+
+   // Create a test file with known content
+   const testContent = 'Hello World!'.repeat(1000); // Create a decent sized file
+   const testFile = path.join(__dirname, 'test.txt');
+   fs.writeFileSync(testFile, testContent);
+
+   try {
+     // Upload the file - should go to both Azure/local and GCS
+     const context = { log: console.log };
+     const result = await uploadBlob(context, null, false, testFile);
+
+     // Verify we got both URLs
+     t.truthy(result.url, 'Should have primary storage URL');
+     t.truthy(result.gcs, 'Should have GCS backup URL');
+
+     // Verify GCS file exists
+     const gcsExists = await gcsUrlExists(result.gcs);
+     t.true(gcsExists, 'File should exist in GCS');
+
+     // Verify file content size in GCS
+     const gcsSize = await getGCSFileSize(result.gcs);
+     t.is(gcsSize, testContent.length, 'GCS file size should match original');
+   } finally {
+     // Cleanup
+     if (fs.existsSync(testFile)) {
+       fs.unlinkSync(testFile);
+     }
+   }
+ });
+
+ test('test GCS backup restoration when missing', async t => {
+   if (!isGCSConfigured()) {
+     t.pass('Skipping test - GCS not configured');
+     return;
+   }
+
+   // Create a test file with known content
+   const testContent = 'Hello World!'.repeat(1000); // Create a decent sized file
+   const testFile = path.join(__dirname, 'test.txt');
+   fs.writeFileSync(testFile, testContent);
+
+   try {
+     // First upload normally
+     const context = { log: console.log };
+     const result = await uploadBlob(context, null, false, testFile);
+
+     // Verify initial upload worked
+     t.truthy(result.gcs, 'Should have GCS backup URL after initial upload');
+
+     // Delete the GCS file
+     const gcsFileName = result.gcs.replace('gs://cortextempfiles/', '');
+     await deleteGCS(gcsFileName);
+
+     // Verify file is gone
+     const existsAfterDelete = await gcsUrlExists(result.gcs);
+     t.false(existsAfterDelete, 'File should not exist in GCS after deletion');
+
+     // Remove GCS URL to simulate missing backup
+     const { gcs: _, ...fileInfo } = result; // eslint-disable-line no-unused-vars
+
+     // Try to ensure GCS backup
+     const updatedResult = await ensureGCSUpload(context, fileInfo);
+
+     // Verify GCS URL was added
+     t.truthy(updatedResult.gcs, 'Should have GCS backup URL after ensure');
+
+     // Verify GCS file exists
+     const gcsExists = await gcsUrlExists(updatedResult.gcs);
+     t.true(gcsExists, 'File should exist in GCS after ensure');
+
+     // Verify file content size in GCS
+     const gcsSize = await getGCSFileSize(updatedResult.gcs);
+     t.is(gcsSize, testContent.length, 'GCS file size should match original after ensure');
+   } finally {
+     // Cleanup
+     if (fs.existsSync(testFile)) {
+       fs.unlinkSync(testFile);
+     }
+   }
+ });
+
+ test('test primary storage restoration from GCS backup', async t => {
+   if (!isGCSConfigured()) {
+     t.pass('Skipping test - GCS not configured');
+     return;
+   }
+
+   // Create a test file with known content
+   const testContent = 'Hello World!'.repeat(1000);
+   const testFile = path.join(__dirname, 'test.txt');
+   fs.writeFileSync(testFile, testContent);
+
+   try {
+     // First upload normally
+     const context = { log: console.log };
+     const initialResult = await uploadBlob(context, null, false, testFile);
+
+     // Verify initial upload worked
+     t.truthy(initialResult.url, 'Should have primary storage URL');
+     t.truthy(initialResult.gcs, 'Should have GCS backup URL');
+
+     // Store the hash and simulate a missing primary file by requesting with a bad URL
+     const hash = 'test_primary_restore';
+     const modifiedResult = {
+       ...initialResult,
+       url: initialResult.url.replace('.blob.core.windows.net', '.invalid.url')
+     };
+
+     // Set up Redis state with the bad URL
+     await setFileStoreMap(hash, modifiedResult);
+
+     // Set up request for the handler
+     const mockReq = {
+       method: 'GET',
+       body: { params: { hash, checkHash: true } }
+     };
+
+     // Set up context for the handler
+     const handlerContext = {
+       log: console.log,
+       res: null
+     };
+
+     // Call the handler which should restore from GCS
+     await CortexFileHandler(handlerContext, mockReq);
+
+     // Verify we got a valid response
+     t.is(handlerContext.res.status, 200, 'Should get successful response');
+     t.truthy(handlerContext.res.body.url, 'Should have restored primary URL');
+     t.truthy(handlerContext.res.body.gcs, 'Should still have GCS URL');
+
+     // Verify the restored URL is accessible
+     const { valid } = await urlExists(handlerContext.res.body.url);
+     t.true(valid, 'Restored URL should be accessible');
+
+     // Verify file sizes match in both storages
+     const gcsSize = await getGCSFileSize(handlerContext.res.body.gcs);
+     const azureSize = await getHttpFileSize(handlerContext.res.body.url);
+     t.is(azureSize, testContent.length, 'Azure file size should match original');
+     t.is(gcsSize, azureSize, 'Azure and GCS file sizes should match');
+
+   } finally {
+     // Cleanup
+     if (fs.existsSync(testFile)) {
+       fs.unlinkSync(testFile);
+     }
+   }
+ });
+
+ test('test hash check returns 404 when both storages are empty', async t => {
+   if (!isGCSConfigured()) {
+     t.pass('Skipping test - GCS not configured');
+     return;
+   }
+
+   // Create a test file with known content
+   const testContent = 'Hello World!'.repeat(1000);
+   const testFile = path.join(__dirname, 'test.txt');
+   fs.writeFileSync(testFile, testContent);
+
+   try {
+     // First upload normally
+     const context = { log: console.log };
+     const initialResult = await uploadBlob(context, null, false, testFile);
+
+     // Verify initial upload worked
+     t.truthy(initialResult.url, 'Should have primary storage URL');
+     t.truthy(initialResult.gcs, 'Should have GCS backup URL');
+
+     // Store the hash
+     const hash = 'test_both_missing';
+     await setFileStoreMap(hash, initialResult);
+
+     // Verify both files exist initially
+     const initialPrimaryCheck = await urlExists(initialResult.url);
+     const initialGcsCheck = await gcsUrlExists(initialResult.gcs);
+     t.true(initialPrimaryCheck.valid, 'Primary file should exist initially');
+     t.true(initialGcsCheck, 'GCS file should exist initially');
+
+     // Delete from Azure/primary storage
+     const azureUrl = new URL(initialResult.url);
+     console.log('Azure URL:', initialResult.url);
+     // Get the path without query parameters and decode it
+     const fullPath = decodeURIComponent(azureUrl.pathname);
+     console.log('Full path:', fullPath);
+     // Get the request ID and filename from the path
+     const pathParts = fullPath.split('/');
+     const blobName = pathParts[pathParts.length - 1];
+     console.log('Attempting to delete Azure blob:', blobName);
+
+     // Delete the blob using the correct container name
+     const { containerClient } = await getBlobClient();
+     const blockBlobClient = containerClient.getBlockBlobClient(blobName);
+     await blockBlobClient.delete();
+     console.log('Azure deletion completed');
+
+     // Add a small delay to ensure deletion is complete
+     await new Promise(resolve => setTimeout(resolve, 1000));
+
+     // Delete from GCS
+     const gcsFileName = initialResult.gcs.replace('gs://cortextempfiles/', '');
+     console.log('Attempting to delete GCS file:', gcsFileName);
+     await deleteGCS(gcsFileName);
+     console.log('GCS deletion completed');
+
+     // Verify both files are gone
+     const primaryExists = await urlExists(initialResult.url);
+     console.log('Primary exists after deletion:', primaryExists.valid);
+     const gcsExists = await gcsUrlExists(initialResult.gcs);
+     console.log('GCS exists after deletion:', gcsExists);
+     t.false(primaryExists.valid, 'Primary file should be deleted');
+     t.false(gcsExists, 'GCS file should be deleted');
+
+     // Try to get the file via hash - should fail
+     const handlerContext = {
+       log: console.log,
+       res: null
+     };
+
+     await CortexFileHandler(handlerContext, {
+       method: 'GET',
+       body: { params: { hash, checkHash: true } }
+     });
+
+     // Verify we got a 404 response
+     t.is(handlerContext.res.status, 404, 'Should get 404 when both files are missing');
+     t.true(handlerContext.res.body.includes('not found in storage'), 'Should indicate files are missing in storage');
+
+   } finally {
+     // Cleanup
+     if (fs.existsSync(testFile)) {
+       fs.unlinkSync(testFile);
+     }
+   }
+ });
@@ -5,7 +5,6 @@ import fs from 'fs/promises';
  import { existsSync } from 'fs';
  import { splitMediaFile, downloadFile } from '../fileChunker.js';
  import nock from 'nock';
- import path from 'path';
  import os from 'os';
  import { execSync } from 'child_process';
  import { performance } from 'perf_hooks';
@@ -94,7 +93,7 @@ test.before(async t => {
    }
  });

- // Cleanup: Remove test files and worker pool
+ // Cleanup: Remove test files
  test.after.always(async t => {
    // Clean up test files
    if (t.context.testDir) {
@@ -108,16 +107,6 @@ test.after.always(async t => {

    // Clean up nock
    nock.cleanAll();
-
-   // Clean up worker pool if it exists
-   if (typeof workerPool !== 'undefined' && Array.isArray(workerPool)) {
-     try {
-       await Promise.all(workerPool.map(worker => worker.terminate()));
-       console.log('Worker pool cleaned up successfully');
-     } catch (error) {
-       console.error('Error cleaning up worker pool:', error);
-     }
-   }
  });

  // Test successful chunking of a short file
@@ -250,7 +239,7 @@ function formatDuration(ms) {
  test('performance test - 1 hour file', async t => {
    const start = performance.now();

-   const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile1h);
+   const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile1h);

    // Wait for all chunks to complete
    const chunkPaths = await Promise.all(chunkPromises);
@@ -274,7 +263,7 @@ test('performance test - 1 hour file', async t => {
  test('performance test - 4 hour file', async t => {
    const start = performance.now();

-   const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);
+   const { chunkPromises, uniqueOutputPath } = await splitMediaFile(t.context.testFile4h);

    // Wait for all chunks to complete
    const chunkPaths = await Promise.all(chunkPromises);
@@ -1,5 +1,7 @@
+ /* eslint-disable no-unused-vars */
  import test from 'ava';
  import axios from 'axios';
+ // eslint-disable-next-line import/no-extraneous-dependencies
  import FormData from 'form-data';
  import { port, publicFolder, ipAddress } from '../start.js';
  import { v4 as uuidv4 } from 'uuid';
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aj-archipelago/cortex",
-   "version": "1.3.12",
+   "version": "1.3.14",
    "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
    "private": false,
    "repository": {
@@ -1,6 +1,7 @@
  // replicateApiPlugin.js
  import ModelPlugin from "./modelPlugin.js";
  import logger from "../../lib/logger.js";
+ import axios from "axios";

  class ReplicateApiPlugin extends ModelPlugin {
    constructor(pathway, model) {
@@ -106,10 +107,61 @@ class ReplicateApiPlugin extends ModelPlugin {
      cortexRequest.data = requestParameters;
      cortexRequest.params = requestParameters.params;

-     return this.executeRequest(cortexRequest);
+     // Make initial request to start prediction
+     const stringifiedResponse = await this.executeRequest(cortexRequest);
+     const parsedResponse = JSON.parse(stringifiedResponse);
+
+     // If we got a completed response, return it
+     if (parsedResponse?.status === "succeeded") {
+       return stringifiedResponse;
+     }
+
+     logger.info("Replicate API returned a non-completed response.");
+
+     if (!parsedResponse?.id) {
+       throw new Error("No prediction ID returned from Replicate API");
+     }
+
+     // Get the prediction ID and polling URL
+     const predictionId = parsedResponse.id;
+     const pollUrl = parsedResponse.urls?.get;
+
+     if (!pollUrl) {
+       throw new Error("No polling URL returned from Replicate API");
+     }
+
+     // Poll for results
+     const maxAttempts = 60; // 5 minutes with 5 second intervals
+     const pollInterval = 5000;
+
+     for (let attempt = 0; attempt < maxAttempts; attempt++) {
+       try {
+         const pollResponse = await axios.get(pollUrl, {
+           headers: cortexRequest.headers
+         });
+
+         logger.info("Polling Replicate API - attempt " + attempt);
+         const status = pollResponse.data?.status;
+
+         if (status === "succeeded") {
+           logger.info("Replicate API returned a completed response after polling");
+           return JSON.stringify(pollResponse.data);
+         } else if (status === "failed" || status === "canceled") {
+           throw new Error(`Prediction ${status}: ${pollResponse.data?.error || "Unknown error"}`);
+         }
+
+         // Wait before next poll
+         await new Promise(resolve => setTimeout(resolve, pollInterval));
+       } catch (error) {
+         logger.error(`Error polling prediction ${predictionId}: ${error.message}`);
+         throw error;
+       }
+     }
+
+     throw new Error(`Prediction ${predictionId} timed out after ${maxAttempts * pollInterval / 1000} seconds`);
    }

-   // Parse the response from the Replicate API
+   // Stringify the response from the Replicate API
    parseResponse(data) {
      if (data.data) {
        return JSON.stringify(data.data);