@aj-archipelago/cortex 1.3.12 → 1.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -131,6 +131,16 @@ Cortex pathway prompt enhancements include:
131
131
  ### Pathway Development
132
132
  To add a new pathway to Cortex, you create a new JavaScript file and define the prompts, properties, and functions that implement the desired functionality. Cortex provides defaults for almost everything, so in the simplest case a pathway can really just consist of a string prompt like the spelling example above. You can then save this file in the `pathways` directory in your Cortex project and it will be picked up and made available as a GraphQL query.
133
133
 
134
+ ### Specifying a Model
135
+ When determining which model to use for a pathway, Cortex follows this order of precedence:
136
+
137
+ 1. `pathway.model` - The model specified directly in the pathway definition
138
+ 2. `args.model` - The model passed in the request arguments
139
+ 3. `pathway.inputParameters.model` - The model specified in the pathway's input parameters
140
+ 4. `config.get('defaultModelName')` - The default model specified in the configuration
141
+
142
+ The first valid model found in this order will be used. If none of these models are found in the configured endpoints, Cortex will log a warning and use the default model defined in the configuration.
143
+
134
144
  ### Prompt
135
145
  When you define a new pathway, you need to at least specify a prompt that will be passed to the model for processing. In the simplest case, a prompt is really just a string, but the prompt is polymorphic - it can be a string or an object that contains information for the model API that you wish to call. Prompts can also be an array of strings or an array of objects for sequential operations. In this way Cortex aims to support everything from the simplest to the most advanced prompting scenarios.
136
146
 
@@ -577,7 +587,7 @@ The following properties can be configured through environment variables or the
577
587
  - `subscriptionKeepAlive`: Keep-alive time for subscriptions in seconds. Default is 0.
578
588
 
579
589
  API-specific configuration:
580
- - `azureVideoTranslationApiUrl`: URL for Azure video translation API. Default is 'http://127.0.0.1:5005'.
590
+ - `azureVideoTranslationApiKey`: API key for Azure video translation API. Default is null.
581
591
  - `dalleImageApiUrl`: URL for DALL-E image API. Default is 'null'.
582
592
  - `neuralSpaceApiKey`: API key for NeuralSpace services. Default is null.
583
593
  - `whisperMediaApiUrl`: URL for Whisper media API. Default is 'null'.
package/config.js CHANGED
@@ -271,18 +271,19 @@ var config = convict({
271
271
  },
272
272
  "azure-video-translate": {
273
273
  "type": "AZURE-VIDEO-TRANSLATE",
274
+ "url": "https://eastus.api.cognitive.microsoft.com/videotranslation",
274
275
  "headers": {
275
276
  "Content-Type": "application/json"
276
277
  },
277
- "supportsStreaming": true,
278
278
  }
279
279
  },
280
280
  env: 'CORTEX_MODELS'
281
281
  },
282
- azureVideoTranslationApiUrl: {
282
+ azureVideoTranslationApiKey: {
283
283
  format: String,
284
- default: 'http://127.0.0.1:5005',
285
- env: 'AZURE_VIDEO_TRANSLATION_API_URL'
284
+ default: null,
285
+ env: 'AZURE_VIDEO_TRANSLATION_API_KEY',
286
+ sensitive: true
286
287
  },
287
288
  openaiApiKey: {
288
289
  format: String,
@@ -11,6 +11,7 @@ import { join } from "path";
11
11
  import { Storage } from "@google-cloud/storage";
12
12
  import axios from "axios";
13
13
  import { publicFolder, port, ipAddress } from "./start.js";
14
+ // eslint-disable-next-line import/no-extraneous-dependencies
14
15
  import mime from "mime-types";
15
16
 
16
17
  function isBase64(str) {
@@ -65,6 +66,19 @@ async function gcsUrlExists(url, defaultReturn = false) {
65
66
  const bucketName = urlParts[0];
66
67
  const fileName = urlParts.slice(1).join('/');
67
68
 
69
+ if (process.env.STORAGE_EMULATOR_HOST) {
70
+ try {
71
+ const response = await axios.get(
72
+ `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucketName}/o/${encodeURIComponent(fileName)}`,
73
+ { validateStatus: status => status === 200 || status === 404 }
74
+ );
75
+ return response.status === 200;
76
+ } catch (error) {
77
+ console.error('Error checking emulator file:', error);
78
+ return false;
79
+ }
80
+ }
81
+
68
82
  const bucket = gcs.bucket(bucketName);
69
83
  const file = bucket.file(fileName);
70
84
 
@@ -77,7 +91,7 @@ async function gcsUrlExists(url, defaultReturn = false) {
77
91
  }
78
92
  }
79
93
 
80
- const getBlobClient = async () => {
94
+ export const getBlobClient = async () => {
81
95
  const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
82
96
  const containerName = AZURE_STORAGE_CONTAINER_NAME;
83
97
  if (!connectionString || !containerName) {
@@ -159,102 +173,104 @@ async function deleteBlob(requestId) {
159
173
  return result;
160
174
  }
161
175
 
162
- async function uploadBlob(context, req, saveToLocal = false, filePath=null, hash=null) {
163
- return new Promise(async (resolve, reject) => {
164
- try {
165
- let requestId = uuidv4();
166
- let body = {};
167
-
168
- // If filePath is given, we are dealing with local file and not form-data
169
- if (filePath) {
170
- const file = fs.createReadStream(filePath);
171
- const filename = path.basename(filePath);
172
- try {
173
- const result = await uploadFile(context, requestId, body, saveToLocal, file, filename, resolve, hash);
174
- resolve(result);
175
- } catch (error) {
176
- const err = new Error("Error processing file upload.");
177
- err.status = 500;
178
- throw err;
179
- }
180
- } else {
181
- // Otherwise, continue working with form-data
182
- const busboy = Busboy({ headers: req.headers });
183
- let hasFile = false;
184
- let errorOccurred = false;
185
-
186
- busboy.on("field", (fieldname, value) => {
187
- if (fieldname === "requestId") {
188
- requestId = value;
176
+ function uploadBlob(context, req, saveToLocal = false, filePath=null, hash=null) {
177
+ return new Promise((resolve, reject) => {
178
+ (async () => {
179
+ try {
180
+ let requestId = uuidv4();
181
+ let body = {};
182
+
183
+ // If filePath is given, we are dealing with local file and not form-data
184
+ if (filePath) {
185
+ const file = fs.createReadStream(filePath);
186
+ const filename = path.basename(filePath);
187
+ try {
188
+ const result = await uploadFile(context, requestId, body, saveToLocal, file, filename, resolve, hash);
189
+ resolve(result);
190
+ } catch (error) {
191
+ const err = new Error("Error processing file upload.");
192
+ err.status = 500;
193
+ throw err;
189
194
  }
190
- });
195
+ } else {
196
+ // Otherwise, continue working with form-data
197
+ const busboy = Busboy({ headers: req.headers });
198
+ let hasFile = false;
199
+ let errorOccurred = false;
200
+
201
+ busboy.on("field", (fieldname, value) => {
202
+ if (fieldname === "requestId") {
203
+ requestId = value;
204
+ }
205
+ });
191
206
 
192
- busboy.on("file", async (fieldname, file, filename) => {
193
- if (errorOccurred) return;
194
- hasFile = true;
195
- uploadFile(context, requestId, body, saveToLocal, file, filename?.filename || filename, resolve, hash).catch(error => {
207
+ busboy.on("file", async (fieldname, file, filename) => {
208
+ if (errorOccurred) return;
209
+ hasFile = true;
210
+ uploadFile(context, requestId, body, saveToLocal, file, filename?.filename || filename, resolve, hash).catch(_error => {
211
+ if (errorOccurred) return;
212
+ errorOccurred = true;
213
+ const err = new Error("Error processing file upload.");
214
+ err.status = 500;
215
+ reject(err);
216
+ });
217
+ });
218
+
219
+ busboy.on("error", (_error) => {
196
220
  if (errorOccurred) return;
197
221
  errorOccurred = true;
198
- const err = new Error("Error processing file upload.");
199
- err.status = 500;
222
+ const err = new Error("No file provided in request");
223
+ err.status = 400;
200
224
  reject(err);
201
225
  });
202
- });
203
-
204
- busboy.on("error", (error) => {
205
- if (errorOccurred) return;
206
- errorOccurred = true;
207
- const err = new Error("No file provided in request");
208
- err.status = 400;
209
- reject(err);
210
- });
211
-
212
- busboy.on("finish", () => {
213
- if (errorOccurred) return;
214
- if (!hasFile) {
226
+
227
+ busboy.on("finish", () => {
228
+ if (errorOccurred) return;
229
+ if (!hasFile) {
230
+ errorOccurred = true;
231
+ const err = new Error("No file provided in request");
232
+ err.status = 400;
233
+ reject(err);
234
+ }
235
+ });
236
+
237
+ // Handle errors from piping the request
238
+ req.on('error', (error) => {
239
+ if (errorOccurred) return;
215
240
  errorOccurred = true;
241
+ // Only log unexpected errors
242
+ if (error.message !== "No file provided in request") {
243
+ context.log("Error in request stream:", error);
244
+ }
216
245
  const err = new Error("No file provided in request");
217
246
  err.status = 400;
218
247
  reject(err);
219
- }
220
- });
221
-
222
- // Handle errors from piping the request
223
- req.on('error', (error) => {
224
- if (errorOccurred) return;
225
- errorOccurred = true;
226
- // Only log unexpected errors
227
- if (error.message !== "No file provided in request") {
228
- context.log("Error in request stream:", error);
229
- }
230
- const err = new Error("No file provided in request");
231
- err.status = 400;
232
- reject(err);
233
- });
248
+ });
234
249
 
235
- try {
236
- req.pipe(busboy);
237
- } catch (error) {
238
- if (errorOccurred) return;
239
- errorOccurred = true;
240
- // Only log unexpected errors
241
- if (error.message !== "No file provided in request") {
242
- context.log("Error piping request to busboy:", error);
250
+ try {
251
+ req.pipe(busboy);
252
+ } catch (error) {
253
+ if (errorOccurred) return;
254
+ errorOccurred = true;
255
+ // Only log unexpected errors
256
+ if (error.message !== "No file provided in request") {
257
+ context.log("Error piping request to busboy:", error);
258
+ }
259
+ const err = new Error("No file provided in request");
260
+ err.status = 400;
261
+ reject(err);
243
262
  }
244
- const err = new Error("No file provided in request");
245
- err.status = 400;
246
- reject(err);
247
263
  }
264
+ } catch (error) {
265
+ // Only log unexpected errors
266
+ if (error.message !== "No file provided in request") {
267
+ context.log("Error processing file upload:", error);
268
+ }
269
+ const err = new Error(error.message || "Error processing file upload.");
270
+ err.status = error.status || 500;
271
+ reject(err);
248
272
  }
249
- } catch (error) {
250
- // Only log unexpected errors
251
- if (error.message !== "No file provided in request") {
252
- context.log("Error processing file upload:", error);
253
- }
254
- const err = new Error(error.message || "Error processing file upload.");
255
- err.status = error.status || 500;
256
- reject(err);
257
- }
273
+ })();
258
274
  });
259
275
  }
260
276
 
@@ -511,7 +527,16 @@ async function ensureGCSUpload(context, existingFile) {
511
527
  if (!existingFile.gcs && gcs) {
512
528
  context.log(`GCS file was missing - uploading.`);
513
529
  const encodedFilename = path.basename(existingFile.url.split('?')[0]);
514
- existingFile.gcs = await uploadToGCS(context, existingFile.url, encodedFilename);
530
+
531
+ // Download the file from Azure/local storage
532
+ const response = await axios({
533
+ method: 'get',
534
+ url: existingFile.url,
535
+ responseType: 'stream'
536
+ });
537
+
538
+ // Upload the file stream to GCS
539
+ existingFile.gcs = await uploadToGCS(context, response.data, encodedFilename);
515
540
  }
516
541
  return existingFile;
517
542
  }
@@ -68,6 +68,7 @@ export const ACCEPTED_MIME_TYPES = {
68
68
  'image/webp': ['.webp'],
69
69
  'image/heic': ['.heic'],
70
70
  'image/heif': ['.heif'],
71
+ 'application/octet-stream': ['.jpg', '.jpeg', '.png', '.webp', '.heic', '.heif'],
71
72
  'application/pdf': ['.pdf'],
72
73
 
73
74
  // Audio types
@@ -18,8 +18,7 @@ const tempDirectories = new Map(); // dir -> { createdAt, requestId }
18
18
 
19
19
  // Temp directory cleanup
20
20
  async function cleanupTempDirectories() {
21
- const tempDir = os.tmpdir();
22
-
21
+
23
22
  for (const [dir, info] of tempDirectories) {
24
23
  try {
25
24
  // Cleanup directories older than 1 hour
@@ -1,6 +1,8 @@
1
1
  import fs from 'fs';
2
- import { ACCEPTED_MIME_TYPES } from './constants.js';
2
+ import { ACCEPTED_MIME_TYPES, isAcceptedMimeType } from './constants.js';
3
3
  import path from 'path';
4
+ import http from 'http';
5
+ import https from 'https';
4
6
 
5
7
  export async function deleteTempPath(path) {
6
8
  try {
@@ -65,3 +67,45 @@ export function ensureEncoded(url) {
65
67
  return url;
66
68
  }
67
69
  }
70
+
71
+ export async function urlExists(url) {
72
+ if(!url) return false;
73
+
74
+ try {
75
+ // Basic URL validation
76
+ const urlObj = new URL(url);
77
+ if (!['http:', 'https:'].includes(urlObj.protocol)) {
78
+ throw new Error('Invalid protocol - only HTTP and HTTPS are supported');
79
+ }
80
+
81
+ const httpModule = urlObj.protocol === 'https:' ? https : http;
82
+
83
+ return new Promise((resolve) => {
84
+ const request = httpModule.request(url, { method: 'HEAD' }, function(response) {
85
+ if (response.statusCode >= 200 && response.statusCode < 400) {
86
+ const contentType = response.headers['content-type'];
87
+ const cleanContentType = contentType ? contentType.split(';')[0].trim() : '';
88
+ // Check if the content type is one we accept
89
+ if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
90
+ resolve({ valid: true, contentType: cleanContentType });
91
+ } else {
92
+ console.log(`Unsupported content type: ${contentType}`);
93
+ resolve({ valid: false });
94
+ }
95
+ } else {
96
+ resolve({ valid: false });
97
+ }
98
+ });
99
+
100
+ request.on('error', function(err) {
101
+ console.error('URL validation error:', err.message);
102
+ resolve({ valid: false });
103
+ });
104
+
105
+ request.end();
106
+ });
107
+ } catch (error) {
108
+ console.error('URL validation error:', error.message);
109
+ return { valid: false };
110
+ }
111
+ }
@@ -1,20 +1,19 @@
1
1
  import { downloadFile, splitMediaFile } from './fileChunker.js';
2
2
  import { saveFileToBlob, deleteBlob, deleteGCS, uploadBlob, cleanup, cleanupGCS, gcsUrlExists, ensureGCSUpload, gcs, AZURE_STORAGE_CONTAINER_NAME } from './blobHandler.js';
3
3
  import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
4
- import { ensureEncoded, ensureFileExtension } from './helper.js';
4
+ import { ensureEncoded, ensureFileExtension, urlExists } from './helper.js';
5
5
  import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
6
6
  import { documentToText, easyChunker } from './docHelper.js';
7
- import { DOC_EXTENSIONS, isAcceptedMimeType } from './constants.js';
7
+ import { DOC_EXTENSIONS } from './constants.js';
8
8
  import path from 'path';
9
9
  import os from 'os';
10
10
  import { v4 as uuidv4 } from 'uuid';
11
11
  import fs from 'fs';
12
- import http from 'http';
13
- import https from 'https';
14
12
 
15
13
  const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
14
+ const useGCS = process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 || process.env.GCP_SERVICE_ACCOUNT_KEY ? true : false;
16
15
 
17
- console.log(`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${gcs ? ' and Google Cloud Storage' : ''}`);
16
+ console.log(`Storage configuration - ${useAzure ? 'Azure' : 'Local'} Storage${useGCS ? ' and Google Cloud Storage' : ''}`);
18
17
 
19
18
  let isCleanupRunning = false;
20
19
  async function cleanupInactive(context) {
@@ -76,48 +75,6 @@ async function cleanupInactive(context) {
76
75
  }
77
76
  }
78
77
 
79
- async function urlExists(url) {
80
- if(!url) return false;
81
-
82
- try {
83
- // Basic URL validation
84
- const urlObj = new URL(url);
85
- if (!['http:', 'https:'].includes(urlObj.protocol)) {
86
- throw new Error('Invalid protocol - only HTTP and HTTPS are supported');
87
- }
88
-
89
- const httpModule = urlObj.protocol === 'https:' ? https : http;
90
-
91
- return new Promise((resolve) => {
92
- const request = httpModule.request(url, { method: 'HEAD' }, function(response) {
93
- if (response.statusCode >= 200 && response.statusCode < 400) {
94
- const contentType = response.headers['content-type'];
95
- const cleanContentType = contentType ? contentType.split(';')[0].trim() : '';
96
- // Check if the content type is one we accept
97
- if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
98
- resolve({ valid: true, contentType: cleanContentType });
99
- } else {
100
- console.log(`Unsupported content type: ${contentType}`);
101
- resolve({ valid: false });
102
- }
103
- } else {
104
- resolve({ valid: false });
105
- }
106
- });
107
-
108
- request.on('error', function(err) {
109
- console.error('URL validation error:', err.message);
110
- resolve({ valid: false });
111
- });
112
-
113
- request.end();
114
- });
115
- } catch (error) {
116
- console.error('URL validation error:', error.message);
117
- return { valid: false };
118
- }
119
- }
120
-
121
78
  async function CortexFileHandler(context, req) {
122
79
  const { uri, requestId, save, hash, checkHash, clearHash, fetch, load, restore } = req.body?.params || req.query;
123
80
  const operation = save ? 'save' :
@@ -268,24 +225,55 @@ async function CortexFileHandler(context, req) {
268
225
 
269
226
  if(hashResult){
270
227
  context.log(`File exists in map: ${hash}`);
271
- const exists = await urlExists(hashResult?.url);
228
+
229
+ // Check primary storage (Azure/Local) first
230
+ const primaryExists = await urlExists(hashResult?.url);
231
+ const gcsExists = gcs ? await gcsUrlExists(hashResult?.gcs) : false;
272
232
 
273
- if(!exists.valid){
274
- context.log(`File is not in storage. Removing from map: ${hash}`);
233
+ // If neither storage has the file, remove from map and return not found
234
+ if (!primaryExists.valid && !gcsExists) {
235
+ context.log(`File not found in any storage. Removing from map: ${hash}`);
275
236
  await removeFromFileStoreMap(hash);
276
237
  context.res = {
277
238
  status: 404,
278
- body: `Hash ${hash} not found`
239
+ body: `Hash ${hash} not found in storage`
279
240
  };
280
241
  return;
281
242
  }
282
243
 
283
- if (gcs) {
284
- const gcsExists = await gcsUrlExists(hashResult?.gcs);
285
- if(!gcsExists){
286
- hashResult = await ensureGCSUpload(context, hashResult);
244
+ // If primary is missing but GCS exists, restore from GCS
245
+ if (!primaryExists.valid && gcsExists) {
246
+ context.log(`Primary storage file missing, restoring from GCS: ${hash}`);
247
+ try {
248
+ const res = await CortexFileHandler(context, {
249
+ method: 'GET',
250
+ body: { params: { fetch: hashResult.gcs } }
251
+ });
252
+ if (res?.body?.url) {
253
+ hashResult.url = res.body.url;
254
+ }
255
+ } catch (error) {
256
+ console.error('Error restoring from GCS:', error);
287
257
  }
288
258
  }
259
+ // If GCS is missing but primary exists, restore to GCS
260
+ else if (primaryExists.valid && gcs && !gcsExists) {
261
+ context.log(`GCS file missing, restoring from primary: ${hash}`);
262
+ const { gcs: _, ...fileInfo } = hashResult; // eslint-disable-line no-unused-vars
263
+ hashResult = await ensureGCSUpload(context, fileInfo);
264
+ }
265
+
266
+ // Final check to ensure we have at least one valid storage location
267
+ const finalPrimaryCheck = await urlExists(hashResult?.url);
268
+ if (!finalPrimaryCheck.valid && !await gcsUrlExists(hashResult?.gcs)) {
269
+ context.log(`Failed to restore file. Removing from map: ${hash}`);
270
+ await removeFromFileStoreMap(hash);
271
+ context.res = {
272
+ status: 404,
273
+ body: `Hash ${hash} not found and restoration failed`
274
+ };
275
+ return;
276
+ }
289
277
 
290
278
  //update redis timestamp with current time
291
279
  await setFileStoreMap(hash, hashResult);
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex-file-handler",
3
- "version": "1.0.15",
3
+ "version": "1.0.16",
4
4
  "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -43,6 +43,7 @@
43
43
  "timeout": "1m",
44
44
  "nodeArguments": [
45
45
  "--experimental-modules"
46
- ]
46
+ ],
47
+ "serial": true
47
48
  }
48
49
  }
@@ -18,7 +18,7 @@ node scripts/setup-azure-container.js
18
18
 
19
19
  # Run the tests
20
20
  echo "Running tests..."
21
- node -r dotenv/config node_modules/ava/entrypoints/cli.mjs
21
+ node -r dotenv/config node_modules/ava/entrypoints/cli.mjs "$@"
22
22
 
23
23
  # Store test result
24
24
  TEST_RESULT=$?
@@ -3,6 +3,7 @@ import express from "express";
3
3
  import { fileURLToPath } from 'url';
4
4
  import { dirname, join } from 'path';
5
5
  import cors from 'cors';
6
+ import { readFileSync } from 'fs';
6
7
 
7
8
  import { publicIpv4 } from 'public-ip';
8
9
  const ipAddress = await publicIpv4();
@@ -11,10 +12,22 @@ const app = express();
11
12
  const port = process.env.PORT || 7071;
12
13
  const publicFolder = join(dirname(fileURLToPath(import.meta.url)), 'files');
13
14
 
15
+ // Get version from package.json
16
+ const packageJson = JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), 'package.json'), 'utf8'));
17
+ const version = packageJson.version;
18
+
14
19
  app.use(cors());
15
20
  // Serve static files from the public folder
16
21
  app.use('/files', express.static(publicFolder));
17
22
 
23
+ // Health check endpoint
24
+ app.get('/health', (req, res) => {
25
+ res.status(200).json({
26
+ status: 'healthy',
27
+ version: version
28
+ });
29
+ });
30
+
18
31
  // New primary endpoint
19
32
  app.all('/api/CortexFileHandler', async (req, res) => {
20
33
  const context = { req, res, log: console.log }
@@ -44,7 +57,7 @@ app.all('/api/MediaFileChunker', async (req, res) => {
44
57
  });
45
58
 
46
59
  app.listen(port, () => {
47
- console.log(`Cortex File Handler running on port ${port} (includes legacy MediaFileChunker endpoint)`);
60
+ console.log(`Cortex File Handler v${version} running on port ${port} (includes legacy MediaFileChunker endpoint)`);
48
61
  });
49
62
 
50
63
  export { port, publicFolder, ipAddress };