@aj-archipelago/cortex 1.1.19 → 1.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  {
2
- "defaultModelName": "oai-td3",
2
+ "defaultModelName": "oai-gpturbo",
3
3
  "models": {
4
4
  "azure-translate": {
5
5
  "type": "AZURE-TRANSLATE",
@@ -12,19 +12,6 @@
12
12
  "requestsPerSecond": 10,
13
13
  "maxTokenLength": 2000
14
14
  },
15
- "oai-td3": {
16
- "type": "OPENAI-COMPLETION",
17
- "url": "https://api.openai.com/v1/completions",
18
- "headers": {
19
- "Authorization": "Bearer {{OPENAI_API_KEY}}",
20
- "Content-Type": "application/json"
21
- },
22
- "params": {
23
- "model": "text-davinci-003"
24
- },
25
- "requestsPerSecond": 10,
26
- "maxTokenLength": 4096
27
- },
28
15
  "oai-gpturbo": {
29
16
  "type": "OPENAI-CHAT",
30
17
  "url": "https://api.openai.com/v1/chat/completions",
package/config.js CHANGED
@@ -111,6 +111,13 @@ var config = convict({
111
111
  "model": "whisper-1"
112
112
  },
113
113
  },
114
+ "neuralspace": {
115
+ "type": "NEURALSPACE",
116
+ "url": "https://voice.neuralspace.ai/api/v2/jobs",
117
+ "headers": {
118
+ "Authorization": "{{NEURALSPACE_API_KEY}}",
119
+ },
120
+ },
114
121
  "azure-cognitive": {
115
122
  "type": "AZURE-COGNITIVE",
116
123
  "url": "{{{AZURE_COGNITIVE_API_URL}}}",
@@ -222,6 +229,11 @@ var config = convict({
222
229
  default: 0,
223
230
  env: 'SUBSCRIPTION_KEEP_ALIVE'
224
231
  },
232
+ neuralSpaceApiKey: {
233
+ format: String,
234
+ default: null,
235
+ env: 'NEURALSPACE_API_KEY'
236
+ },
225
237
  });
226
238
 
227
239
  // Read in environment variables and set up service configuration
@@ -11,6 +11,7 @@ import { join } from "path";
11
11
  import { Storage } from "@google-cloud/storage";
12
12
  import axios from "axios";
13
13
  import { publicFolder, port, ipAddress } from "./start.js";
14
+ import mime from "mime-types";
14
15
 
15
16
  const IMAGE_EXTENSIONS = [
16
17
  ".jpg",
@@ -21,6 +22,7 @@ const IMAGE_EXTENSIONS = [
21
22
  ".webp",
22
23
  ".tiff",
23
24
  ".svg",
25
+ ".pdf"
24
26
  ];
25
27
 
26
28
  const VIDEO_EXTENSIONS = [
@@ -74,7 +76,34 @@ if (!GCP_PROJECT_ID || !GCP_SERVICE_ACCOUNT) {
74
76
 
75
77
  const GCS_BUCKETNAME = process.env.GCS_BUCKETNAME || "cortextempfiles";
76
78
 
77
- const getBlobClient = () => {
79
+
80
+ async function gcsUrlExists(url, defaultReturn = true) {
81
+ try {
82
+ if(!url) {
83
+ return defaultReturn; // Cannot check return
84
+ }
85
+ if (!gcs) {
86
+ console.warn('GCS environment variables are not set. Unable to check if URL exists in GCS.');
87
+ return defaultReturn; // Cannot check return
88
+ }
89
+
90
+ const urlParts = url.replace('gs://', '').split('/');
91
+ const bucketName = urlParts[0];
92
+ const fileName = urlParts.slice(1).join('/');
93
+
94
+ const bucket = gcs.bucket(bucketName);
95
+ const file = bucket.file(fileName);
96
+
97
+ const [exists] = await file.exists();
98
+
99
+ return exists;
100
+ } catch (error) {
101
+ console.error('Error checking if GCS URL exists:', error);
102
+ return false;
103
+ }
104
+ }
105
+
106
+ const getBlobClient = async () => {
78
107
  const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
79
108
  const containerName = process.env.AZURE_STORAGE_CONTAINER_NAME;
80
109
  if (!connectionString || !containerName) {
@@ -83,17 +112,23 @@ const getBlobClient = () => {
83
112
  );
84
113
  }
85
114
 
86
- const blobServiceClient =
87
- BlobServiceClient.fromConnectionString(connectionString);
115
+ const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
116
+
117
+ const serviceProperties = await blobServiceClient.getProperties();
118
+ if(!serviceProperties.defaultServiceVersion) {
119
+ serviceProperties.defaultServiceVersion = '2020-02-10';
120
+ await blobServiceClient.setProperties(serviceProperties);
121
+ }
122
+
88
123
  const containerClient = blobServiceClient.getContainerClient(containerName);
89
124
 
90
125
  return { blobServiceClient, containerClient };
91
126
  };
92
127
 
93
128
  async function saveFileToBlob(chunkPath, requestId) {
94
- const { containerClient } = getBlobClient();
129
+ const { containerClient } = await getBlobClient();
95
130
  // Use the filename with a UUID as the blob name
96
- const blobName = `${requestId}/${uuidv4()}_${path.basename(chunkPath)}`;
131
+ const blobName = `${requestId}/${uuidv4()}_${encodeURIComponent(path.basename(chunkPath))}`;
97
132
 
98
133
  // Create a read stream for the chunk file
99
134
  const fileStream = fs.createReadStream(chunkPath);
@@ -110,7 +145,7 @@ async function saveFileToBlob(chunkPath, requestId) {
110
145
  //deletes blob that has the requestId
111
146
  async function deleteBlob(requestId) {
112
147
  if (!requestId) throw new Error("Missing requestId parameter");
113
- const { containerClient } = getBlobClient();
148
+ const { containerClient } = await getBlobClient();
114
149
  // List the blobs in the container with the specified prefix
115
150
  const blobs = containerClient.listBlobsFlat({ prefix: `${requestId}/` });
116
151
 
@@ -166,7 +201,7 @@ async function uploadBlob(
166
201
  const localPath = join(publicFolder, requestId);
167
202
  fs.mkdirSync(localPath, { recursive: true });
168
203
 
169
- const filename = `${uuidv4()}_${info.filename}`;
204
+ const filename = encodeURIComponent(`${uuidv4()}_${info.filename}`);
170
205
  const destinationPath = `${localPath}/${filename}`;
171
206
 
172
207
  await pipeline(file, fs.createWriteStream(destinationPath));
@@ -180,15 +215,21 @@ async function uploadBlob(
180
215
 
181
216
  resolve(body); // Resolve the promise
182
217
  } else {
183
- const filename = `${requestId}/${uuidv4()}_${info.filename}`;
184
- const { containerClient } = getBlobClient();
218
+ const filename = encodeURIComponent(`${requestId}/${uuidv4()}_${info.filename}`);
219
+ const { containerClient } = await getBlobClient();
220
+
221
+ const contentType = mime.lookup(filename); // content type based on file extension
222
+ const options = {};
223
+ if (contentType) {
224
+ options.blobHTTPHeaders = { blobContentType: contentType };
225
+ }
185
226
 
186
227
  const blockBlobClient = containerClient.getBlockBlobClient(filename);
187
228
 
188
229
  const passThroughStream = new PassThrough();
189
230
  file.pipe(passThroughStream);
190
231
 
191
- await blockBlobClient.uploadStream(passThroughStream);
232
+ await blockBlobClient.uploadStream(passThroughStream, undefined, undefined, options);
192
233
 
193
234
  const message = `File '${filename}' uploaded successfully.`;
194
235
  const url = blockBlobClient.url;
@@ -203,7 +244,7 @@ async function uploadBlob(
203
244
 
204
245
  if (useGoogle && useGoogle !== "false") {
205
246
  const { url } = body;
206
- const filename = `${requestId}/${uuidv4()}_${info.filename}`;
247
+ const filename = encodeURIComponent(`${requestId}/${uuidv4()}_${info.filename}`);
207
248
  const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(filename);
208
249
  const writeStream = gcsFile.createWriteStream();
209
250
 
@@ -250,7 +291,7 @@ async function uploadBlob(
250
291
 
251
292
  // Function to delete files that haven't been used in more than a month
252
293
  async function cleanup(urls=null) {
253
- const { containerClient } = getBlobClient();
294
+ const { containerClient } = await getBlobClient();
254
295
 
255
296
  if(!urls) {
256
297
  const xMonthAgo = new Date();
@@ -334,4 +375,4 @@ async function cleanupGCS(urls=null) {
334
375
  return cleanedURLs;
335
376
  }
336
377
 
337
- export { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS };
378
+ export { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS, gcsUrlExists };
@@ -1,5 +1,5 @@
1
1
  import { downloadFile, processYoutubeUrl, splitMediaFile } from './fileChunker.js';
2
- import { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS } from './blobHandler.js';
2
+ import { saveFileToBlob, deleteBlob, uploadBlob, cleanup, cleanupGCS, gcsUrlExists } from './blobHandler.js';
3
3
  import { cleanupRedisFileStoreMap, getFileStoreMap, publishRequestProgress, removeFromFileStoreMap, setFileStoreMap } from './redis.js';
4
4
  import { deleteTempPath, ensureEncoded, isValidYoutubeUrl } from './helper.js';
5
5
  import { moveFileToPublicFolder, deleteFolder, cleanupLocal } from './localFileHandler.js';
@@ -122,14 +122,15 @@ async function main(context, req) {
122
122
  context.log(`Checking hash: ${hash}`);
123
123
  const result = await getFileStoreMap(hash);
124
124
 
125
- const exists = await urlExists(result?.url);
125
+ if(result){
126
+ const exists = await urlExists(result?.url);
127
+ const gcsExists = await gcsUrlExists(result?.gcs);
126
128
 
127
- if(!exists){
128
- await removeFromFileStoreMap(hash);
129
- return;
130
- }
129
+ if(!exists || !gcsExists){
130
+ await removeFromFileStoreMap(hash);
131
+ return;
132
+ }
131
133
 
132
- if(result){
133
134
  context.log(`Hash exists: ${hash}`);
134
135
  //update redis timestamp with current time
135
136
  await setFileStoreMap(hash, result);
@@ -5,6 +5,7 @@
5
5
  "type": "module",
6
6
  "scripts": {
7
7
  "start": "node start.js",
8
+ "dev": "node -r dotenv/config start.js",
8
9
  "test": "echo \"No tests yet...\""
9
10
  },
10
11
  "dependencies": {
@@ -3,7 +3,11 @@ import { encode, decode } from '../lib/encodeCache.js';
3
3
  import { config } from '../config.js';
4
4
 
5
5
  // callPathway - call a pathway from another pathway
6
- const callPathway = async (pathwayName, args) => {
6
+ const callPathway = async (pathwayName, inArgs, pathwayResolver) => {
7
+
8
+ // Clone the args object to avoid modifying the original
9
+ const args = JSON.parse(JSON.stringify(inArgs));
10
+
7
11
  const pathway = config.get(`pathways.${pathwayName}`);
8
12
  if (!pathway) {
9
13
  throw new Error(`Pathway ${pathwayName} not found`);
@@ -11,6 +15,12 @@ const callPathway = async (pathwayName, args) => {
11
15
  const requestState = {};
12
16
  const parent = {};
13
17
  const data = await pathway.rootResolver(parent, args, { config, pathway, requestState } );
18
+
19
+ // Merge the results into the pathwayResolver if it was provided
20
+ if (pathwayResolver) {
21
+ pathwayResolver.mergeResults(data);
22
+ }
23
+
14
24
  return data?.result;
15
25
  };
16
26
 
@@ -184,6 +184,7 @@ setInterval(() => {
184
184
  }, 30000); // Log rates every 30 seconds
185
185
 
186
186
  const requestWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
187
+ //logger.warn(`Requesting ${url} with data: ${JSON.stringify(data)}`);
187
188
  const callId = endpoint?.monitor?.startCall();
188
189
  let response;
189
190
  try {
@@ -356,14 +357,13 @@ const executeRequest = async (cortexRequest) => {
356
357
  if (cached) {
357
358
  logger.info(`<<< [${requestId}] served with cached response.`);
358
359
  }
359
- if (error && error.length > 0) {
360
- const lastError = error[error.length - 1];
361
- return { error: lastError.toJSON() ?? lastError ?? error };
360
+ if (error) {
361
+ throw { error: error.toJSON() ?? error };
362
362
  }
363
363
  return { data, duration };
364
364
  } catch (error) {
365
365
  logger.error(`Error in request: ${error.message || error}`);
366
- return { error: error };
366
+ throw error;
367
367
  }
368
368
  }
369
369
 
package/lib/util.js CHANGED
@@ -1,3 +1,22 @@
1
+ import logger from "./logger.js";
2
+ import stream from 'stream';
3
+ import subsrt from 'subsrt';
4
+ import os from 'os';
5
+ import path from 'path';
6
+ import http from 'http';
7
+ import https from 'https';
8
+ import { URL } from 'url';
9
+ import { v4 as uuidv4 } from 'uuid';
10
+ import { promisify } from 'util';
11
+ import { axios } from './requestExecutor.js';
12
+ import { config } from '../config.js';
13
+ import fs from 'fs';
14
+
15
+ const pipeline = promisify(stream.pipeline);
16
+ const MEDIA_API_URL = config.get('whisperMediaApiUrl');
17
+
18
+
19
+
1
20
  function convertToSingleContentChatHistory(chatHistory){
2
21
  for(let i=0; i<chatHistory.length; i++){
3
22
  //if isarray make it single string
@@ -29,8 +48,151 @@ function chatArgsHasImageUrl(args){
29
48
  return chatArgsHasType(args, 'image_url');
30
49
  }
31
50
 
51
+
52
+ async function deleteTempPath(path) {
53
+ try {
54
+ if (!path) {
55
+ logger.warn('Temporary path is not defined.');
56
+ return;
57
+ }
58
+ if (!fs.existsSync(path)) {
59
+ logger.warn(`Temporary path ${path} does not exist.`);
60
+ return;
61
+ }
62
+ const stats = fs.statSync(path);
63
+ if (stats.isFile()) {
64
+ fs.unlinkSync(path);
65
+ logger.info(`Temporary file ${path} deleted successfully.`);
66
+ } else if (stats.isDirectory()) {
67
+ fs.rmSync(path, { recursive: true });
68
+ logger.info(`Temporary folder ${path} and its contents deleted successfully.`);
69
+ }
70
+ } catch (err) {
71
+ logger.error(`Error occurred while deleting the temporary path: ${err}`);
72
+ }
73
+ }
74
+
75
+ function generateUniqueFilename(extension) {
76
+ return `${uuidv4()}.${extension}`;
77
+ }
78
+
79
+ const downloadFile = async (fileUrl) => {
80
+ const fileExtension = path.extname(fileUrl).slice(1);
81
+ const uniqueFilename = generateUniqueFilename(fileExtension);
82
+ const tempDir = os.tmpdir();
83
+ const localFilePath = `${tempDir}/${uniqueFilename}`;
84
+
85
+ // eslint-disable-next-line no-async-promise-executor
86
+ return new Promise(async (resolve, reject) => {
87
+ try {
88
+ const parsedUrl = new URL(fileUrl);
89
+ const protocol = parsedUrl.protocol === 'https:' ? https : http;
90
+
91
+ const response = await new Promise((resolve, reject) => {
92
+ protocol.get(parsedUrl, (res) => {
93
+ if (res.statusCode === 200) {
94
+ resolve(res);
95
+ } else {
96
+ reject(new Error(`HTTP request failed with status code ${res.statusCode}`));
97
+ }
98
+ }).on('error', reject);
99
+ });
100
+
101
+ await pipeline(response, fs.createWriteStream(localFilePath));
102
+ logger.info(`Downloaded file to ${localFilePath}`);
103
+ resolve(localFilePath);
104
+ } catch (error) {
105
+ fs.unlink(localFilePath, () => {
106
+ reject(error);
107
+ });
108
+ //throw error;
109
+ }
110
+ });
111
+ };
112
+
113
+ // convert srt format to text
114
+ function convertSrtToText(str) {
115
+ return str
116
+ .split('\n')
117
+ .filter(line => !line.match(/^\d+$/) && !line.match(/^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$/) && line !== '')
118
+ .join(' ');
119
+ }
120
+
121
+ function alignSubtitles(subtitles, format, offsets) {
122
+ const result = [];
123
+
124
+ function preprocessStr(str) {
125
+ try{
126
+ if(!str) return '';
127
+ return str.trim().replace(/(\n\n)(?!\n)/g, '\n\n\n');
128
+ }catch(e){
129
+ logger.error(`An error occurred in content text preprocessing: ${e}`);
130
+ return '';
131
+ }
132
+ }
133
+
134
+ function shiftSubtitles(subtitle, shiftOffset) {
135
+ const captions = subsrt.parse(preprocessStr(subtitle));
136
+ const resynced = subsrt.resync(captions, { offset: shiftOffset });
137
+ return resynced;
138
+ }
139
+
140
+ for (let i = 0; i < subtitles.length; i++) {
141
+ result.push(...shiftSubtitles(subtitles[i], offsets[i]*1000)); // convert to milliseconds
142
+ }
143
+
144
+ try {
145
+ //if content has needed html style tags, keep them
146
+ for(const obj of result) {
147
+ if(obj && obj.content){
148
+ obj.text = obj.content;
149
+ }
150
+ }
151
+ } catch (error) {
152
+ logger.error(`An error occurred in content text parsing: ${error}`);
153
+ }
154
+
155
+ return subsrt.build(result, { format: format === 'vtt' ? 'vtt' : 'srt' });
156
+ }
157
+
158
+
159
+ async function getMediaChunks(file, requestId) {
160
+ try {
161
+ if (MEDIA_API_URL) {
162
+ //call helper api and get list of file uris
163
+ const res = await axios.get(MEDIA_API_URL, { params: { uri: file, requestId } });
164
+ return res.data;
165
+ } else {
166
+ logger.info(`No API_URL set, returning file as chunk`);
167
+ return [file];
168
+ }
169
+ } catch (err) {
170
+ logger.error(`Error getting media chunks list from api: ${err}`);
171
+ throw err;
172
+ }
173
+ }
174
+
175
+ async function markCompletedForCleanUp(requestId) {
176
+ try {
177
+ if (MEDIA_API_URL) {
178
+ //call helper api to mark processing as completed
179
+ const res = await axios.delete(MEDIA_API_URL, { params: { requestId } });
180
+ logger.info(`Marked request ${requestId} as completed:`, res.data);
181
+ return res.data;
182
+ }
183
+ } catch (err) {
184
+ logger.error(`Error marking request ${requestId} as completed: ${err}`);
185
+ }
186
+ }
187
+
32
188
  export {
33
189
  convertToSingleContentChatHistory,
34
190
  chatArgsHasImageUrl,
35
- chatArgsHasType
191
+ chatArgsHasType,
192
+ deleteTempPath,
193
+ downloadFile,
194
+ convertSrtToText,
195
+ alignSubtitles,
196
+ getMediaChunks,
197
+ markCompletedForCleanUp
36
198
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.1.19",
3
+ "version": "1.1.21",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
package/pathways/index.js CHANGED
@@ -16,6 +16,7 @@ import test_cohere_summarize from './test_cohere_summarize.js';
16
16
  import test_langchain from './test_langchain.mjs';
17
17
  import test_palm_chat from './test_palm_chat.js';
18
18
  import transcribe from './transcribe.js';
19
+ import transcribe_neuralspace from './transcribe_neuralspace.js';
19
20
  import translate from './translate.js';
20
21
  import embeddings from './embeddings.js';
21
22
  import vision from './vision.js';
@@ -42,6 +43,7 @@ export {
42
43
  test_langchain,
43
44
  test_palm_chat,
44
45
  transcribe,
46
+ transcribe_neuralspace,
45
47
  translate,
46
48
  vision,
47
49
  };
@@ -0,0 +1,18 @@
1
+ export default {
2
+ prompt: `{{text}}`,
3
+ model: `neuralspace`,
4
+ inputParameters: {
5
+ file: ``,
6
+ language: ``,
7
+ responseFormat: `text`,
8
+ wordTimestamped: false,
9
+ highlightWords: false,
10
+ maxLineWidth: 0,
11
+ maxLineCount: 0,
12
+ maxWordsPerLine: 0,
13
+ },
14
+ timeout: 3600, // in seconds
15
+ enableDuplicateRequests: false,
16
+ };
17
+
18
+
@@ -23,6 +23,7 @@ import Gemini15ChatPlugin from './plugins/gemini15ChatPlugin.js';
23
23
  import Gemini15VisionPlugin from './plugins/gemini15VisionPlugin.js';
24
24
  import AzureBingPlugin from './plugins/azureBingPlugin.js';
25
25
  import Claude3VertexPlugin from './plugins/claude3VertexPlugin.js';
26
+ import NeuralSpacePlugin from './plugins/neuralSpacePlugin.js';
26
27
 
27
28
  class ModelExecutor {
28
29
  constructor(pathway, model) {
@@ -57,6 +58,9 @@ class ModelExecutor {
57
58
  case 'OPENAI-WHISPER':
58
59
  plugin = new OpenAIWhisperPlugin(pathway, model);
59
60
  break;
61
+ case 'NEURALSPACE':
62
+ plugin = new NeuralSpacePlugin(pathway, model);
63
+ break;
60
64
  case 'LOCAL-CPP-MODEL':
61
65
  plugin = new LocalModelPlugin(pathway, model);
62
66
  break;
@@ -25,8 +25,10 @@ class PathwayResolver {
25
25
  this.useInputChunking = pathway.useInputChunking;
26
26
  this.chunkMaxTokenLength = 0;
27
27
  this.warnings = [];
28
+ this.errors = [];
28
29
  this.requestId = uuidv4();
29
30
  this.responseParser = new PathwayResponseParser(pathway);
31
+ this.tool = null;
30
32
  this.modelName = [
31
33
  pathway.model,
32
34
  args?.model,
@@ -178,6 +180,15 @@ class PathwayResolver {
178
180
  }
179
181
  }
180
182
 
183
+ mergeResults(mergeData) {
184
+ if (mergeData) {
185
+ this.previousResult = mergeData.previousResult ? mergeData.previousResult : this.previousResult;
186
+ this.warnings = [...this.warnings, ...(mergeData.warnings || [])];
187
+ this.errors = [...this.errors, ...(mergeData.errors || [])];
188
+ this.tool = mergeData.tool || this.tool;
189
+ }
190
+ }
191
+
181
192
  async resolve(args) {
182
193
  // Either we're dealing with an async request, stream, or regular request
183
194
  if (args.async || args.stream) {
@@ -244,6 +255,12 @@ class PathwayResolver {
244
255
  logger.warn(warning);
245
256
  }
246
257
 
258
+ // Add an error and log it
259
+ logError(error) {
260
+ this.errors.push(error);
261
+ logger.error(error);
262
+ }
263
+
247
264
  // Here we choose how to handle long input - either summarize or chunk
248
265
  processInputText(text) {
249
266
  let chunkTokenLength = 0;
@@ -416,7 +433,8 @@ class PathwayResolver {
416
433
  }
417
434
  }
418
435
 
419
- if (prompt.saveResultTo) {
436
+ // save the result to the context if requested and no errors
437
+ if (prompt.saveResultTo && this.errors.length === 0) {
420
438
  this.savedContext[prompt.saveResultTo] = result;
421
439
  }
422
440
  return result;