npm - @aj-archipelago/cortex - Versions diffs - 1.1.21 → 1.1.23 - Mend

@aj-archipelago/cortex 1.1.21 → 1.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/config/default.example.json +84 -0
package/config.js +5 -4
package/helper-apps/cortex-file-handler/blobHandler.js +115 -98
package/helper-apps/cortex-file-handler/fileChunker.js +15 -10
package/helper-apps/cortex-file-handler/index.js +48 -2
package/helper-apps/cortex-file-handler/package-lock.json +226 -53
package/helper-apps/cortex-file-handler/package.json +3 -3
package/package.json +2 -1
package/pathways/categorize.js +23 -0
package/pathways/chat.js +1 -1
package/pathways/chat_code.js +19 -0
package/pathways/chat_context.js +19 -0
package/pathways/chat_jarvis.js +19 -0
package/pathways/chat_persist.js +23 -0
package/pathways/code_review.js +17 -0
package/pathways/cognitive_delete.js +2 -1
package/pathways/cognitive_insert.js +1 -0
package/pathways/cognitive_search.js +1 -0
package/pathways/embeddings.js +1 -1
package/pathways/expand_story.js +12 -0
package/pathways/format_paragraph_turbo.js +16 -0
package/pathways/format_summarization.js +21 -0
package/pathways/gemini_15_vision.js +20 -0
package/pathways/gemini_vision.js +20 -0
package/pathways/grammar.js +30 -0
package/pathways/hashtags.js +19 -0
package/pathways/headline.js +43 -0
package/pathways/headline_custom.js +169 -0
package/pathways/highlights.js +22 -0
package/pathways/image.js +2 -1
package/pathways/index.js +111 -17
package/pathways/jira_story.js +18 -0
package/pathways/keywords.js +4 -0
package/pathways/language.js +17 -6
package/pathways/locations.js +93 -0
package/pathways/quotes.js +19 -0
package/pathways/rag.js +207 -0
package/pathways/rag_jarvis.js +254 -0
package/pathways/rag_search_helper.js +21 -0
package/pathways/readme.js +18 -0
package/pathways/release_notes.js +16 -0
package/pathways/remove_content.js +31 -0
package/pathways/retrieval.js +23 -0
package/pathways/run_claude35_sonnet.js +21 -0
package/pathways/run_claude3_haiku.js +20 -0
package/pathways/run_gpt35turbo.js +20 -0
package/pathways/run_gpt4.js +20 -0
package/pathways/run_gpt4_32.js +20 -0
package/pathways/select_extension.js +6 -0
package/pathways/select_services.js +10 -0
package/pathways/spelling.js +3 -0
package/pathways/story_angles.js +13 -0
package/pathways/styleguide/styleguide.js +221 -0
package/pathways/styleguidemulti.js +127 -0
package/pathways/subhead.js +48 -0
package/pathways/summarize_turbo.js +98 -0
package/pathways/summary.js +31 -12
package/pathways/sys_claude_35_sonnet.js +19 -0
package/pathways/sys_claude_3_haiku.js +19 -0
package/pathways/sys_google_chat.js +19 -0
package/pathways/sys_google_code_chat.js +19 -0
package/pathways/sys_google_gemini_chat.js +23 -0
package/pathways/sys_openai_chat.js +2 -2
package/pathways/sys_openai_chat_16.js +19 -0
package/pathways/sys_openai_chat_gpt4.js +19 -0
package/pathways/sys_openai_chat_gpt4_32.js +19 -0
package/pathways/sys_openai_chat_gpt4_turbo.js +19 -0
package/pathways/tags.js +25 -0
package/pathways/taxonomy.js +135 -0
package/pathways/timeline.js +51 -0
package/pathways/topics.js +25 -0
package/pathways/topics_sentiment.js +20 -0
package/pathways/transcribe.js +2 -4
package/pathways/translate.js +10 -12
package/pathways/translate_azure.js +13 -0
package/pathways/translate_context.js +21 -0
package/pathways/translate_gpt4.js +19 -0
package/pathways/translate_gpt4_turbo.js +19 -0
package/pathways/translate_subtitle.js +201 -0
package/pathways/translate_subtitle_helper.js +31 -0
package/pathways/translate_turbo.js +19 -0
package/pathways/vision.js +9 -7
package/server/pathwayResolver.js +1 -1
package/server/plugins/azureCognitivePlugin.js +10 -1
package/server/plugins/openAiVisionPlugin.js +14 -6
package/tests/main.test.js +62 -2
package/tests/sublong.srt +4543 -0
package/tests/vision.test.js +0 -34

package/config/default.example.json CHANGED Viewed

@@ -12,6 +12,62 @@
             "requestsPerSecond": 10,
             "maxTokenLength": 2000
         },
+        "gemini-pro-chat": {
+            "type": "GEMINI-CHAT",
+            "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent",
+            "headers": {
+                "Content-Type": "application/json"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 32768,
+            "maxReturnTokens": 8192,
+            "supportsStreaming": true
+        },
+        "gemini-pro-vision": {
+            "type": "GEMINI-VISION",
+            "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-pro-vision:streamGenerateContent",
+            "headers": {
+                "Content-Type": "application/json"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 32768,
+            "maxReturnTokens": 2048,
+            "supportsStreaming": true
+        },
+        "gemini-pro-15-vision": {
+            "type": "GEMINI-VISION",
+            "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/gemini-1.5-pro-preview-0215:streamGenerateContent",
+            "headers": {
+                "Content-Type": "application/json"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 1048576,
+            "maxReturnTokens": 2048,
+            "supportsStreaming": true
+        },
+        "claude-3-haiku-vertex": {
+            "type": "CLAUDE-3-VERTEX",
+            "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/anthropic/models/claude-3-haiku@20240307",
+            "headers": {
+                "Content-Type": "application/json"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 200000,
+            "maxReturnTokens": 2048,
+            "supportsStreaming": true
+        },
+        "claude-35-sonnet-vertex": {
+            "type": "CLAUDE-3-VERTEX",
+            "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/anthropic/models/claude-3-5-sonnet@20240229",
+            "headers": {
+                "Content-Type": "application/json"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 200000,
+            "maxReturnTokens": 2048,
+            "supportsStreaming": true
+        },
         "oai-gpturbo": {
             "type": "OPENAI-CHAT",
             "url": "https://api.openai.com/v1/chat/completions",
@@ -38,6 +94,34 @@
             "requestsPerSecond": 10,
             "maxTokenLength": 8192
         },
+        "oai-gpt4-32": {
+            "type": "OPENAI-CHAT",
+            "url": "https://api.openai.com/v1/chat/completions",
+            "headers": {
+                "Authorization": "Bearer {{OPENAI_API_KEY}}",
+                "Content-Type": "application/json"
+            },
+            "params": {
+                "model": "gpt-4-32"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 32768
+        },
+        "oai-gpt4o": {
+            "type": "OPENAI-VISION",
+            "url": "https://api.openai.com/v1/chat/completions",
+            "headers": {
+                "Authorization": "Bearer {{OPENAI_API_KEY}}",
+                "Content-Type": "application/json"
+            },
+            "params": {
+                "model": "gpt-4o"
+            },
+            "requestsPerSecond": 10,
+            "maxTokenLength": 131072,
+            "maxReturnTokens": 4096,
+            "supportsStreaming": true
+        },
         "palm-text": {
             "type": "PALM-COMPLETION",
             "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/text-bison@001:predict",

package/config.js CHANGED Viewed

@@ -139,7 +139,7 @@ var config = convict({
                 },
                 "maxTokenLength": 8192,
             },
-            "oai-gpt4-vision": {
+            "oai-gpt4o": {
                 "type": "OPENAI-VISION",
                 "url": "https://api.openai.com/v1/chat/completions",
                 "headers": {
@@ -147,10 +147,11 @@ var config = convict({
                     "Content-Type": "application/json"
                 },
                 "params": {
-                    "model": "gpt-4-vision-preview"
+                    "model": "gpt-4o"
                 },
-                "requestsPerSecond": 1,
-                "maxTokenLength": 128000,
+                "requestsPerSecond": 50,
+                "maxTokenLength": 131072,
+                "maxReturnTokens": 4096,
                 "supportsStreaming": true
             },
             "azure-bing": {

package/helper-apps/cortex-file-handler/blobHandler.js CHANGED Viewed

@@ -36,6 +36,15 @@ const VIDEO_EXTENSIONS = [
   ".mkv",
 ];
+const AUDIO_EXTENSIONS = [
+  ".mp3",
+  ".wav",
+  ".ogg",
+  ".flac",
+  ".aac",
+  ".aiff",
+];
 function isBase64(str) {
   try {
     return btoa(atob(str)) == str;
@@ -162,131 +171,139 @@ async function deleteBlob(requestId) {
   return result;
 }
-async function uploadBlob(
-  context,
-  req,
-  saveToLocal = false,
-  useGoogle = false
-) {
+async function uploadBlob(context, req, saveToLocal = false, useGoogle = false, filePath=null) {
   return new Promise((resolve, reject) => {
     try {
-      const busboy = Busboy({ headers: req.headers });
       let requestId = uuidv4();
       let body = {};
-      busboy.on("field", (fieldname, value) => {
-        if (fieldname === "requestId") {
-          requestId = value;
-        } else if (fieldname === "useGoogle") {
-          useGoogle = value;
-        }
-      });
-      busboy.on("file", async (fieldname, file, info) => {
-        //do not use google if file is not image or video
-        const ext = path.extname(info.filename).toLowerCase();
-        const canUseGoogle = IMAGE_EXTENSIONS.includes(ext) || VIDEO_EXTENSIONS.includes(ext);
-        if(!canUseGoogle) {
-          useGoogle = false;
-        }
+      // If filePath is given, we are dealing with local file and not form-data
+      if (filePath) {
+        const file = fs.createReadStream(filePath);
+        const filename = path.basename(filePath);
+        uploadFile(context, requestId, body, saveToLocal, useGoogle, file, filename, resolve)
+      } else {
+        // Otherwise, continue working with form-data
+        const busboy = Busboy({ headers: req.headers });
+        busboy.on("field", (fieldname, value) => {
+          if (fieldname === "requestId") {
+            requestId = value;
+          } else if (fieldname === "useGoogle") {
+            useGoogle = value;
+          }
+        });
+        busboy.on("file", async (fieldname, file, filename) => {
+          uploadFile(context, requestId, body, saveToLocal, useGoogle, file, filename?.filename || filename, resolve)
+        });
+        busboy.on("error", (error) => {
+          context.log.error("Error processing file upload:", error);
+          context.res = {
+            status: 500,
+            body: "Error processing file upload.",
+          };
+          reject(error); // Reject the promise
+        });
+        req.pipe(busboy);
+      }
+    } catch (error) {
+      context.log.error("Error processing file upload:", error);
+      context.res = {
+        status: 500,
+        body: "Error processing file upload.",
+      };
+      reject(error); // Reject the promise
+    }
+  });
+}
-        //check if useGoogle is set but no gcs and warn
-        if(useGoogle && useGoogle !== "false" && !gcs) {
-          context.log.warn("Google Cloud Storage is not initialized reverting google upload ");
-          useGoogle = false;
-        }
+async function uploadFile(context, requestId, body, saveToLocal, useGoogle, file, filename, resolve) {
+  // do not use Google if the file is not an image or video
+  const ext = path.extname(filename).toLowerCase();
+  const canUseGoogle = IMAGE_EXTENSIONS.includes(ext) || VIDEO_EXTENSIONS.includes(ext) || AUDIO_EXTENSIONS.includes(ext);
+  if (!canUseGoogle) {
+    useGoogle = false;
+  }
-        if (saveToLocal) {
-          // Create the target folder if it doesn't exist
-          const localPath = join(publicFolder, requestId);
-          fs.mkdirSync(localPath, { recursive: true });
+  // check if useGoogle is set but no gcs and warn
+  if (useGoogle && useGoogle !== "false" && !gcs) {
+    context.log.warn("Google Cloud Storage is not initialized reverting google upload ");
+    useGoogle = false;
+  }
-          const filename = encodeURIComponent(`${uuidv4()}_${info.filename}`);
-          const destinationPath = `${localPath}/${filename}`;
+  const encodedFilename = encodeURIComponent(`${requestId || uuidv4()}_${filename}`);
-          await pipeline(file, fs.createWriteStream(destinationPath));
-          const message = `File '${filename}' saved to folder successfully.`;
-          context.log(message);
+  if (saveToLocal) {
+    // create the target folder if it doesn't exist
+    const localPath = join(publicFolder, requestId);
+    fs.mkdirSync(localPath, { recursive: true });
-          const url = `http://${ipAddress}:${port}/files/${requestId}/${filename}`;
+    const destinationPath = `${localPath}/${encodedFilename}`;
-          body = { message, url };
+    await pipeline(file, fs.createWriteStream(destinationPath));
-          resolve(body); // Resolve the promise
-        } else {
-          const filename = encodeURIComponent(`${requestId}/${uuidv4()}_${info.filename}`);
-          const { containerClient } = await getBlobClient();
+    const message = `File '${encodedFilename}' saved to folder successfully.`;
+    context.log(message);
-          const contentType = mime.lookup(filename);  // content type based on file extension
-          const options = {};
-          if (contentType) {
-            options.blobHTTPHeaders = { blobContentType: contentType };
-          }
+    const url = `http://${ipAddress}:${port}/files/${requestId}/${encodedFilename}`;
-          const blockBlobClient = containerClient.getBlockBlobClient(filename);
+    body = { message, url };
-          const passThroughStream = new PassThrough();
-          file.pipe(passThroughStream);
+    resolve(body); // Resolve the promise
+  } else {
+    const { containerClient } = await getBlobClient();
-          await blockBlobClient.uploadStream(passThroughStream, undefined, undefined, options);
+    const contentType = mime.lookup(encodedFilename);  // content type based on file extension
+    const options = {};
+    if (contentType) {
+      options.blobHTTPHeaders = { blobContentType: contentType };
+    }
-          const message = `File '${filename}' uploaded successfully.`;
-          const url = blockBlobClient.url;
-          context.log(message);
-          body = { message, url };
-        }
+    const blockBlobClient = containerClient.getBlockBlobClient(encodedFilename);
-        context.res = {
-          status: 200,
-          body,
-        };
+    const passThroughStream = new PassThrough();
+    file.pipe(passThroughStream);
-        if (useGoogle && useGoogle !== "false") {
-          const { url } = body;
-          const filename = encodeURIComponent(`${requestId}/${uuidv4()}_${info.filename}`);
-          const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(filename);
-          const writeStream = gcsFile.createWriteStream();
+    await blockBlobClient.uploadStream(passThroughStream, undefined, undefined, options);
-          const response = await axios({
-            method: "get",
-            url: url,
-            responseType: "stream",
-          });
+    const message = `File '${encodedFilename}' uploaded successfully.`;
+    const url = blockBlobClient.url;
+    context.log(message);
+    body = { message, url };
+  }
-          // Pipe the Axios response stream directly into the GCS Write Stream
-          response.data.pipe(writeStream);
+  context.res = {
+    status: 200,
+    body,
+  };
-          await new Promise((resolve, reject) => {
-            writeStream.on("finish", resolve);
-            writeStream.on("error", reject);
-          });
+  if (useGoogle && useGoogle !== "false") {
+    const { url } = body;
+    const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(encodedFilename);
+    const writeStream = gcsFile.createWriteStream();
-          body.gcs = `gs://${GCS_BUCKETNAME}/${filename}`;
-        }
+    const response = await axios({
+      method: "get",
+      url: url,
+      responseType: "stream",
+    });
-        resolve(body); // Resolve the promise
-      });
+    // pipe the Axios response stream directly into the GCS Write Stream
+    response.data.pipe(writeStream);
-      busboy.on("error", (error) => {
-        context.log.error("Error processing file upload:", error);
-        context.res = {
-          status: 500,
-          body: "Error processing file upload.",
-        };
-        reject(error); // Reject the promise
-      });
+    await new Promise((resolve, reject) => {
+      writeStream.on("finish", resolve);
+      writeStream.on("error", reject);
+    });
-      req.pipe(busboy);
-    } catch (error) {
-      context.log.error("Error processing file upload:", error);
-      context.res = {
-        status: 500,
-        body: "Error processing file upload.",
-      };
-      reject(error); // Reject the promise
-    }
-  });
+    body.gcs = `gs://${GCS_BUCKETNAME}/${encodedFilename}`;
+  }
+  resolve(body); // Resolve the promise
 }
 // Function to delete files that haven't been used in more than a month

package/helper-apps/cortex-file-handler/fileChunker.js CHANGED Viewed

@@ -6,7 +6,7 @@ import os from 'os';
 import { promisify } from 'util';
 import axios from 'axios';
 import { ensureEncoded } from './helper.js';
-import ytdl from 'ytdl-core';
+import ytdl from '@distube/ytdl-core';
 const ffmpegProbe = promisify(ffmpeg.ffprobe);
@@ -112,16 +112,20 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500) {
     }
 }
-const ytdlDownload = async (url, filename) => {
+const ytdlDownload = async (url, filename, video = false) => {
     return new Promise((resolve, reject) => {
-        const video = ytdl(url, { quality: 'highestaudio' });
+        const videoOptions = video
+            ? { filter: 'audioandvideo' }  // audio and video
+            : { quality: 'highestaudio' }; // audio only
+        const videoStream = ytdl(url, videoOptions);
         let lastLoggedTime = Date.now();
-        video.on('error', (error) => {
+        videoStream.on('error', (error) => {
             reject(error);
         });
-        video.on('progress', (chunkLength, downloaded, total) => {
+        videoStream.on('progress', (chunkLength, downloaded, total) => {
             const currentTime = Date.now();
             if (currentTime - lastLoggedTime >= 2000) { // Log every 2 seconds
                 const percent = downloaded / total;
@@ -130,7 +134,7 @@ const ytdlDownload = async (url, filename) => {
             }
         });
-        video.pipe(fs.createWriteStream(filename))
+        videoStream.pipe(fs.createWriteStream(filename))
             .on('finish', () => {
                 resolve();
             })
@@ -140,14 +144,15 @@ const ytdlDownload = async (url, filename) => {
     });
 };
-const processYoutubeUrl = async (url) => {
+async function processYoutubeUrl(url, video=false) {
     try {
-        const outputFileName = path.join(os.tmpdir(), `${uuidv4()}.mp3`);
-        await ytdlDownload(url, outputFileName);
+        const outputFormat = video ? '.mp4' : '.mp3';
+        const outputFileName = path.join(os.tmpdir(), `${uuidv4()}${outputFormat}`);
+        await ytdlDownload(url, outputFileName, video);
         return outputFileName;
     } catch (e) {
         console.log(e);
-        throw e;
+        throw new Error(`Error processing YouTube video, YouTube downloader might be outdated or blocked. ${e.message}`);
     }
 }

package/helper-apps/cortex-file-handler/index.js CHANGED Viewed

@@ -10,6 +10,10 @@ import { v4 as uuidv4 } from 'uuid';
 import fs from 'fs';
 import http from 'http';
 import https from 'https';
+import axios from "axios";
+import { pipeline } from "stream";
+import { promisify } from "util";
+const pipelineUtility = promisify(pipeline); // To pipe streams using async/await
 const DOC_EXTENSIONS =  [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css", '.pdf', '.docx', '.xlsx', '.csv'];
@@ -116,7 +120,48 @@ async function main(context, req) {
         return;
     }
-    const { uri, requestId, save, hash, checkHash } = req.body?.params || req.query;
+    const { uri, requestId, save, hash, checkHash, fetch, load, restore } = req.body?.params || req.query;
+    const filepond = fetch || restore || load;
+    if (req.method.toLowerCase() === `get` && filepond) {
+        context.log(`Remote file: ${filepond}`);
+        // Check if file already exists (using hash as the key)
+        const exists = await getFileStoreMap(filepond);
+        if(exists){
+            context.res = {
+                status: 200,
+                body: exists // existing file URL
+            };
+            return;
+        }
+        // Check if it's a youtube url
+        let youtubeDownloadedFile = null;
+        if(isValidYoutubeUrl(filepond)){
+            youtubeDownloadedFile = await processYoutubeUrl(filepond, true);
+        }
+        const filename = path.join(os.tmpdir(), path.basename(youtubeDownloadedFile || filepond));
+        // Download the remote file to a local/temporary location keep name & ext
+        if(!youtubeDownloadedFile){
+            const response = await axios.get(filepond, { responseType: "stream" });
+            await pipelineUtility(response.data, fs.createWriteStream(filename));
+        }
+        const res = await uploadBlob(context, null, !useAzure, true, filename);
+        context.log(`File uploaded: ${JSON.stringify(res)}`);
+        //Update Redis (using hash as the key)
+        await setFileStoreMap(filepond, res);
+        // Return the file URL
+        context.res = {
+            status: 200,
+            body: res,
+        };
+        return;
+    }
     if(hash && checkHash){ //check if hash exists
         context.log(`Checking hash: ${hash}`);
@@ -229,7 +274,8 @@ async function main(context, req) {
             if (isYoutubeUrl) {
                 // totalCount += 1; // extra 1 step for youtube download
-                file = await processYoutubeUrl(file);
+                const processAsVideo = req.body?.params?.processAsVideo || req.query?.processAsVideo;
+                file = await processYoutubeUrl(file, processAsVideo);
             }
             const { chunkPromises, chunkOffsets, uniqueOutputPath } = await splitMediaFile(file);