@aj-archipelago/cortex 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -1
- package/graphql/plugins/azureTranslatePlugin.js +0 -10
- package/graphql/plugins/modelPlugin.js +16 -2
- package/graphql/plugins/openAiChatPlugin.js +0 -16
- package/graphql/plugins/openAiCompletionPlugin.js +0 -15
- package/graphql/plugins/openAiWhisperPlugin.js +35 -23
- package/lib/fileChunker.js +4 -3
- package/package.json +1 -1
package/README.md
CHANGED
@@ -81,7 +81,31 @@ To add a new pathway to Cortex, you create a new JavaScript file and define the
  ### Prompt
  When you define a new pathway, you need to at least specify a prompt that will be passed to the model for processing. In the simplest case, a prompt is really just a string, but the prompt is polymorphic - it can be a string or an object that contains information for the model API that you wish to call. Prompts can also be an array of strings or an array of objects for sequential operations. In this way Cortex aims to support everything from the simplest to the most advanced prompting scenarios.
  
-
+ ```js
+ // a prompt can be a string
+ prompt: `{{{text}}}\nCopy the names of all people and places exactly from this document in the language above:\n`
+
+ // or an array of strings
+ prompt: [
+     `{{{text}}}\nCopy the names of all people and places exactly from this document in the language above:\n`,
+     `Original Language:\n{{{previousResult}}}\n\n{{to}}:\n`,
+     `Entities in the document:\n\n{{{previousResult}}}\n\nDocument:\n{{{text}}}\nRewrite the document in {{to}}. If the document is already in {{to}}, copy it exactly below:\n`
+ ]
+
+ // or an array of one or more Prompt objects
+ // as you can see below, a Prompt object can also have a messages array, which is how you can
+ // express your prompts for chat-style interfaces
+ prompt: [
+     new Prompt({ messages: [
+         {"role": "system", "content": "Assistant is a highly skilled multilingual translator for a prestigious news agency. When the user posts any text in any language, assistant will create a translation of that text in {{to}}. Assistant will produce only the translation and no additional notes or commentary."},
+         {"role": "user", "content": "{{{text}}}"}
+     ]}),
+ ]
+ ```
+
+ If a prompt is an array, the individual prompts in the array will be executed sequentially by the Cortex prompt execution engine. The execution engine deals with all of the complexities of chunking input content and executing the sequence of prompts against those chunks in a way that optimizes performance and ensures the integrity of the pathway logic.
+
+ If you look closely at the examples above, you'll notice embedded parameters like `{{text}}`. In Cortex, all prompt strings are actually [Handlebars](https://handlebarsjs.com/) templates. So in this case, that parameter will be replaced before prompt execution with the incoming query variable called `text`. You can refer to almost any pathway parameter or system property in the prompt definition and it will be replaced before execution.
  ### Parameters
  Pathways support an arbitrary number of input parameters. These are defined in the pathway like this:
  ```js
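The Handlebars substitution described in the new README text is straightforward to picture on its own. Below is a minimal sketch, not taken from the package, assuming only the `handlebars` dependency that the plugins themselves use and illustrative variable names (`text`, `to`) that mirror the README examples:

```js
const handlebars = require("handlebars");

// A prompt string is an ordinary Handlebars template; Cortex compiles it and
// fills in pathway parameters and query variables before execution.
const template = handlebars.compile(
    "{{{text}}}\nRewrite the document in {{to}}:\n"
);

// Illustrative query variables -- in a real pathway these arrive from the GraphQL query.
const rendered = template({ text: "Bonjour le monde.", to: "English" });
console.log(rendered);
// => "Bonjour le monde.\nRewrite the document in English:\n"
```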
package/graphql/plugins/azureTranslatePlugin.js
CHANGED
@@ -1,20 +1,10 @@
  // AzureTranslatePlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
- const { encode } = require("gpt-3-encoder");
  
  class AzureTranslatePlugin extends ModelPlugin {
      constructor(config, pathway) {
          super(config, pathway);
      }
-
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-
-         return { modelPromptText, tokenLength: encode(modelPromptText).length };
-     }
  
      // Set up parameters specific to the Azure Translate API
      getRequestParameters(text, parameters, prompt) {
package/graphql/plugins/modelPlugin.js
CHANGED
@@ -73,6 +73,20 @@ class ModelPlugin {
          return output;
      }
  
+     getCompiledPrompt(text, parameters, prompt) {
+         const combinedParameters = { ...this.promptParameters, ...parameters };
+         const modelPrompt = this.getModelPrompt(prompt, parameters);
+         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
+
+         if (modelPromptMessagesML) {
+             return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
+         } else {
+             return { modelPromptText, tokenLength: encode(modelPromptText).length };
+         }
+     }
+
      getModelMaxTokenLength() {
          return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
      }
@@ -161,7 +175,7 @@ class ModelPlugin {
  
          const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
  
-         if (data.messages && data.messages.length > 1) {
+         if (data && data.messages && data.messages.length > 1) {
              data.messages.forEach((message, index) => {
                  const words = message.content.split(" ");
                  const tokenCount = encode(message.content).length;
@@ -175,7 +189,7 @@ class ModelPlugin {
  
          console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
  
-         prompt.debugInfo += `${separator}${JSON.stringify(data)}`;
+         prompt && prompt.debugInfo && (prompt.debugInfo += `${separator}${JSON.stringify(data)}`);
      }
  
      async executeRequest(url, data, params, headers, prompt) {
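With `getCompiledPrompt` now living on the base `ModelPlugin` class, the individual model plugins below can drop their duplicated copies and simply call the inherited method. A minimal sketch of how a subclass might consume it; the plugin name and the simplified return shape here are illustrative, not the package's actual OpenAI request payloads:

```js
// Hypothetical subclass for illustration only -- not part of the package.
const ModelPlugin = require('./modelPlugin');

class ExampleChatPlugin extends ModelPlugin {
    getRequestParameters(text, parameters, prompt) {
        // Inherited from ModelPlugin: compiles either a plain prompt string or a
        // messages array, and reports the token length of whichever was produced.
        const { modelPromptText, modelPromptMessages, tokenLength } =
            this.getCompiledPrompt(text, parameters, prompt);

        // Chat-style models take messages; completion-style models take a string.
        return modelPromptMessages && modelPromptMessages.length
            ? { messages: modelPromptMessages, tokenLength }
            : { prompt: modelPromptText, tokenLength };
    }
}

module.exports = ExampleChatPlugin;
```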
package/graphql/plugins/openAiChatPlugin.js
CHANGED
@@ -1,27 +1,11 @@
  // OpenAIChatPlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
- const { encode } = require("gpt-3-encoder");
  
  class OpenAIChatPlugin extends ModelPlugin {
      constructor(config, pathway) {
          super(config, pathway);
      }
  
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
-         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
-
-         if (modelPromptMessagesML) {
-             return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
-         } else {
-             return { modelPromptText, tokenLength: encode(modelPromptText).length };
-         }
-     }
-
      // Set up parameters specific to the OpenAI Chat API
      getRequestParameters(text, parameters, prompt) {
          const { modelPromptText, modelPromptMessages, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
package/graphql/plugins/openAiCompletionPlugin.js
CHANGED
@@ -1,6 +1,5 @@
  // OpenAICompletionPlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
  const { encode } = require("gpt-3-encoder");
  
  class OpenAICompletionPlugin extends ModelPlugin {
@@ -8,20 +7,6 @@ class OpenAICompletionPlugin extends ModelPlugin {
          super(config, pathway);
      }
  
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
-         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
-
-         if (modelPromptMessagesML) {
-             return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
-         } else {
-             return { modelPromptText, tokenLength: encode(modelPromptText).length };
-         }
-     }
-
      // Set up parameters specific to the OpenAI Completion API
      getRequestParameters(text, parameters, prompt) {
          let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
package/graphql/plugins/openAiWhisperPlugin.js
CHANGED
@@ -1,7 +1,5 @@
  // OpenAICompletionPlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
- const { encode } = require("gpt-3-encoder");
  const FormData = require('form-data');
  const fs = require('fs');
  const { splitMediaFile, isValidYoutubeUrl, processYoutubeUrl, deleteTempPath } = require('../../lib/fileChunker');
@@ -12,14 +10,6 @@ class OpenAIWhisperPlugin extends ModelPlugin {
          super(config, pathway);
      }
  
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-
-         return { modelPromptText, tokenLength: encode(modelPromptText).length };
-     }
-
      // Execute the request to the OpenAI Whisper API
      async execute(text, parameters, prompt, pathwayResolver) {
          const url = this.requestUrl(text);
@@ -41,29 +31,51 @@ class OpenAIWhisperPlugin extends ModelPlugin {
              }
          }
  
-         let result
+         let result = ``;
          let { file } = parameters;
          let folder;
          const isYoutubeUrl = isValidYoutubeUrl(file);
+         let totalCount = 0;
+         let completedCount = 0;
+         const { requestId } = pathwayResolver;
  
-
-
-             file = await processYoutubeUrl(file);
-         }
-
-         const mediaSplit = await splitMediaFile(file);
-
-         const { requestId } = pathwayResolver;
+         const sendProgress = () => {
+             completedCount++;
              pubsub.publish('REQUEST_PROGRESS', {
                  requestProgress: {
                      requestId,
-                     progress:
+                     progress: completedCount / totalCount,
                      data: null,
                  }
              });
+         }
+
+         try {
+             if (isYoutubeUrl) {
+                 // totalCount += 1; // extra 1 step for youtube download
+                 file = await processYoutubeUrl(file);
+             }
+
+             const { chunkPromises, uniqueOutputPath } = await splitMediaFile(file);
+             folder = uniqueOutputPath;
+             totalCount += chunkPromises.length * 2; // 2 steps for each chunk (download and upload)
+             // isYoutubeUrl && sendProgress(); // send progress for youtube download after total count is calculated
+
+             // sequential download of chunks
+             const chunks = [];
+             for (const chunkPromise of chunkPromises) {
+                 sendProgress();
+                 chunks.push(await chunkPromise);
+             }
+
+             // sequential processing of chunks
+             for (const chunk of chunks) {
+                 result += await processChunk(chunk);
+                 sendProgress();
+             }
  
-
-         result = await Promise.all(mediaSplit.chunks.map(processChunk));
+             // parallel processing, dropped
+             // result = await Promise.all(mediaSplit.chunks.map(processChunk));
  
          } catch (error) {
              console.error("An error occurred:", error);
@@ -71,7 +83,7 @@ class OpenAIWhisperPlugin extends ModelPlugin {
              isYoutubeUrl && (await deleteTempPath(file));
              folder && (await deleteTempPath(folder));
          }
-         return result
+         return result;
      }
  }
  
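The refactored Whisper plugin above replaces the parallel `Promise.all` pass with two sequential loops so it can publish incremental progress over the `REQUEST_PROGRESS` channel. The counting pattern is easy to lift out on its own; here is a minimal sketch under stated assumptions, with a stand-in publish function and per-chunk processor rather than the package's pubsub wiring and Whisper request code:

```js
// Stand-ins for the package's pubsub channel and per-chunk Whisper request.
const publishProgress = (requestId, progress) =>
    console.log(`[${requestId}] progress: ${(progress * 100).toFixed(0)}%`);
const processChunk = async (chunk) => `transcript for ${chunk}\n`;

async function transcribeSequentially(requestId, chunkPromises) {
    // 2 steps per chunk: one for the chunk becoming available, one for processing it.
    const totalCount = chunkPromises.length * 2;
    let completedCount = 0;
    const sendProgress = () => publishProgress(requestId, ++completedCount / totalCount);

    // First drain the chunk promises one at a time...
    const chunks = [];
    for (const chunkPromise of chunkPromises) {
        sendProgress();
        chunks.push(await chunkPromise);
    }

    // ...then transcribe each chunk in order, accumulating the result string.
    let result = '';
    for (const chunk of chunks) {
        result += await processChunk(chunk);
        sendProgress();
    }
    return result;
}

// Example: three already-resolved "chunks".
transcribeSequentially('req-1', ['a.mp3', 'b.mp3', 'c.mp3'].map(c => Promise.resolve(c)))
    .then(console.log);
```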
package/lib/fileChunker.js
CHANGED
@@ -70,9 +70,10 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
              chunkPromises.push(chunkPromise);
          }
  
-         const chunkedFiles = await Promise.all(chunkPromises);
-         console.log('All chunks processed. Chunked file names:', chunkedFiles);
-         return { chunks: chunkedFiles, folder: uniqueOutputPath }
+         // const chunkedFiles = await Promise.all(chunkPromises);
+         // console.log('All chunks processed. Chunked file names:', chunkedFiles);
+         // return { chunks: chunkedFiles, folder: uniqueOutputPath }
+         return { chunkPromises, uniqueOutputPath }
      } catch (err) {
          console.error('Error occurred during the splitting process:', err);
      }
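`splitMediaFile` now hands back the unresolved `chunkPromises` (plus the temp folder path) instead of awaiting them all, leaving the caller in charge of pacing and cleanup, as the Whisper plugin above does. A minimal sketch of a caller under that contract; the wrapper name, the `onChunk` callback, and the require path are illustrative, while `splitMediaFile` and `deleteTempPath` are the module's own exports:

```js
const { splitMediaFile, deleteTempPath } = require('./fileChunker');

// Illustrative consumer: awaits each chunk as it becomes ready instead of
// blocking on Promise.all, then cleans up the temp folder in all cases.
async function withChunks(inputPath, onChunk) {
    const { chunkPromises, uniqueOutputPath } = await splitMediaFile(inputPath);
    try {
        for (const chunkPromise of chunkPromises) {
            const chunkPath = await chunkPromise;
            await onChunk(chunkPath);
        }
    } finally {
        await deleteTempPath(uniqueOutputPath);
    }
}

// Usage: log each chunk file path as it is produced.
// withChunks('./interview.mp3', async (p) => console.log('chunk ready:', p));
```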
package/package.json
CHANGED