npm - @aj-archipelago/cortex - Versions diffs - 0.0.5 → 0.0.7 - Mend

@aj-archipelago/cortex 0.0.5 → 0.0.7

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35)

package/README.md +108 -72
package/config.js +25 -0
package/graphql/graphql.js +56 -13
package/graphql/pathwayPrompter.js +10 -6
package/graphql/pathwayResolver.js +128 -63
package/graphql/plugins/azureTranslatePlugin.js +16 -8
package/graphql/plugins/modelPlugin.js +67 -9
package/graphql/plugins/openAiChatPlugin.js +34 -7
package/graphql/plugins/openAiCompletionPlugin.js +53 -33
package/graphql/plugins/openAiWhisperPlugin.js +79 -0
package/graphql/prompt.js +1 -0
package/graphql/requestState.js +5 -0
package/graphql/resolver.js +8 -8
package/graphql/subscriptions.js +15 -2
package/graphql/typeDef.js +47 -38
package/lib/fileChunker.js +152 -0
package/lib/request.js +65 -8
package/lib/requestMonitor.js +43 -0
package/package.json +18 -6
package/pathways/basePathway.js +3 -4
package/pathways/bias.js +7 -0
package/pathways/chat.js +4 -1
package/pathways/complete.js +4 -0
package/pathways/edit.js +6 -0
package/pathways/entities.js +12 -0
package/pathways/index.js +1 -1
package/pathways/paraphrase.js +4 -0
package/pathways/sentiment.js +5 -1
package/pathways/summary.js +25 -8
package/pathways/transcribe.js +8 -0
package/pathways/translate.js +10 -1
package/tests/chunking.test.js +5 -0
package/tests/main.test.js +5 -13
package/tests/translate.test.js +5 -0
package/pathways/topics.js +0 -9

package/graphql/pathwayResolver.js CHANGED Viewed

@@ -8,19 +8,20 @@ const { getFirstNToken, getLastNToken, getSemanticChunks } = require('./chunker'
 const { PathwayResponseParser } = require('./pathwayResponseParser');
 const { Prompt } = require('./prompt');
 const { getv, setv } = require('../lib/keyValueStorageClient');
+const { requestState } = require('./requestState');
 const MAX_PREVIOUS_RESULT_TOKEN_LENGTH = 1000;
-const callPathway = async (config, pathwayName, requestState, { text, ...parameters }) => {
-    const pathwayResolver = new PathwayResolver({ config, pathway: config.get(`pathways.${pathwayName}`), requestState });
+const callPathway = async (config, pathwayName, args, requestState, { text, ...parameters }) => {
+    const pathwayResolver = new PathwayResolver({ config, pathway: config.get(`pathways.${pathwayName}`), args, requestState });
     return await pathwayResolver.resolve({ text, ...parameters });
 }
 class PathwayResolver {
-    constructor({ config, pathway, requestState }) {
+    constructor({ config, pathway, args }) {
         this.config = config;
-        this.requestState = requestState;
         this.pathway = pathway;
+        this.args = args;
         this.useInputChunking = pathway.useInputChunking;
         this.chunkMaxTokenLength = 0;
         this.warnings = [];
@@ -29,38 +30,89 @@ class PathwayResolver {
         this.pathwayPrompter = new PathwayPrompter({ config, pathway });
         this.previousResult = '';
         this.prompts = [];
-        this._pathwayPrompt = '';
         Object.defineProperty(this, 'pathwayPrompt', {
             get() {
-                return this._pathwayPrompt;
+                return this.prompts
             },
             set(value) {
-                this._pathwayPrompt = value;
-                if (!Array.isArray(this._pathwayPrompt)) {
-                    this._pathwayPrompt = [this._pathwayPrompt];
+                if (!Array.isArray(value)) {
+                    value = [value];
                 }
-                this.prompts = this._pathwayPrompt.map(p => (p instanceof Prompt) ? p : new Prompt({ prompt:p }));
+                this.prompts = value.map(p => (p instanceof Prompt) ? p : new Prompt({ prompt:p }));
                 this.chunkMaxTokenLength = this.getChunkMaxTokenLength();
             }
         });
+        // set up initial prompt
         this.pathwayPrompt = pathway.prompt;
     }
-    async resolve(args) {
-        if (args.async) {
-            // Asynchronously process the request
-            this.promptAndParse(args).then((data) => {
-                this.requestState[this.requestId].data = data;
-                pubsub.publish('REQUEST_PROGRESS', {
-                    requestProgress: {
-                        requestId: this.requestId,
-                        data: JSON.stringify(data)
+    async asyncResolve(args) {
+        // Wait with a sleep promise for the race condition to resolve
+        // const results = await Promise.all([this.promptAndParse(args), await new Promise(resolve => setTimeout(resolve, 250))]);
+        const data = await this.promptAndParse(args);
+        // Process the results for async
+        if(args.async || typeof data === 'string') { // if async flag set or processed async and got string response
+            const { completedCount, totalCount } = requestState[this.requestId];
+            requestState[this.requestId].data = data;
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                    data: JSON.stringify(data),
+                }
+            });
+        } else { //stream
+            for (const handle of data) {
+                handle.on('data', data => {
+                    console.log(data.toString());
+                    const lines = data.toString().split('\n').filter(line => line.trim() !== '');
+                    for (const line of lines) {
+                        const message = line.replace(/^data: /, '');
+                        if (message === '[DONE]') {
+                            // Send stream finished message
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: null,
+                                    progress: 1,
+                                }
+                            });
+                            return; // Stream finished
+                        }
+                        try {
+                            const parsed = JSON.parse(message);
+                            const result = this.pathwayPrompter.plugin.parseResponse(parsed)
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: JSON.stringify(result)
+                                }
+                            });
+                        } catch (error) {
+                            console.error('Could not JSON parse stream message', message, error);
+                        }
                     }
                 });
-            });
+                // data.on('end', () => {
+                //     console.log("stream done");
+                // });
+            }
+        }
+    }
+    async resolve(args) {
+        if (args.async || args.stream) {
+            // Asyncronously process the request
+            // this.asyncResolve(args);
+            if (!requestState[this.requestId]) {
+                requestState[this.requestId] = {}
+            }
+            requestState[this.requestId] = { ...requestState[this.requestId], args, resolver: this.asyncResolve.bind(this) };
             return this.requestId;
         }
         else {
@@ -70,7 +122,6 @@ class PathwayResolver {
     }
     async promptAndParse(args) {
         // Get saved context from contextId or change contextId if needed
         const { contextId } = args;
         this.savedContextId = contextId ? contextId : null;
@@ -94,25 +145,25 @@ class PathwayResolver {
     // Here we choose how to handle long input - either summarize or chunk
     processInputText(text) {
-        let chunkMaxChunkTokenLength = 0;
+        let chunkTokenLength = 0;
         if (this.pathway.inputChunkSize) {
-            chunkMaxChunkTokenLength = Math.min(this.pathway.inputChunkSize, this.chunkMaxTokenLength);
+            chunkTokenLength = Math.min(this.pathway.inputChunkSize, this.chunkMaxTokenLength);
         } else {
-             chunkMaxChunkTokenLength = this.chunkMaxTokenLength;
+            chunkTokenLength = this.chunkMaxTokenLength;
         }
         const encoded = encode(text);
-        if (!this.useInputChunking || encoded.length <= chunkMaxChunkTokenLength) { // no chunking, return as is
-            if (encoded.length >= chunkMaxChunkTokenLength) {
-                const warnText = `Your input is possibly too long, truncating! Text length: ${text.length}`;
+        if (!this.useInputChunking || encoded.length <= chunkTokenLength) { // no chunking, return as is
+            if (encoded.length >= chunkTokenLength) {
+                const warnText = `Truncating long input text. Text length: ${text.length}`;
                 this.warnings.push(warnText);
                 console.warn(warnText);
-                text = this.truncate(text, chunkMaxChunkTokenLength);
+                text = this.truncate(text, chunkTokenLength);
             }
             return [text];
         }
         // chunk the text and return the chunks with newline separators
-        return getSemanticChunks({ text, maxChunkToken: chunkMaxChunkTokenLength });
+        return getSemanticChunks({ text, maxChunkToken: chunkTokenLength });
     }
     truncate(str, n) {
@@ -124,7 +175,7 @@ class PathwayResolver {
     async summarizeIfEnabled({ text, ...parameters }) {
         if (this.pathway.useInputSummarization) {
-            return await callPathway(this.config, 'summary', this.requestState, { text, targetLength: 1000, ...parameters });
+            return await callPathway(this.config, 'summary', this.args, requestState, { text, targetLength: 1000, ...parameters });
         }
         return text;
     }
@@ -132,46 +183,44 @@ class PathwayResolver {
     // Calculate the maximum token length for a chunk
     getChunkMaxTokenLength() {
         // find the longest prompt
-        const maxPromptTokenLength = Math.max(...this.prompts.map(({ prompt }) => prompt ? encode(String(prompt)).length : 0));
-        const maxMessagesTokenLength = Math.max(...this.prompts.map(({ messages }) => messages ? messages.reduce((acc, {role, content}) => {
-            return (role && content) ? acc + encode(role).length + encode(content).length : acc;
-        }, 0) : 0));
-        const maxTokenLength = Math.max(maxPromptTokenLength, maxMessagesTokenLength);
+        const maxPromptTokenLength = Math.max(...this.prompts.map((promptData) => this.pathwayPrompter.plugin.getCompiledPrompt('', this.args, promptData).tokenLength));
         // find out if any prompts use both text input and previous result
-        const hasBothProperties = this.prompts.some(prompt => prompt.usesInputText && prompt.usesPreviousResult);
+        const hasBothProperties = this.prompts.some(prompt => prompt.usesTextInput && prompt.usesPreviousResult);
         // the token ratio is the ratio of the total prompt to the result text - both have to be included
         // in computing the max token length
         const promptRatio = this.pathwayPrompter.plugin.getPromptTokenRatio();
-        let maxChunkToken = promptRatio * this.pathwayPrompter.plugin.getModelMaxTokenLength() - maxTokenLength;
+        let chunkMaxTokenLength = promptRatio * this.pathwayPrompter.plugin.getModelMaxTokenLength() - maxPromptTokenLength;
         // if we have to deal with prompts that have both text input
         // and previous result, we need to split the maxChunkToken in half
-        maxChunkToken = hasBothProperties ? maxChunkToken / 2 : maxChunkToken;
-        // detect if the longest prompt might be too long to allow any chunk size
-        if (maxChunkToken && maxChunkToken <= 0) {
-            throw new Error(`Your prompt is too long! Split to multiple prompts or reduce length of your prompt, prompt length: ${maxPromptLength}`);
-        }
-        return maxChunkToken;
+        chunkMaxTokenLength = hasBothProperties ? chunkMaxTokenLength / 2 : chunkMaxTokenLength;
+        return chunkMaxTokenLength;
     }
     // Process the request and return the result
     async processRequest({ text, ...parameters }) {
         text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
         const chunks = this.processInputText(text);
         const anticipatedRequestCount = chunks.length * this.prompts.length;
-        if ((this.requestState[this.requestId] || {}).canceled) {
+        if ((requestState[this.requestId] || {}).canceled) {
             throw new Error('Request canceled');
         }
         // Store the request state
-        this.requestState[this.requestId] = { totalCount: anticipatedRequestCount, completedCount: 0 };
+        requestState[this.requestId] = { ...requestState[this.requestId], totalCount: anticipatedRequestCount, completedCount: 0 };
+        if (chunks.length > 1) {
+            // stream behaves as async if there are multiple chunks
+            if (parameters.stream) {
+                parameters.async = true;
+                parameters.stream = false;
+            }
+        }
         // If pre information is needed, apply current prompt with previous prompt info, only parallelize current call
         if (this.pathway.useParallelChunkProcessing) {
@@ -189,17 +238,31 @@ class PathwayResolver {
             let result = '';
             for (let i = 0; i < this.prompts.length; i++) {
+                const currentParameters = { ...parameters, previousResult };
+                if (currentParameters.stream) { // stream special flow
+                    if (i < this.prompts.length - 1) {
+                        currentParameters.stream = false; // if not the last prompt then don't stream
+                    }
+                    else {
+                        // use the stream parameter if not async
+                        currentParameters.stream = currentParameters.async ? false : currentParameters.stream;
+                    }
+                }
                 // If the prompt doesn't contain {{text}} then we can skip the chunking, and also give that token space to the previous result
                 if (!this.prompts[i].usesTextInput) {
                     // Limit context to it's N + text's characters
                     previousResult = this.truncate(previousResult, 2 * this.chunkMaxTokenLength);
-                    result = await this.applyPrompt(this.prompts[i], null, { ...parameters, previousResult });
+                    result = await this.applyPrompt(this.prompts[i], null, currentParameters);
                 } else {
                     // Limit context to N characters
                     previousResult = this.truncate(previousResult, this.chunkMaxTokenLength);
                     result = await Promise.all(chunks.map(chunk =>
-                        this.applyPrompt(this.prompts[i], chunk, { ...parameters, previousResult })));
-                    result = result.join("\n\n")
+                        this.applyPrompt(this.prompts[i], chunk, currentParameters)));
+                    if (!currentParameters.stream) {
+                        result = result.join("\n\n")
+                    }
                 }
                 // If this is any prompt other than the last, use the result as the previous context
@@ -225,20 +288,22 @@ class PathwayResolver {
     }
     async applyPrompt(prompt, text, parameters) {
-        if (this.requestState[this.requestId].canceled) {
+        if (requestState[this.requestId].canceled) {
             return;
         }
-        const result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt);
-        this.requestState[this.requestId].completedCount++;
+        const result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt, this);
+        requestState[this.requestId].completedCount++;
-        const { completedCount, totalCount } = this.requestState[this.requestId];
+        const { completedCount, totalCount } = requestState[this.requestId];
-        pubsub.publish('REQUEST_PROGRESS', {
-            requestProgress: {
-                requestId: this.requestId,
-                progress: completedCount / totalCount,
-            }
-        });
+        if (completedCount < totalCount) {
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                }
+            });
+        }
         if (prompt.saveResultTo) {
             this.savedContext[prompt.saveResultTo] = result;

package/graphql/plugins/azureTranslatePlugin.js CHANGED Viewed

@@ -1,19 +1,26 @@
 // AzureTranslatePlugin.js
 const ModelPlugin = require('./modelPlugin');
 const handlebars = require("handlebars");
+const { encode } = require("gpt-3-encoder");
 class AzureTranslatePlugin extends ModelPlugin {
-    constructor(config, modelName, pathway) {
-        super(config, modelName, pathway);
+    constructor(config, pathway) {
+        super(config, pathway);
     }
-    // Set up parameters specific to the Azure Translate API
-    requestParameters(text, parameters, prompt) {
+    getCompiledPrompt(text, parameters, prompt) {
         const combinedParameters = { ...this.promptParameters, ...parameters };
         const modelPrompt = this.getModelPrompt(prompt, parameters);
         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-        return {
+        return { modelPromptText, tokenLength: encode(modelPromptText).length };
+    }
+    // Set up parameters specific to the Azure Translate API
+    getRequestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
+        const requestParameters = {
             data: [
                 {
                 Text: modelPromptText,
@@ -23,11 +30,12 @@ class AzureTranslatePlugin extends ModelPlugin {
                 to: combinedParameters.to
             }
         };
+        return requestParameters;
     }
     // Execute the request to the Azure Translate API
     async execute(text, parameters, prompt) {
-        const requestParameters = this.requestParameters(text, parameters, prompt);
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
         const url = this.requestUrl(text);
@@ -35,7 +43,7 @@ class AzureTranslatePlugin extends ModelPlugin {
         const params = requestParameters.params;
         const headers = this.model.headers || {};
-        return this.executeRequest(url, data, params, headers);
+        return this.executeRequest(url, data, params, headers, prompt);
     }
 }

package/graphql/plugins/modelPlugin.js CHANGED Viewed

@@ -1,7 +1,7 @@
 // ModelPlugin.js
 const handlebars = require('handlebars');
 const { request } = require("../../lib/request");
-const { getResponseResult } = require("../parser");
+const { encode } = require("gpt-3-encoder");
 const DEFAULT_MAX_TOKENS = 4096;
 const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
@@ -35,6 +35,42 @@ class ModelPlugin {
         }
         this.requestCount = 1;
+        this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
+    }
+    // Function to remove non-system messages until token length is less than target
+    removeMessagesUntilTarget = (messages, targetTokenLength) => {
+        let chatML = this.messagesToChatML(messages);
+        let tokenLength = encode(chatML).length;
+        while (tokenLength > targetTokenLength) {
+            for (let i = 0; i < messages.length; i++) {
+                if (messages[i].role !== 'system') {
+                    messages.splice(i, 1);
+                    chatML = this.messagesToChatML(messages);
+                    tokenLength = encode(chatML).length;
+                    break;
+                }
+            }
+            if (messages.every(message => message.role === 'system')) {
+                break; // All remaining messages are 'system', stop removing messages
+            }
+        }
+        return messages;
+    }
+    //convert a messages array to a simple chatML format
+    messagesToChatML = (messages) => {
+        let output = "";
+        if (messages && messages.length) {
+            for (let message of messages) {
+                output += (message.role && message.content) ? `<|im_start|>${message.role}\n${message.content}\n<|im_end|>\n` : `${message}\n`;
+            }
+            // you always want the assistant to respond next so add a
+            // directive for that
+            output += "<|im_start|>assistant\n";
+        }
+        return output;
     }
     getModelMaxTokenLength() {
@@ -102,6 +138,8 @@ class ModelPlugin {
         if (!choices || !choices.length) {
             if (Array.isArray(data) && data.length > 0 && data[0].translations) {
                 return data[0].translations[0].text.trim();
+            } else {
+                return data;
             }
         }
@@ -114,20 +152,40 @@ class ModelPlugin {
         const textResult = choices[0].text && choices[0].text.trim();
         const messageResult = choices[0].message && choices[0].message.content && choices[0].message.content.trim();
-        return messageResult || textResult || null;
+        return messageResult ?? textResult ?? null;
     }
-    async executeRequest(url, data, params, headers) {
-        const responseData = await request({ url, data, params, headers }, this.modelName);
-        const modelInput = data.prompt || (data.messages && data.messages[0].content) || data[0].Text || null;
-        console.log(`=== ${this.pathwayName}.${this.requestCount++} ===`)
-        console.log(`\x1b[36m${modelInput}\x1b[0m`)
+    logRequestData(data, responseData, prompt) {
+        const separator = `\n=== ${this.pathwayName}.${this.requestCount++} ===\n`;
+        console.log(separator);
+        const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
+        if (data.messages && data.messages.length > 1) {
+            data.messages.forEach((message, index) => {
+                const words = message.content.split(" ");
+                const tokenCount = encode(message.content).length;
+                const preview = words.length < 41 ? message.content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
+                console.log(`\x1b[36mMessage ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"\x1b[0m`);
+            });
+        } else {
+            console.log(`\x1b[36m${modelInput}\x1b[0m`);
+        }
         console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
+        prompt.debugInfo += `${separator}${JSON.stringify(data)}`;
+    }
+    async executeRequest(url, data, params, headers, prompt) {
+        const responseData = await request({ url, data, params, headers, cache: this.shouldCache }, this.modelName);
         if (responseData.error) {
             throw new Exception(`An error was returned from the server: ${JSON.stringify(responseData.error)}`);
         }
+        this.logRequestData(data, responseData, prompt);
         return this.parseResponse(responseData);
     }

package/graphql/plugins/openAiChatPlugin.js CHANGED Viewed

@@ -1,34 +1,61 @@
 // OpenAIChatPlugin.js
 const ModelPlugin = require('./modelPlugin');
 const handlebars = require("handlebars");
+const { encode } = require("gpt-3-encoder");
 class OpenAIChatPlugin extends ModelPlugin {
     constructor(config, pathway) {
         super(config, pathway);
     }
-    // Set up parameters specific to the OpenAI Chat API
-    requestParameters(text, parameters, prompt) {
+    getCompiledPrompt(text, parameters, prompt) {
         const combinedParameters = { ...this.promptParameters, ...parameters };
         const modelPrompt = this.getModelPrompt(prompt, parameters);
         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+        const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
+        if (modelPromptMessagesML) {
+            return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
+        } else {
+            return { modelPromptText, tokenLength: encode(modelPromptText).length };
+        }
+    }
-        return {
-            messages: modelPromptMessages || [{ "role": "user", "content": modelPromptText }],
-            temperature: this.temperature ?? 0.7,
+    // Set up parameters specific to the OpenAI Chat API
+    getRequestParameters(text, parameters, prompt) {
+        const { modelPromptText, modelPromptMessages, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+        const { stream } = parameters;
+        // Define the model's max token length
+        const modelMaxTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+        let requestMessages = modelPromptMessages || [{ "role": "user", "content": modelPromptText }];
+        // Check if the token length exceeds the model's max token length
+        if (tokenLength > modelMaxTokenLength) {
+            // Remove older messages until the token length is within the model's limit
+            requestMessages = this.removeMessagesUntilTarget(requestMessages, modelMaxTokenLength);
+        }
+        const requestParameters = {
+        messages: requestMessages,
+        temperature: this.temperature ?? 0.7,
+        stream
         };
+        return requestParameters;
     }
     // Execute the request to the OpenAI Chat API
     async execute(text, parameters, prompt) {
         const url = this.requestUrl(text);
-        const requestParameters = this.requestParameters(text, parameters, prompt);
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
         const data = { ...(this.model.params || {}), ...requestParameters };
         const params = {};
         const headers = this.model.headers || {};
-        return this.executeRequest(url, data, params, headers);
+        return this.executeRequest(url, data, params, headers, prompt);
     }
 }

package/graphql/plugins/openAiCompletionPlugin.js CHANGED Viewed

@@ -3,61 +3,81 @@ const ModelPlugin = require('./modelPlugin');
 const handlebars = require("handlebars");
 const { encode } = require("gpt-3-encoder");
-//convert a messages array to a simple chatML format
-const messagesToChatML = (messages) => {
-    let output = "";
-    if (messages && messages.length) {
-        for (let message of messages) {
-            output += (message.role && message.content) ? `<|im_start|>${message.role}\n${message.content}\n<|im_end|>\n` : `${message}\n`;
-        }
-        // you always want the assistant to respond next so add a
-        // directive for that
-        output += "<|im_start|>assistant\n";
-    }
-    return output;
-}
 class OpenAICompletionPlugin extends ModelPlugin {
     constructor(config, pathway) {
         super(config, pathway);
     }
-    // Set up parameters specific to the OpenAI Completion API
-    requestParameters(text, parameters, prompt) {
+    getCompiledPrompt(text, parameters, prompt) {
         const combinedParameters = { ...this.promptParameters, ...parameters };
         const modelPrompt = this.getModelPrompt(prompt, parameters);
         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
-        const modelPromptMessagesML = messagesToChatML(modelPromptMessages);
+        const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
         if (modelPromptMessagesML) {
-        return {
-            prompt: modelPromptMessagesML,
-            max_tokens: this.getModelMaxTokenLength() - encode(modelPromptMessagesML).length - 1,
-            temperature: this.temperature ?? 0.7,
-            top_p: 0.95,
-            frequency_penalty: 0,
-            presence_penalty: 0,
-            stop: ["<|im_end|>"]
-        };
+            return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
+        } else {
+            return { modelPromptText, tokenLength: encode(modelPromptText).length };
+        }
+    }
+    // Set up parameters specific to the OpenAI Completion API
+    getRequestParameters(text, parameters, prompt) {
+        let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+        const { stream } = parameters;
+        let modelPromptMessagesML = '';
+        const modelMaxTokenLength = this.getModelMaxTokenLength();
+        let requestParameters = {};
+        if (modelPromptMessages) {
+            const requestMessages = this.removeMessagesUntilTarget(modelPromptMessages, modelMaxTokenLength - 1);
+            modelPromptMessagesML = this.messagesToChatML(requestMessages);
+            tokenLength = encode(modelPromptMessagesML).length;
+            if (tokenLength >= modelMaxTokenLength) {
+                throw new Error(`The maximum number of tokens for this model is ${modelMaxTokenLength}. Please reduce the number of messages in the prompt.`);
+            }
+            const max_tokens = modelMaxTokenLength - tokenLength - 1;
+            requestParameters = {
+                prompt: modelPromptMessagesML,
+                max_tokens: max_tokens,
+                temperature: this.temperature ?? 0.7,
+                top_p: 0.95,
+                frequency_penalty: 0,
+                presence_penalty: 0,
+                stop: ["<|im_end|>"],
+                stream
+            };
         } else {
-        return {
-            prompt: modelPromptText,
-            max_tokens: this.getModelMaxTokenLength() - encode(modelPromptText).length - 1,
-            temperature: this.temperature ?? 0.7,
-        };
+            if (tokenLength >= modelMaxTokenLength) {
+                throw new Error(`The maximum number of tokens for this model is ${modelMaxTokenLength}. Please reduce the length of the prompt.`);
+            }
+            const max_tokens = modelMaxTokenLength - tokenLength - 1;
+            requestParameters = {
+                prompt: modelPromptText,
+                max_tokens: max_tokens,
+                temperature: this.temperature ?? 0.7,
+                stream
+            };
         }
+        return requestParameters;
     }
     // Execute the request to the OpenAI Completion API
     async execute(text, parameters, prompt) {
         const url = this.requestUrl(text);
-        const requestParameters = this.requestParameters(text, parameters, prompt);
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
         const data = { ...(this.model.params || {}), ...requestParameters };
         const params = {};
         const headers = this.model.headers || {};
-        return this.executeRequest(url, data, params, headers);
+        return this.executeRequest(url, data, params, headers, prompt);
     }
 }