@aj-archipelago/cortex 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +6 -0
- package/graphql/graphql.js +3 -4
- package/graphql/parser.js +1 -21
- package/graphql/pathwayPrompter.js +35 -122
- package/graphql/pathwayResolver.js +109 -35
- package/graphql/plugins/azureTranslatePlugin.js +42 -0
- package/graphql/plugins/modelPlugin.js +164 -0
- package/graphql/plugins/openAiChatPlugin.js +38 -0
- package/graphql/plugins/openAiCompletionPlugin.js +69 -0
- package/graphql/prompt.js +1 -1
- package/graphql/requestState.js +5 -0
- package/graphql/resolver.js +4 -4
- package/graphql/subscriptions.js +15 -2
- package/graphql/typeDef.js +17 -13
- package/lib/request.js +67 -10
- package/lib/requestMonitor.js +43 -0
- package/package.json +14 -5
- package/pathways/basePathway.js +4 -5
- package/pathways/bias.js +1 -0
- package/pathways/paraphrase.js +1 -1
- package/pathways/translate.js +1 -0
- package/tests/chunking.test.js +5 -0
- package/tests/main.test.js +5 -0
- package/tests/translate.test.js +5 -0
package/config.js
CHANGED
@@ -41,6 +41,11 @@ var config = convict({
         default: true,
         env: 'CORTEX_ENABLE_CACHE'
     },
+    enableGraphqlCache: {
+        format: Boolean,
+        default: false,
+        env: 'CORTEX_ENABLE_GRAPHQL_CACHE'
+    },
     defaultModelName: {
         format: String,
         default: null,
@@ -50,6 +55,7 @@ var config = convict({
         format: Object,
         default: {
             "oai-td3": {
+                "type": "OPENAI-COMPLETION",
                 "url": "{{openaiApiUrl}}",
                 "headers": {
                     "Authorization": "Bearer {{openaiApiKey}}",
package/graphql/graphql.js
CHANGED
@@ -17,8 +17,7 @@ const subscriptions = require('./subscriptions');
 const { buildLimiters } = require('../lib/request');
 const { cancelRequestResolver } = require('./resolver');
 const { buildPathways, buildModels } = require('../config');
-
-const requestState = {}; // Stores the state of each request
+const { requestState } = require('./requestState');
 
 const getPlugins = (config) => {
     // server plugins
@@ -28,7 +27,7 @@ const getPlugins = (config) => {
 
     //if cache is enabled and Redis is available, use it
     let cache;
-    if (config.get('
+    if (config.get('enableGraphqlCache') && config.get('storageConnectionString')) {
         cache = new KeyvAdapter(new Keyv(config.get('storageConnectionString'),{
             ssl: true,
             abortConnect: false,
@@ -72,7 +71,7 @@ const getTypedefs = (pathways) => {
     }
 
     type Subscription {
-        requestProgress(
+        requestProgress(requestIds: [String!]): RequestSubscription
     }
     `;
 
package/graphql/parser.js
CHANGED
@@ -1,22 +1,3 @@
-//simples form string single or list return
-const getResponseResult = (data) => {
-    const { choices } = data;
-    if (!choices || !choices.length) {
-        return; //TODO no choices case
-    }
-
-    // if we got a choices array back with more than one choice, return the whole array
-    if (choices.length > 1) {
-        return choices;
-    }
-
-    // otherwise, return the first choice
-    const textResult = choices[0].text && choices[0].text.trim();
-    const messageResult = choices[0].message && choices[0].message.content && choices[0].message.content.trim();
-
-    return messageResult || textResult || null;
-}
-
 //simply trim and parse with given regex
 const regexParser = (text, regex) => {
     return text.trim().split(regex).map(s => s.trim()).filter(s => s.length);
@@ -51,8 +32,7 @@ const parseNumberedObjectList = (text, format) => {
 }
 
 module.exports = {
-    getResponseResult,
     regexParser,
     parseNumberedList,
-    parseNumberedObjectList
+    parseNumberedObjectList,
 };
package/graphql/pathwayPrompter.js
CHANGED
@@ -1,145 +1,58 @@
-
+// PathwayPrompter.js
+const OpenAIChatPlugin = require('./plugins/openAIChatPlugin');
+const OpenAICompletionPlugin = require('./plugins/openAICompletionPlugin');
+const AzureTranslatePlugin = require('./plugins/azureTranslatePlugin');
 const handlebars = require("handlebars");
-const { getResponseResult } = require("./parser");
 const { Exception } = require("handlebars");
-const { encode } = require("gpt-3-encoder");
-
-const DEFAULT_MAX_TOKENS = 4096;
-const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
 
 // register functions that can be called directly in the prompt markdown
-handlebars.registerHelper('stripHTML', function(value) {
+handlebars.registerHelper('stripHTML', function (value) {
     return value.replace(/<[^>]*>/g, '');
-
+});
 
-handlebars.registerHelper('now', function() {
+handlebars.registerHelper('now', function () {
     return new Date().toISOString();
-
-
-class PathwayPrompter {
-    constructor({ config, pathway }) {
-        // If the pathway specifies a model, use that, otherwise use the default
-        this.modelName = pathway.model || config.get('defaultModelName');
-        // Get the model from the config
-        this.model = config.get('models')[this.modelName];
-        // If the model doesn't exist, throw an exception
-        if (!this.model) {
-            throw new Exception(`Model ${this.modelName} not found in config`);
-        }
-        this.environmentVariables = config.getEnv();
-        this.temperature = pathway.temperature;
-        this.pathwayPrompt = pathway.prompt;
-        this.pathwayName = pathway.name;
-        this.promptParameters = {}
-        // Make all of the parameters defined on the pathway itself available to the prompt
-        for (const [k, v] of Object.entries(pathway)) {
-            this.promptParameters[k] = v.default ?? v;
-        }
-        if (pathway.inputParameters) {
-            for (const [k, v] of Object.entries(pathway.inputParameters)) {
-                this.promptParameters[k] = v.default ?? v;
-            }
-        }
-        this.requestCount = 1
-    }
-
-    getModelMaxTokenLength() {
-        return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
-    }
-
-    getPromptTokenRatio() {
-        return this.promptParameters.inputParameters.tokenRatio ?? this.promptParameters.tokenRatio ?? DEFAULT_PROMPT_TOKEN_RATIO;
-    }
-
-    requestUrl() {
-        const generateUrl = handlebars.compile(this.model.url);
-        return generateUrl({ ...this.model, ...this.environmentVariables, ...this.config });
-    }
-
-    requestParameters(text, parameters, prompt) {
-        // the prompt object will either have a messages property or a prompt propery
-        // or it could be a function that returns prompt text
-
-        const combinedParameters = { ...this.promptParameters, ...parameters };
+});
 
-
-
-
-
-        {
-            const compiledMessages = prompt.messages.map((message) => {
-                const compileText = handlebars.compile(message.content);
-                return { role: message.role,
-                    content: compileText({...combinedParameters, text})
-                }
-            })
+handlebars.registerHelper('toJSON', function(object) {
+    return JSON.stringify(object);
+});
+
 
-
-
-            temperature: this.temperature ?? 0.7,
-        }
-    }
+class PathwayPrompter {
+    constructor({ config, pathway }) {
 
-
-
+        const modelName = pathway.model || config.get('defaultModelName');
+        const model = config.get('models')[modelName];
 
-        if (
-
-        }
-        else {
-            promptText = prompt.prompt;
+        if (!model) {
+            throw new Exception(`Model ${modelName} not found in config`);
         }
 
-
-        const constructedPrompt = interpolatePrompt({ ...combinedParameters, text });
-
-        // this prompt could be for either a chat-style conversation or a completion-style
-        // conversation. They require different parameters.
-
-        let params = {};
+        let plugin;
 
-
-
-
-
-
-
-
-
-
-
-
-
-            // "n": 1,
-            // "presence_penalty": 0,
-            // "frequency_penalty": 0,
-            // "best_of": 1,
-        }
+        switch (model.type) {
+            case 'OPENAI-CHAT':
+                plugin = new OpenAIChatPlugin(config, pathway);
+                break;
+            case 'AZURE-TRANSLATE':
+                plugin = new AzureTranslatePlugin(config, pathway);
+                break;
+            case 'OPENAI-COMPLETION':
+                plugin = new OpenAICompletionPlugin(config, pathway);
+                break;
+            default:
+                throw new Exception(`Unsupported model type: ${model.type}`);
         }
 
-
+        this.plugin = plugin;
     }
 
     async execute(text, parameters, prompt) {
-
-
-        const url = this.requestUrl(text);
-        const params = { ...(this.model.params || {}), ...requestParameters }
-        const headers = this.model.headers || {};
-        const data = await request({ url, params, headers }, this.modelName);
-        const modelInput = params.prompt || params.messages[0].content;
-        console.log(`=== ${this.pathwayName}.${this.requestCount++} ===`)
-        console.log(`\x1b[36m${modelInput}\x1b[0m`)
-        console.log(`\x1b[34m> ${getResponseResult(data)}\x1b[0m`)
-
-        if (data.error) {
-            throw new Exception(`An error was returned from the server: ${JSON.stringify(data.error)}`);
-        }
-
-        return getResponseResult(data);
+        return await this.plugin.execute(text, parameters, prompt);
     }
 }
 
 module.exports = {
-
-}
+    PathwayPrompter
+};
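For orientation, the rewritten PathwayPrompter above is now just a dispatcher: it looks up the model named by the pathway (or the default), switches on the model's "type", and forwards execute() to the matching plugin. A minimal usage sketch, assuming a convict-style config and a pathway object shaped like the ones in this package (both placeholders here):

// Illustrative sketch only, not code from the package diff.
// Assumes config.get('models') contains an entry whose "type" is
// OPENAI-CHAT, OPENAI-COMPLETION or AZURE-TRANSLATE.
const { PathwayPrompter } = require('./pathwayPrompter');

async function runPathway(config, pathway, text, parameters, prompt) {
    const prompter = new PathwayPrompter({ config, pathway });
    // Internally this delegates to OpenAIChatPlugin, OpenAICompletionPlugin
    // or AzureTranslatePlugin via this.plugin.execute(...)
    return await prompter.execute(text, parameters, prompt);
}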
package/graphql/pathwayResolver.js
CHANGED
@@ -8,6 +8,7 @@ const { getFirstNToken, getLastNToken, getSemanticChunks } = require('./chunker'
 const { PathwayResponseParser } = require('./pathwayResponseParser');
 const { Prompt } = require('./prompt');
 const { getv, setv } = require('../lib/keyValueStorageClient');
+const { requestState } = require('./requestState');
 
 const MAX_PREVIOUS_RESULT_TOKEN_LENGTH = 1000;
 
@@ -17,9 +18,8 @@ const callPathway = async (config, pathwayName, requestState, { text, ...paramet
 }
 
 class PathwayResolver {
-    constructor({ config, pathway
+    constructor({ config, pathway }) {
         this.config = config;
-        this.requestState = requestState;
         this.pathway = pathway;
         this.useInputChunking = pathway.useInputChunking;
         this.chunkMaxTokenLength = 0;
@@ -48,19 +48,71 @@ class PathwayResolver {
         this.pathwayPrompt = pathway.prompt;
     }
 
-    async
-
-
-
-
-
-
-
-
+    async asyncResolve(args) {
+        // Wait with a sleep promise for the race condition to resolve
+        // const results = await Promise.all([this.promptAndParse(args), await new Promise(resolve => setTimeout(resolve, 250))]);
+        const data = await this.promptAndParse(args);
+        // Process the results for async
+        if(args.async || typeof data === 'string') { // if async flag set or processed async and got string response
+            const { completedCount, totalCount } = requestState[this.requestId];
+            requestState[this.requestId].data = data;
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                    data: JSON.stringify(data),
+                }
+            });
+        } else { //stream
+            for (const handle of data) {
+                handle.on('data', data => {
+                    console.log(data.toString());
+                    const lines = data.toString().split('\n').filter(line => line.trim() !== '');
+                    for (const line of lines) {
+                        const message = line.replace(/^data: /, '');
+                        if (message === '[DONE]') {
+                            // Send stream finished message
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: null,
+                                    progress: 1,
+                                }
+                            });
+                            return; // Stream finished
+                        }
+                        try {
+                            const parsed = JSON.parse(message);
+                            const result = this.pathwayPrompter.plugin.parseResponse(parsed)
+
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: JSON.stringify(result)
+                                }
+                            });
+                        } catch (error) {
+                            console.error('Could not JSON parse stream message', message, error);
+                        }
                     }
                 });
-        });
 
+                // data.on('end', () => {
+                //     console.log("stream done");
+                // });
+            }
+
+        }
+    }
+
+    async resolve(args) {
+        if (args.async || args.stream) {
+            // Asyncronously process the request
+            // this.asyncResolve(args);
+            if (!requestState[this.requestId]) {
+                requestState[this.requestId] = {}
+            }
+            requestState[this.requestId] = { ...requestState[this.requestId], args, resolver: this.asyncResolve.bind(this) };
             return this.requestId;
         }
         else {
@@ -70,7 +122,6 @@ class PathwayResolver {
         }
 
     async promptAndParse(args) {
-
         // Get saved context from contextId or change contextId if needed
         const { contextId } = args;
         this.savedContextId = contextId ? contextId : null;
@@ -98,7 +149,7 @@ class PathwayResolver {
         if (this.pathway.inputChunkSize) {
             chunkMaxChunkTokenLength = Math.min(this.pathway.inputChunkSize, this.chunkMaxTokenLength);
         } else {
-
+            chunkMaxChunkTokenLength = this.chunkMaxTokenLength;
         }
         const encoded = encode(text);
         if (!this.useInputChunking || encoded.length <= chunkMaxChunkTokenLength) { // no chunking, return as is
@@ -106,7 +157,7 @@ class PathwayResolver {
             const warnText = `Your input is possibly too long, truncating! Text length: ${text.length}`;
             this.warnings.push(warnText);
             console.warn(warnText);
-            text = truncate(text, chunkMaxChunkTokenLength);
+            text = this.truncate(text, chunkMaxChunkTokenLength);
         }
         return [text];
     }
@@ -116,7 +167,7 @@ class PathwayResolver {
     }
 
     truncate(str, n) {
-        if (this.pathwayPrompter.promptParameters.truncateFromFront) {
+        if (this.pathwayPrompter.plugin.promptParameters.truncateFromFront) {
             return getFirstNToken(str, n);
         }
         return getLastNToken(str, n);
@@ -124,7 +175,7 @@ class PathwayResolver {
 
     async summarizeIfEnabled({ text, ...parameters }) {
         if (this.pathway.useInputSummarization) {
-            return await callPathway(this.config, 'summary',
+            return await callPathway(this.config, 'summary', requestState, { text, targetLength: 1000, ...parameters });
         }
         return text;
     }
@@ -134,7 +185,7 @@ class PathwayResolver {
         // find the longest prompt
         const maxPromptTokenLength = Math.max(...this.prompts.map(({ prompt }) => prompt ? encode(String(prompt)).length : 0));
         const maxMessagesTokenLength = Math.max(...this.prompts.map(({ messages }) => messages ? messages.reduce((acc, {role, content}) => {
-            return acc + encode(role).length + encode(content).length;
+            return (role && content) ? acc + encode(role).length + encode(content).length : acc;
         }, 0) : 0));
 
         const maxTokenLength = Math.max(maxPromptTokenLength, maxMessagesTokenLength);
@@ -144,8 +195,8 @@ class PathwayResolver {
 
         // the token ratio is the ratio of the total prompt to the result text - both have to be included
        // in computing the max token length
-        const promptRatio = this.pathwayPrompter.getPromptTokenRatio();
-        let maxChunkToken = promptRatio * this.pathwayPrompter.getModelMaxTokenLength() - maxTokenLength;
+        const promptRatio = this.pathwayPrompter.plugin.getPromptTokenRatio();
+        let maxChunkToken = promptRatio * this.pathwayPrompter.plugin.getModelMaxTokenLength() - maxTokenLength;
 
         // if we have to deal with prompts that have both text input
         // and previous result, we need to split the maxChunkToken in half
@@ -160,18 +211,25 @@ class PathwayResolver {
 
     // Process the request and return the result
     async processRequest({ text, ...parameters }) {
-
         text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
         const chunks = this.processInputText(text);
 
         const anticipatedRequestCount = chunks.length * this.prompts.length;
 
-        if ((
+        if ((requestState[this.requestId] || {}).canceled) {
             throw new Error('Request canceled');
         }
 
         // Store the request state
-
+        requestState[this.requestId] = { ...requestState[this.requestId], totalCount: anticipatedRequestCount, completedCount: 0 };
+
+        if (chunks.length > 1) {
+            // stream behaves as async if there are multiple chunks
+            if (parameters.stream) {
+                parameters.async = true;
+                parameters.stream = false;
+            }
+        }
 
         // If pre information is needed, apply current prompt with previous prompt info, only parallelize current call
         if (this.pathway.useParallelChunkProcessing) {
@@ -189,17 +247,31 @@ class PathwayResolver {
         let result = '';
 
         for (let i = 0; i < this.prompts.length; i++) {
+            const currentParameters = { ...parameters, previousResult };
+
+            if (currentParameters.stream) { // stream special flow
+                if (i < this.prompts.length - 1) {
+                    currentParameters.stream = false; // if not the last prompt then don't stream
+                }
+                else {
+                    // use the stream parameter if not async
+                    currentParameters.stream = currentParameters.async ? false : currentParameters.stream;
+                }
+            }
+
             // If the prompt doesn't contain {{text}} then we can skip the chunking, and also give that token space to the previous result
             if (!this.prompts[i].usesTextInput) {
                 // Limit context to it's N + text's characters
                 previousResult = this.truncate(previousResult, 2 * this.chunkMaxTokenLength);
-                result = await this.applyPrompt(this.prompts[i], null,
+                result = await this.applyPrompt(this.prompts[i], null, currentParameters);
             } else {
                 // Limit context to N characters
                 previousResult = this.truncate(previousResult, this.chunkMaxTokenLength);
                 result = await Promise.all(chunks.map(chunk =>
-                    this.applyPrompt(this.prompts[i], chunk,
-
+                    this.applyPrompt(this.prompts[i], chunk, currentParameters)));
+                if (!currentParameters.stream) {
+                    result = result.join("\n\n")
+                }
             }
 
             // If this is any prompt other than the last, use the result as the previous context
@@ -225,20 +297,22 @@ class PathwayResolver {
     }
 
     async applyPrompt(prompt, text, parameters) {
-        if (
+        if (requestState[this.requestId].canceled) {
            return;
        }
        const result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt);
-
+        requestState[this.requestId].completedCount++;
 
-        const { completedCount, totalCount } =
+        const { completedCount, totalCount } = requestState[this.requestId];
 
-
-
-
-
-
-
+        if (completedCount < totalCount) {
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                }
+            });
+        }
 
        if (prompt.saveResultTo) {
            this.savedContext[prompt.saveResultTo] = result;
package/graphql/plugins/azureTranslatePlugin.js
ADDED
@@ -0,0 +1,42 @@
+// AzureTranslatePlugin.js
+const ModelPlugin = require('./modelPlugin');
+const handlebars = require("handlebars");
+
+class AzureTranslatePlugin extends ModelPlugin {
+    constructor(config, modelName, pathway) {
+        super(config, modelName, pathway);
+    }
+
+    // Set up parameters specific to the Azure Translate API
+    requestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const modelPrompt = this.getModelPrompt(prompt, parameters);
+        const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+
+        return {
+            data: [
+                {
+                    Text: modelPromptText,
+                },
+            ],
+            params: {
+                to: combinedParameters.to
+            }
+        };
+    }
+
+    // Execute the request to the Azure Translate API
+    async execute(text, parameters, prompt) {
+        const requestParameters = this.requestParameters(text, parameters, prompt);
+
+        const url = this.requestUrl(text);
+
+        const data = requestParameters.data;
+        const params = requestParameters.params;
+        const headers = this.model.headers || {};
+
+        return this.executeRequest(url, data, params, headers);
+    }
+}
+
+module.exports = AzureTranslatePlugin;
package/graphql/plugins/modelPlugin.js
ADDED
@@ -0,0 +1,164 @@
+// ModelPlugin.js
+const handlebars = require('handlebars');
+const { request } = require("../../lib/request");
+const { encode } = require("gpt-3-encoder");
+
+const DEFAULT_MAX_TOKENS = 4096;
+const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
+
+class ModelPlugin {
+    constructor(config, pathway) {
+        // If the pathway specifies a model, use that, otherwise use the default
+        this.modelName = pathway.model || config.get('defaultModelName');
+        // Get the model from the config
+        this.model = config.get('models')[this.modelName];
+        // If the model doesn't exist, throw an exception
+        if (!this.model) {
+            throw new Error(`Model ${this.modelName} not found in config`);
+        }
+
+        this.config = config;
+        this.environmentVariables = config.getEnv();
+        this.temperature = pathway.temperature;
+        this.pathwayPrompt = pathway.prompt;
+        this.pathwayName = pathway.name;
+        this.promptParameters = {};
+
+        // Make all of the parameters defined on the pathway itself available to the prompt
+        for (const [k, v] of Object.entries(pathway)) {
+            this.promptParameters[k] = v.default ?? v;
+        }
+        if (pathway.inputParameters) {
+            for (const [k, v] of Object.entries(pathway.inputParameters)) {
+                this.promptParameters[k] = v.default ?? v;
+            }
+        }
+
+        this.requestCount = 1;
+        this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
+    }
+
+    getModelMaxTokenLength() {
+        return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
+    }
+
+    getPromptTokenRatio() {
+        // TODO: Is this the right order of precedence? inputParameters should maybe be second?
+        return this.promptParameters.inputParameters.tokenRatio ?? this.promptParameters.tokenRatio ?? DEFAULT_PROMPT_TOKEN_RATIO;
+    }
+
+
+    getModelPrompt(prompt, parameters) {
+        if (typeof(prompt) === 'function') {
+            return prompt(parameters);
+        } else {
+            return prompt;
+        }
+    }
+
+    getModelPromptMessages(modelPrompt, combinedParameters, text) {
+        if (!modelPrompt.messages) {
+            return null;
+        }
+
+        // First run handlebars compile on the pathway messages
+        const compiledMessages = modelPrompt.messages.map((message) => {
+            if (message.content) {
+                const compileText = handlebars.compile(message.content);
+                return {
+                    role: message.role,
+                    content: compileText({ ...combinedParameters, text }),
+                };
+            } else {
+                return message;
+            }
+        });
+
+        // Next add in any parameters that are referenced by name in the array
+        const expandedMessages = compiledMessages.flatMap((message) => {
+            if (typeof message === 'string') {
+                const match = message.match(/{{(.+?)}}/);
+                const placeholder = match ? match[1] : null;
+                if (placeholder === null) {
+                    return message;
+                } else {
+                    return combinedParameters[placeholder] || [];
+                }
+            } else {
+                return [message];
+            }
+        });
+
+        return expandedMessages;
+    }
+
+    requestUrl() {
+        const generateUrl = handlebars.compile(this.model.url);
+        return generateUrl({ ...this.model, ...this.environmentVariables, ...this.config });
+    }
+
+    //simples form string single or list return
+    parseResponse(data) {
+        const { choices } = data;
+        if (!choices || !choices.length) {
+            if (Array.isArray(data) && data.length > 0 && data[0].translations) {
+                return data[0].translations[0].text.trim();
+            } else {
+                return data;
+            }
+        }
+
+        // if we got a choices array back with more than one choice, return the whole array
+        if (choices.length > 1) {
+            return choices;
+        }
+
+        // otherwise, return the first choice
+        const textResult = choices[0].text && choices[0].text.trim();
+        const messageResult = choices[0].message && choices[0].message.content && choices[0].message.content.trim();
+
+        return messageResult ?? textResult ?? null;
+    }
+
+    logMessagePreview(messages) {
+        messages.forEach((message, index) => {
+            const words = message.content.split(" ");
+            const tokenCount = encode(message.content).length;
+            let preview;
+
+            if (index === 0) {
+                preview = message.content;
+            } else {
+                preview = words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
+            }
+
+            console.log(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
+        });
+    }
+
+    async executeRequest(url, data, params, headers) {
+        const responseData = await request({ url, data, params, headers, cache: this.shouldCache }, this.modelName);
+        const modelInput = data.prompt || (data.messages && data.messages[0].content) || data[0].Text || null;
+
+        console.log(`=== ${this.pathwayName}.${this.requestCount++} ===`);
+
+        if (data.messages && data.messages.length > 1) {
+            this.logMessagePreview(data.messages);
+        } else {
+            console.log(`\x1b[36m${modelInput}\x1b[0m`);
+        }
+
+        console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
+
+        if (responseData.error) {
+            throw new Exception(`An error was returned from the server: ${JSON.stringify(responseData.error)}`);
+        }
+
+        return this.parseResponse(responseData);
+    }
+
+}
+
+module.exports = ModelPlugin;
+
+
package/graphql/plugins/openAiChatPlugin.js
ADDED
@@ -0,0 +1,38 @@
+// OpenAIChatPlugin.js
+const ModelPlugin = require('./modelPlugin');
+const handlebars = require("handlebars");
+
+class OpenAIChatPlugin extends ModelPlugin {
+    constructor(config, pathway) {
+        super(config, pathway);
+    }
+
+    // Set up parameters specific to the OpenAI Chat API
+    requestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const modelPrompt = this.getModelPrompt(prompt, parameters);
+        const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+        const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+
+        const { stream } = parameters;
+
+        return {
+            messages: modelPromptMessages || [{ "role": "user", "content": modelPromptText }],
+            temperature: this.temperature ?? 0.7,
+            stream
+        };
+    }
+
+    // Execute the request to the OpenAI Chat API
+    async execute(text, parameters, prompt) {
+        const url = this.requestUrl(text);
+        const requestParameters = this.requestParameters(text, parameters, prompt);
+
+        const data = { ...(this.model.params || {}), ...requestParameters };
+        const params = {};
+        const headers = this.model.headers || {};
+        return this.executeRequest(url, data, params, headers);
+    }
+}
+
+module.exports = OpenAIChatPlugin;
package/graphql/plugins/openAiCompletionPlugin.js
ADDED
@@ -0,0 +1,69 @@
+// OpenAICompletionPlugin.js
+const ModelPlugin = require('./modelPlugin');
+const handlebars = require("handlebars");
+const { encode } = require("gpt-3-encoder");
+
+//convert a messages array to a simple chatML format
+const messagesToChatML = (messages) => {
+    let output = "";
+    if (messages && messages.length) {
+        for (let message of messages) {
+            output += (message.role && message.content) ? `<|im_start|>${message.role}\n${message.content}\n<|im_end|>\n` : `${message}\n`;
+        }
+        // you always want the assistant to respond next so add a
+        // directive for that
+        output += "<|im_start|>assistant\n";
+    }
+    return output;
+}
+
+class OpenAICompletionPlugin extends ModelPlugin {
+    constructor(config, pathway) {
+        super(config, pathway);
+    }
+
+    // Set up parameters specific to the OpenAI Completion API
+    requestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const modelPrompt = this.getModelPrompt(prompt, parameters);
+        const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+        const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+        const modelPromptMessagesML = messagesToChatML(modelPromptMessages);
+
+        const { stream } = parameters;
+
+        if (modelPromptMessagesML) {
+            return {
+                prompt: modelPromptMessagesML,
+                max_tokens: this.getModelMaxTokenLength() - encode(modelPromptMessagesML).length - 1,
+                temperature: this.temperature ?? 0.7,
+                top_p: 0.95,
+                frequency_penalty: 0,
+                presence_penalty: 0,
+                stop: ["<|im_end|>"],
+                stream
+            };
+        } else {
+            return {
+                prompt: modelPromptText,
+                max_tokens: this.getModelMaxTokenLength() - encode(modelPromptText).length - 1,
+                temperature: this.temperature ?? 0.7,
+                stream
+            };
+        }
+    }
+
+    // Execute the request to the OpenAI Completion API
+    async execute(text, parameters, prompt) {
+        const url = this.requestUrl(text);
+        const requestParameters = this.requestParameters(text, parameters, prompt);
+
+        const data = { ...(this.model.params || {}), ...requestParameters };
+        const params = {};
+        const headers = this.model.headers || {};
+        return this.executeRequest(url, data, params, headers);
+    }
+}
+
+module.exports = OpenAICompletionPlugin;
+
package/graphql/prompt.js
CHANGED
@@ -25,7 +25,7 @@ function promptContains(variable, prompt) {
     // if it's an array, it's the messages format
     if (Array.isArray(prompt)) {
         prompt.forEach(p => {
-            while (
+            while (match = p.content && regexp.exec(p.content)) {
                 matches.push(match[1]);
             }
         });
package/graphql/resolver.js
CHANGED
@@ -5,10 +5,10 @@ const { PathwayResolver } = require("./pathwayResolver");
 // (parent, args, contextValue, info)
 const rootResolver = async (parent, args, contextValue, info) => {
     const { config, pathway, requestState } = contextValue;
-    const { temperature } = pathway;
+    const { temperature, enableGraphqlCache } = pathway;
 
-    // Turn
-    if (temperature == 0) {
+    // Turn on graphql caching if enableGraphqlCache true and temperature is 0
+    if (enableGraphqlCache && temperature == 0) { // ||
         info.cacheControl.setCacheHint({ maxAge: 60 * 60 * 24, scope: 'PUBLIC' });
     }
 
@@ -16,7 +16,7 @@ const rootResolver = async (parent, args, contextValue, info) => {
     contextValue.pathwayResolver = pathwayResolver;
 
     // Add request parameters back as debug
-    const requestParameters = pathwayResolver.prompts.map((prompt) => pathwayResolver.pathwayPrompter.requestParameters(args.text, args, prompt));
+    const requestParameters = pathwayResolver.prompts.map((prompt) => pathwayResolver.pathwayPrompter.plugin.requestParameters(args.text, args, prompt));
     const debug = JSON.stringify(requestParameters);
 
     // Execute the request with timeout
package/graphql/subscriptions.js
CHANGED
@@ -4,14 +4,27 @@
 
 const pubsub = require("./pubsub");
 const { withFilter } = require("graphql-subscriptions");
+const { requestState } = require("./requestState");
 
 const subscriptions = {
     requestProgress: {
         subscribe: withFilter(
-            () =>
+            (_, args, __, info) => {
+                const { requestIds } = args;
+                for (const requestId of requestIds) {
+                    if (!requestState[requestId]) {
+                        console.log(`requestProgress, requestId: ${requestId} not found`);
+                    } else {
+                        console.log(`starting async requestProgress, requestId: ${requestId}`);
+                        const { resolver, args } = requestState[requestId];
+                        resolver(args);
+                    }
+                }
+                return pubsub.asyncIterator(['REQUEST_PROGRESS'])
+            },
             (payload, variables) => {
                 return (
-                    payload.requestProgress.requestId
+                    variables.requestIds.includes(payload.requestProgress.requestId)
                 );
             },
         ),
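For context, requestProgress now takes a list of request IDs and starts the stored resolver for each before returning the async iterator. A hypothetical client-side sketch of consuming it; the selection set assumes RequestSubscription exposes the requestId, progress and data fields that the resolvers publish:

// Illustrative only, not code from the package; the requestId would be the
// string returned by a query executed with async: true (see pathwayResolver.resolve).
const REQUEST_PROGRESS = `
    subscription OnRequestProgress($requestIds: [String!]) {
        requestProgress(requestIds: $requestIds) {
            requestId
            progress
            data
        }
    }`;
// Pass { requestIds: [requestId] } as variables with any GraphQL subscription client.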
package/graphql/typeDef.js
CHANGED
@@ -12,10 +12,11 @@ const typeDef = (pathway) => {
     const fieldsStr = !fields ? `` : fields.map(f => `${f}: String`).join('\n ');
 
     const typeName = fields ? `${objName}Result` : `String`;
+    const messageType = `input Message { role: String, content: String }`;
+
     const type = fields ? `type ${typeName} {
         ${fieldsStr}
-    }` : ``;
-
+    }` : ``;
 
     const resultStr = pathway.list ? `[${typeName}]` : typeName;
 
@@ -29,18 +30,21 @@ const typeDef = (pathway) => {
 
 
     const params = { ...defaultInputParameters, ...inputParameters };
-    const paramsStr = Object.entries(params).map(
-        ([key, value]) => `${key}: ${GRAPHQL_TYPE_MAP[typeof (value)]} = ${typeof (value) == `string` ? `"${value}"` : value}`).join('\n');
-
 
-
-
-
-
-
-    }
-
+    const paramsStr = Object.entries(params).map(
+        ([key, value]) => {
+            if (typeof value === 'object' && Array.isArray(value)) {
+                return `${key}: [Message] = []`;
+            } else {
+                return `${key}: ${GRAPHQL_TYPE_MAP[typeof (value)]} = ${typeof (value) === 'string' ? `"${value}"` : value}`;
+            }
+        }
+    ).join('\n');
+
+
+    const definition = `${messageType}\n\n${type}\n\n${responseType}\n\nextend type Query {${name}(${paramsStr}): ${objName}}`;
+    //console.log(definition);
+    return definition;
 }
 
 module.exports = {
package/lib/request.js
CHANGED
@@ -1,34 +1,88 @@
-const axios = require('axios');
 const Bottleneck = require("bottleneck/es5");
+const RequestMonitor = require('./requestMonitor');
+const { config } = require('../config');
+let axios = require('axios');
+
+if (config.get('enableCache')) {
+    // Setup cache
+    const { setupCache } = require('axios-cache-interceptor');
+    axios = setupCache(axios, {
+        // enable cache for all requests by default
+        methods: ['get', 'post', 'put', 'delete', 'patch'],
+        interpretHeader: false,
+        ttl: 1000 * 60 * 60 * 24 * 7, // 7 days
+    });
+}
 
 const limiters = {};
+const monitors = {};
 
 const buildLimiters = (config) => {
     console.log('Building limiters...');
     for (const [name, model] of Object.entries(config.get('models'))) {
+        const rps = model.requestsPerSecond ?? 100;
         limiters[name] = new Bottleneck({
-            minTime: 1000 /
-
-
+            minTime: 1000 / rps,
+            maxConcurrent: rps,
+            reservoir: rps, // Number of tokens available initially
+            reservoirRefreshAmount: rps, // Number of tokens added per interval
+            reservoirRefreshInterval: 1000, // Interval in milliseconds
+        });
+        monitors[name] = new RequestMonitor();
+    }
+}
+
+setInterval(() => {
+    const monitorKeys = Object.keys(monitors);
+
+    // Skip logging if the monitors object does not exist or is empty
+    if (!monitorKeys || monitorKeys.length === 0) {
+        return;
     }
+
+    monitorKeys.forEach((monitorName) => {
+        const monitor = monitors[monitorName];
+        const callRate = monitor.getPeakCallRate();
+        const error429Rate = monitor.getError429Rate();
+        if (callRate > 0) {
+            console.log('------------------------');
+            console.log(`${monitorName} Call rate: ${callRate} calls/sec, 429 errors: ${error429Rate * 100}%`);
+            console.log('------------------------');
+            // Reset the rate monitor to start a new monitoring interval.
+            monitor.reset();
+        }
+    });
+}, 10000); // Log rates every 10 seconds (10000 ms).
+
+const postWithMonitor = async (model, url, data, axiosConfigObj) => {
+    const monitor = monitors[model];
+    monitor.incrementCallCount();
+    return axios.post(url, data, axiosConfigObj);
 }
 
 const MAX_RETRY = 10;
-const postRequest = async ({ url, params, headers }, model) => {
+const postRequest = async ({ url, data, params, headers, cache }, model) => {
     let retry = 0;
     const errors = []
     for (let i = 0; i < MAX_RETRY; i++) {
         try {
             if (i > 0) {
-                console.log(`Retrying request #retry ${i}: ${JSON.stringify(
+                console.log(`Retrying request #retry ${i}: ${JSON.stringify(data)}...`);
                 await new Promise(r => setTimeout(r, 200 * Math.pow(2, i))); // exponential backoff
-            }
+            }
             if (!limiters[model]) {
                 throw new Error(`No limiter for model ${model}!`);
             }
-
+            const axiosConfigObj = { params, headers, cache };
+            if (params.stream || data.stream) {
+                axiosConfigObj.responseType = 'stream';
+            }
+            return await limiters[model].schedule(() => postWithMonitor(model, url, data, axiosConfigObj));
         } catch (e) {
-            console.error(`Failed request with
+            console.error(`Failed request with data ${JSON.stringify(data)}: ${e}`);
+            if (e.response.status === 429) {
+                monitors[model].incrementError429Count();
+            }
             errors.push(e);
         }
     }
@@ -37,7 +91,10 @@ const postRequest = async ({ url, params, headers }, model) => {
 
 const request = async (params, model) => {
     const response = await postRequest(params, model);
-    const { error, data } = response;
+    const { error, data, cached } = response;
+    if (cached) {
+        console.info('/Request served with cached response.');
+    }
     if (error && error.length > 0) {
         const lastError = error[error.length - 1];
         return { error: lastError.toJSON() ?? lastError ?? error };
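For context, buildLimiters above now keys each Bottleneck limiter and RequestMonitor off an optional requestsPerSecond field on the model definition, defaulting to 100. A sketch of a model entry exercising that field; only "type", "url", "headers" and "requestsPerSecond" are keys seen in this diff, and the values are placeholders:

// Hypothetical model config entry for illustration, not from the package.
const models = {
    "oai-td3": {
        "type": "OPENAI-COMPLETION",
        "url": "{{openaiApiUrl}}",
        "headers": { "Authorization": "Bearer {{openaiApiKey}}" },
        "requestsPerSecond": 10 // feeds Bottleneck minTime/reservoir in buildLimiters(); 100 if omitted
    }
};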
package/lib/requestMonitor.js
ADDED
@@ -0,0 +1,43 @@
+class RequestMonitor {
+    constructor() {
+        this.callCount = 0;
+        this.peakCallRate = 0;
+        this.error429Count = 0;
+        this.startTime = new Date();
+    }
+
+    incrementCallCount() {
+        this.callCount++;
+        if (this.getCallRate() > this.peakCallRate) {
+            this.peakCallRate = this.getCallRate();
+        }
+    }
+
+    incrementError429Count() {
+        this.error429Count++;
+    }
+
+    getCallRate() {
+        const currentTime = new Date();
+        const timeElapsed = (currentTime - this.startTime) / 1000; // time elapsed in seconds
+        return timeElapsed < 1 ? this.callCount : this.callCount / timeElapsed;
+    }
+
+    getPeakCallRate() {
+        return this.peakCallRate;
+    }
+
+    getError429Rate() {
+        return this.error429Count / this.callCount;
+    }
+
+    reset() {
+        this.callCount = 0;
+        this.error429Count = 0;
+        this.peakCallRate = 0;
+        this.startTime = new Date();
+    }
+}
+
+module.exports = RequestMonitor;
+
package/package.json
CHANGED
@@ -1,14 +1,22 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "0.0.
-  "description": "
+  "version": "0.0.6",
+  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/aj-archipelago/cortex.git"
   },
   "keywords": [
     "cortex",
-    "
+    "AI",
+    "prompt engineering",
+    "LLM",
+    "OpenAI",
+    "Azure",
+    "GPT-3",
+    "GPT-4",
+    "chatGPT",
+    "GraphQL"
   ],
   "main": "index.js",
   "scripts": {
@@ -22,11 +30,12 @@
     "@apollo/utils.keyvadapter": "^1.1.2",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
-    "apollo-server": "^3.
+    "apollo-server": "^3.12.0",
     "apollo-server-core": "^3.11.1",
     "apollo-server-express": "^3.11.1",
     "apollo-server-plugin-response-cache": "^3.8.1",
-    "axios": "^1.
+    "axios": "^1.3.4",
+    "axios-cache-interceptor": "^1.0.1",
     "bottleneck": "^2.19.5",
     "compromise": "^14.8.1",
     "compromise-paragraphs": "^0.1.0",
package/pathways/basePathway.js
CHANGED
@@ -1,4 +1,3 @@
-const { parseResponse } = require("../graphql/parser");
 const { rootResolver, resolver } = require("../graphql/resolver");
 const { typeDef } = require('../graphql/typeDef')
 
@@ -7,9 +6,9 @@ module.exports = {
     prompt: `{{text}}`,
     defaultInputParameters: {
         text: ``,
-
-
-
+        async: false, // switch to enable async mode
+        contextId: ``, // used to identify the context of the request,
+        stream: false, // switch to enable stream mode
     },
     inputParameters: {},
     typeDef,
@@ -19,5 +18,5 @@ module.exports = {
     useParallelChunkProcessing: false,
     useInputSummarization: false,
     truncateFromFront: false,
-    timeout:
+    timeout: 120, // in seconds
 }
package/pathways/bias.js
CHANGED
package/pathways/paraphrase.js
CHANGED
package/pathways/translate.js
CHANGED
package/tests/chunking.test.js
CHANGED
@@ -4,6 +4,11 @@ jest.setTimeout(1800000);
 
 const testServer = getTestServer();
 
+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('chunking test of translate endpoint with huge text', async () => {
     const response = await testServer.executeOperation({
         query: 'query translate($text: String!) { translate(text: $text) { result } }',
package/tests/main.test.js
CHANGED
@@ -14,6 +14,11 @@ const getTestServer = () => {
 
 const testServer = getTestServer();
 
+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('validates bias endpoint', async () => {
     const response = await testServer.executeOperation({
         query: 'query bias($text: String!) { bias(text: $text) { result } }',
package/tests/translate.test.js
CHANGED
@@ -4,6 +4,11 @@ jest.setTimeout(1800000);
 
 const testServer = getTestServer();
 
+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('test translate endpoint with huge arabic text english translation and check return non-arabic/english', async () => {
     const response = await testServer.executeOperation({
         query: 'query translate($text: String!, $to:String) { translate(text: $text, to:$to) { result } }',