@aj-archipelago/cortex 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintignore ADDED
@@ -0,0 +1,30 @@
+ # Ignore build artifacts
+ /dist
+ /build
+
+ # Ignore node_modules
+ /node_modules
+
+ # Ignore log files
+ *.log
+
+ # Ignore any config files
+ .env
+ .env.*
+
+ # Ignore coverage reports
+ /coverage
+
+ # Ignore documentation
+ /docs
+
+ # Ignore any generated or bundled files
+ *.min.js
+ *.bundle.js
+
+ # Ignore any files generated by your IDE or text editor
+ .idea/
+ .vscode/
+ *.sublime-*
+ *.iml
+ *.swp
package/.eslintrc ADDED
@@ -0,0 +1,31 @@
+ {
+   "env": {
+     "browser": true,
+     "es2021": true,
+     "node": true
+   },
+   "extends": [
+     "eslint:recommended"
+   ],
+   "parserOptions": {
+     "ecmaVersion": 12,
+     "sourceType": "module"
+   },
+   "plugins": [
+     "import"
+   ],
+   "rules": {
+     "import/no-unresolved": "error",
+     "import/no-extraneous-dependencies": ["error", {"devDependencies": true}],
+     "no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]
+   },
+   "settings": {
+     "import/resolver": {
+       "node": {
+         "extensions": [".js"],
+         "moduleDirectory": ["node_modules", "src"]
+       }
+     },
+     "import/core-modules": ["ava"]
+   }
+ }
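Of the rules above, `"no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]` explains the parameter renames further down in this diff (`info` → `_info`, `pathwayResolver` → `_pathwayResolver`): arguments whose names start with an underscore are exempt from the rule. A minimal standalone sketch:

```js
// With { "argsIgnorePattern": "^_" }, the unused `_info` argument is not reported,
// while the same resolver with an unused `info` argument would fail no-unused-vars.
const resolver = async (parent, args, contextValue, _info) => {
  const { pathwayResolver } = contextValue;
  return await pathwayResolver.resolve(args);
};
```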
package/README.md CHANGED
@@ -2,6 +2,16 @@
  Cortex simplifies and accelerates the process of creating applications that harness the power of modern AI models like chatGPT and GPT-4 by providing a structured interface (GraphQL or REST) to a powerful prompt execution environment. This enables complex augmented prompting and abstracts away most of the complexity of managing model connections like chunking input, rate limiting, formatting output, caching, and handling errors.
  ## Why build Cortex?
  Modern AI models are transformational, but a number of complexities emerge when developers start using them to deliver application-ready functions. Most models require precisely formatted, carefully engineered and sequenced prompts to produce consistent results, and the responses are typically largely unstructured text without validation or formatting. Additionally, these models are evolving rapidly, are typically costly and slow to query and implement hard request size and rate restrictions that need to be carefully navigated for optimum throughput. Cortex offers a solution to these problems and provides a simple and extensible package for interacting with NL AI models.
+
+ ## Okay, but what can I really do with this thing?
+ Yikes. Everything! It's kind of an LLM Swiss Army knife. Here are some ideas:
+ * Create custom chat agents with memory and personalization, then expose them to a bunch of different UIs (custom chat portals, Slack, Teams, etc. - anything that can speak to a REST or GraphQL endpoint)
+ * Create custom coding assistants (code generation, code reviews, test writing, AI pair programming) and easily integrate them with your existing editing tools.
+ * Create powerful AI editing tools (copy editing, paraphrasing, summarization, etc.) for your company, then integrate them with your existing workflow tools without having to build all the LLM-handling logic into those tools.
+ * Make LLM chains and agents from LangChain.js available via scalable REST or GraphQL endpoints.
+ * Put a REST or GraphQL front end on your locally-run models (e.g. llama.cpp) and use them in concert with other tools.
+ * The sky is the limit!
+
  ## Features

  * Simple architecture to build custom functional endpoints (called `pathways`), that implement common NL AI tasks. Default pathways include chat, summarization, translation, paraphrasing, completion, spelling and grammar correction, entity extraction, sentiment analysis, and bias analysis.
@@ -0,0 +1,70 @@
+ {
+   "defaultModelName": "oai-td3",
+   "models": {
+     "azure-translate": {
+       "type": "AZURE-TRANSLATE",
+       "url": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
+       "headers": {
+         "Ocp-Apim-Subscription-Key": "{{ARCHIPELAGO_TRANSLATE_KEY}}",
+         "Ocp-Apim-Subscription-Region": "eastus",
+         "Content-Type": "application/json"
+       },
+       "requestsPerSecond": 10,
+       "maxTokenLength": 2000
+     },
+     "oai-td3": {
+       "type": "OPENAI-COMPLETION",
+       "url": "https://api.openai.com/v1/completions",
+       "headers": {
+         "Authorization": "Bearer {{OPENAI_API_KEY}}",
+         "Content-Type": "application/json"
+       },
+       "params": {
+         "model": "text-davinci-003"
+       },
+       "requestsPerSecond": 10,
+       "maxTokenLength": 4096
+     },
+     "oai-gpturbo": {
+       "type": "OPENAI-CHAT",
+       "url": "https://api.openai.com/v1/chat/completions",
+       "headers": {
+         "Authorization": "Bearer {{OPENAI_API_KEY}}",
+         "Content-Type": "application/json"
+       },
+       "params": {
+         "model": "gpt-3.5-turbo"
+       },
+       "requestsPerSecond": 10,
+       "maxTokenLength": 8192
+     },
+     "oai-gpt4": {
+       "type": "OPENAI-CHAT",
+       "url": "https://api.openai.com/v1/chat/completions",
+       "headers": {
+         "Authorization": "Bearer {{OPENAI_API_KEY}}",
+         "Content-Type": "application/json"
+       },
+       "params": {
+         "model": "gpt-4"
+       },
+       "requestsPerSecond": 10,
+       "maxTokenLength": 8192
+     },
+     "local-llama13B": {
+       "type": "LOCAL-CPP-MODEL",
+       "executablePath": "../llm/llama.cpp/main",
+       "args": [
+         "-m", "../llm/llama.cpp/models/13B/ggml-model-q4_0.bin",
+         "--repeat_penalty", "1.0",
+         "--keep", "0",
+         "-t", "8",
+         "--mlock"
+       ],
+       "requestsPerSecond": 10,
+       "maxTokenLength": 1024
+     }
+   },
+   "enableCache": false,
+   "enableRestEndpoints": false
+ }
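The `{{OPENAI_API_KEY}}`-style values in the headers look like handlebars placeholders. A hedged sketch of how such placeholders could be resolved from environment variables follows; the `resolvePlaceholders` helper is hypothetical, and Cortex's actual substitution mechanism is not shown in this diff:

```js
import Handlebars from 'handlebars';

// Hypothetical helper: fill {{VAR}} placeholders in a config value
// from the environment.
const resolvePlaceholders = (value, env = process.env) =>
  Handlebars.compile(value, { noEscape: true })(env);

// resolvePlaceholders("Bearer {{OPENAI_API_KEY}}")
//   -> "Bearer sk-..." when OPENAI_API_KEY is set
```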
package/config.js CHANGED
@@ -110,12 +110,6 @@ var config = convict({
    default: null,
    env: 'CORTEX_CONFIG_FILE'
  },
- serpApiKey: {
-   format: String,
-   default: null,
-   env: 'SERPAPI_API_KEY',
-   sensitive: true
- },
  });

  // Read in environment variables and set up service configuration
@@ -43,7 +43,7 @@ const getSemanticChunks = (text, chunkSize) => {
  };

  const breakByParagraphs = (str) => breakByRegex(str, /[\r\n]+/, true);
- const breakBySentences = (str) => breakByRegex(str, /(?<=[.。؟!\?!\n])\s+/, true);
+ const breakBySentences = (str) => breakByRegex(str, /(?<=[.。؟!?!\n])\s+/, true);
  const breakByWords = (str) => breakByRegex(str, /(\s,;:.+)/);

  const createChunks = (tokens) => {
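The corrected lookbehind (the `\?` escape inside a character class was redundant) splits on whitespace that follows sentence-ending punctuation, including Arabic and fullwidth marks. A standalone sketch, using a plain `String.split` since `breakByRegex` itself is not part of this diff:

```js
// Split on whitespace that follows sentence-ending punctuation or a newline,
// keeping the punctuation attached to its sentence.
const breakBySentences = (str) => str.split(/(?<=[.。؟!?!\n])\s+/);

console.log(breakBySentences("First sentence. Second one! Third?  Fourth."));
// -> [ 'First sentence.', 'Second one!', 'Third?', 'Fourth.' ]
```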
@@ -48,7 +48,7 @@ const buildRestEndpoints = (pathways, app, server, config) => {

  app.post(`/rest/${name}`, async (req, res) => {
    const variables = fieldVariableDefs.reduce((acc, variableDef) => {
-     if (req.body.hasOwnProperty(variableDef.name)) {
+     if (Object.prototype.hasOwnProperty.call(req.body, variableDef.name)) {
        acc[variableDef.name] = req.body[variableDef.name];
      }
      return acc;
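The switch to `Object.prototype.hasOwnProperty.call` hardens the REST handler against request bodies that shadow `hasOwnProperty` or were built without an object prototype. A small standalone illustration:

```js
// A JSON body may legally contain a key named "hasOwnProperty", which would
// shadow the inherited method and make req.body.hasOwnProperty(...) throw.
const body = JSON.parse('{"hasOwnProperty": 1, "text": "hello"}');

// body.hasOwnProperty('text');                                   // TypeError
console.log(Object.prototype.hasOwnProperty.call(body, 'text')); // true
```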
package/graphql/parser.js CHANGED
@@ -6,6 +6,7 @@ const regexParser = (text, regex) => {
  // parse numbered list text format into list
  // this supports most common numbered list returns like "1.", "1)", "1-"
  const parseNumberedList = (str) => {
+   // eslint-disable-next-line no-useless-escape
    return regexParser(str, /^\s*[\[\{\(]*\d+[\s.=\-:,;\]\)\}]/gm);
  }

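For reference, the pattern above matches leading list markers such as `1.`, `2)`, or `(3)`. Since `regexParser` is not included in this diff, the sketch below only approximates its behavior by splitting on those markers (an assumption):

```js
// Hypothetical stand-in for regexParser: split numbered-list text on markers
// like "1.", "2)" or "(3)" and return the trimmed items.
const parseNumberedList = (str) =>
  str
    .split(/^\s*[\[\{\(]*\d+[\s.=\-:,;\]\)\}]/gm)
    .map((item) => item.trim())
    .filter(Boolean);

console.log(parseNumberedList("1. apples\n2) oranges\n3. pears"));
// -> [ 'apples', 'oranges', 'pears' ]
```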
@@ -1,8 +1,9 @@
  // PathwayPrompter.js
- import OpenAIChatPlugin from './plugins/openAIChatPlugin.js';
- import OpenAICompletionPlugin from './plugins/openAICompletionPlugin.js';
+ import OpenAIChatPlugin from './plugins/openAiChatPlugin.js';
+ import OpenAICompletionPlugin from './plugins/openAiCompletionPlugin.js';
  import AzureTranslatePlugin from './plugins/azureTranslatePlugin.js';
  import OpenAIWhisperPlugin from './plugins/openAiWhisperPlugin.js';
+ import LocalModelPlugin from './plugins/localModelPlugin.js';
  import handlebars from 'handlebars';

  // register functions that can be called directly in the prompt markdown
@@ -44,6 +45,9 @@ class PathwayPrompter {
    case 'OPENAI_WHISPER':
      plugin = new OpenAIWhisperPlugin(config, pathway);
      break;
+   case 'LOCAL-CPP-MODEL':
+     plugin = new LocalModelPlugin(config, pathway);
+     break;
    default:
      throw new handlebars.Exception(`Unsupported model type: ${model.type}`);
  }
@@ -8,8 +8,6 @@ import { Prompt } from './prompt.js';
  import { getv, setv } from '../lib/keyValueStorageClient.js';
  import { requestState } from './requestState.js';

- const MAX_PREVIOUS_RESULT_TOKEN_LENGTH = 1000;
-
  const callPathway = async (config, pathwayName, args, requestState, { text, ...parameters }) => {
    const pathwayResolver = new PathwayResolver({ config, pathway: config.get(`pathways.${pathwayName}`), args, requestState });
    return await pathwayResolver.resolve({ text, ...parameters });
@@ -1,20 +1,69 @@
  // localModelPlugin.js
  import ModelPlugin from './modelPlugin.js';
  import { execFileSync } from 'child_process';
+ import { encode } from 'gpt-3-encoder';

  class LocalModelPlugin extends ModelPlugin {
    constructor(config, pathway) {
      super(config, pathway);
    }

-   async execute(text, parameters, prompt, pathwayResolver) {
-     const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
+   // if the input starts with a chatML response, just return that
+   filterFirstResponse(inputString) {
+     const regex = /^(.*?)(?=\n<\|im_end\|>|$)/;
+     const match = inputString.match(regex);
+
+     if (match) {
+       const firstAssistantResponse = match[1];
+       return firstAssistantResponse;
+     } else {
+       return inputString;
+     }
+   }
+
+   getRequestParameters(text, parameters, prompt) {
+     let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+     const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+
+     if (modelPromptMessages) {
+       const minMsg = [{ role: "system", content: "" }];
+       const addAssistantTokens = encode(this.messagesToChatML(minMsg, true).replace(this.messagesToChatML(minMsg, false), '')).length;
+       const requestMessages = this.truncateMessagesToTargetLength(modelPromptMessages, (modelTargetTokenLength - addAssistantTokens));
+       modelPromptText = this.messagesToChatML(requestMessages);
+       tokenLength = encode(modelPromptText).length;
+     }
+
+     if (tokenLength > modelTargetTokenLength) {
+       throw new Error(`Input is too long at ${tokenLength} tokens. The target token length for this pathway is ${modelTargetTokenLength} tokens because the response is expected to take up the rest of the ${this.getModelMaxTokenLength()} tokens that the model can handle. You must reduce the size of the prompt to continue.`);
+     }
+
+     const max_tokens = this.getModelMaxTokenLength() - tokenLength;
+
+     return {
+       prompt: modelPromptText,
+       max_tokens: max_tokens,
+       temperature: this.temperature ?? 0.7,
+     };
+   }
+
+   async execute(text, parameters, prompt, _pathwayResolver) {
+     const requestParameters = this.getRequestParameters(text, parameters, prompt);
+     const { executablePath, args } = this.model;
+     args.push("--prompt", requestParameters.prompt);
+     //args.push("--max-tokens", requestParameters.max_tokens);
+     //args.push("--temperature", requestParameters.temperature);

      try {
-       const result = execFileSync(executablePath, [text], { encoding: 'utf8' });
-       return result;
+       console.log(`\x1b[36mRunning local model:\x1b[0m`, executablePath, args);
+       const result = execFileSync(executablePath, args, { encoding: 'utf8' });
+       // Remove only the first occurrence of requestParameters.prompt from the result
+       // Could have used regex here but then would need to escape the prompt
+       const parts = result.split(requestParameters.prompt, 2);
+       const modifiedResult = parts[0] + parts[1];
+       console.log(`\x1b[36mResult:\x1b[0m`, modifiedResult);
+       return this.filterFirstResponse(modifiedResult);
      } catch (error) {
-       console.error('Error running local model:', error);
+       console.error(`\x1b[31mError running local model:\x1b[0m`, error);
        throw error;
      }
    }
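`filterFirstResponse` above trims the raw llama.cpp output at the first chatML end marker; it can be exercised in isolation:

```js
// If the first line of output is immediately followed by "\n<|im_end|>",
// return just that line; otherwise return the input unchanged.
const filterFirstResponse = (inputString) => {
  const match = inputString.match(/^(.*?)(?=\n<\|im_end\|>|$)/);
  return match ? match[1] : inputString;
};

console.log(filterFirstResponse("Paris is the capital of France.\n<|im_end|>\n<|im_start|>user\n..."));
// -> "Paris is the capital of France."
```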
@@ -40,7 +40,7 @@ class ModelPlugin {
    this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
  }

- truncateMessagesToTargetLength = (messages, targetTokenLength) => {
+ truncateMessagesToTargetLength(messages, targetTokenLength) {
    // Calculate the token length of each message
    const tokenLengths = messages.map((message) => ({
      message,
@@ -97,7 +97,7 @@ class ModelPlugin {

    // Return the modified messages array
    return tokenLengths.map(({ message }) => message);
- };
+ }

  //convert a messages array to a simple chatML format
  messagesToChatML(messages, addAssistant = true) {
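Changing `truncateMessagesToTargetLength` from an arrow-function class field to a regular method moves it from each instance onto `ModelPlugin.prototype`, matching the other methods on the class. A minimal illustration of the structural difference:

```js
class Example {
  asField = () => 'field';        // created per instance; not on Example.prototype
  asMethod() { return 'method'; } // defined once, on Example.prototype
}

console.log(Object.getOwnPropertyNames(Example.prototype)); // [ 'constructor', 'asMethod' ]
console.log(Object.getOwnPropertyNames(new Example()));     // [ 'asField' ]
```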
package/graphql/prompt.js CHANGED
@@ -26,6 +26,7 @@ function promptContains(variable, prompt) {
  // if it's an array, it's the messages format
  if (Array.isArray(prompt)) {
    prompt.forEach(p => {
+     // eslint-disable-next-line no-cond-assign
      while (match = p.content && regexp.exec(p.content)) {
        matches.push(match[1]);
      }
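The suppressed `no-cond-assign` warning comes from the common pattern of assigning inside a loop condition to collect every match of a global regex. A standalone sketch of the same idiom (the placeholder regex here is illustrative, not necessarily the one `promptContains` uses):

```js
// Intentional assignment in the loop condition: exec() on a /g regex advances
// lastIndex each call, so the loop visits every match and then stops on null.
const regexp = /{{\s*([^}\s]+)\s*}}/g;
const matches = [];
let match;
// eslint-disable-next-line no-cond-assign
while ((match = regexp.exec('Translate {{text}} into {{targetLanguage}}.'))) {
  matches.push(match[1]);
}
console.log(matches); // [ 'text', 'targetLanguage' ]
```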
@@ -26,12 +26,12 @@ const rootResolver = async (parent, args, contextValue, info) => {
  }

  // This resolver is used by the root resolver to process the request
- const resolver = async (parent, args, contextValue, info) => {
+ const resolver = async (parent, args, contextValue, _info) => {
    const { pathwayResolver } = contextValue;
    return await pathwayResolver.resolve(args);
  }

- const cancelRequestResolver = (parent, args, contextValue, info) => {
+ const cancelRequestResolver = (parent, args, contextValue, _info) => {
    const { requestId } = args;
    const { requestState } = contextValue;
    requestState[requestId] = { canceled: true };
@@ -10,7 +10,7 @@ import { requestState } from './requestState.js';
  const subscriptions = {
    requestProgress: {
      subscribe: withFilter(
-       (_, args, __, info) => {
+       (_, args, __, _info) => {
          const { requestIds } = args;
          for (const requestId of requestIds) {
            if (!requestState[requestId]) {
@@ -42,10 +42,6 @@ const generateUniqueFolderName = () => {
    return uniqueOutputPath;
  }

- const generateUniqueTempFileName = () => {
-   return path.join(os.tmpdir(), uuidv4());
- }
-
  async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
    try {
      const metadata = await ffmpegProbe(inputPath);
@@ -146,15 +142,6 @@ const processYoutubeUrl = async (url) => {
    return outputFileName;
  }

- function deleteFile(filePath) {
-   try {
-     fs.unlinkSync(filePath);
-     console.log(`File ${filePath} cleaned successfully.`);
-   } catch (error) {
-     console.error(`Error deleting file ${filePath}:`, error);
-   }
- }
-
  export {
    splitMediaFile, deleteTempPath, processYoutubeUrl, isValidYoutubeUrl
  };
package/lib/request.js CHANGED
@@ -64,7 +64,6 @@ const postWithMonitor = async (model, url, data, axiosConfigObj) => {

  const MAX_RETRY = 10;
  const postRequest = async ({ url, data, params, headers, cache }, model) => {
-   let retry = 0;
    const errors = []
    for (let i = 0; i < MAX_RETRY; i++) {
      try {
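The deleted `retry` counter was dead code; the loop below it already tracks attempts with `i`. For orientation, a hedged sketch of the general retry shape (illustrative only, not the package's actual implementation, which also layers in caching and request monitoring):

```js
import axios from 'axios';

const MAX_RETRY = 10;

// Illustrative only: try the POST up to MAX_RETRY times, remembering each
// error and rethrowing the last one if every attempt fails.
const postWithRetry = async (url, data, axiosConfigObj) => {
  const errors = [];
  for (let i = 0; i < MAX_RETRY; i++) {
    try {
      return await axios.post(url, data, axiosConfigObj);
    } catch (error) {
      errors.push(error);
    }
  }
  throw errors[errors.length - 1];
};
```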
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aj-archipelago/cortex",
-   "version": "1.0.1",
+   "version": "1.0.2",
    "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
    "repository": {
      "type": "git",
@@ -42,6 +42,7 @@
    "compromise": "^14.8.1",
    "compromise-paragraphs": "^0.1.0",
    "convict": "^6.2.3",
+   "express": "^4.18.2",
    "fluent-ffmpeg": "^2.1.2",
    "form-data": "^4.0.0",
    "gpt-3-encoder": "^1.1.4",
@@ -51,12 +52,15 @@
    "handlebars": "^4.7.7",
    "keyv": "^4.5.2",
    "langchain": "^0.0.47",
+   "uuid": "^9.0.0",
    "ws": "^8.12.0",
    "ytdl-core": "^4.11.2"
  },
  "devDependencies": {
    "ava": "^5.2.0",
-   "dotenv": "^16.0.3"
+   "dotenv": "^16.0.3",
+   "eslint": "^8.38.0",
+   "eslint-plugin-import": "^2.27.5"
  },
  "publishConfig": {
    "access": "restricted"
@@ -3,20 +3,24 @@

  // Import required modules
  import { OpenAI } from "langchain/llms";
- import { PromptTemplate } from "langchain/prompts";
- import { LLMChain, ConversationChain } from "langchain/chains";
+ //import { PromptTemplate } from "langchain/prompts";
+ //import { LLMChain, ConversationChain } from "langchain/chains";
  import { initializeAgentExecutor } from "langchain/agents";
  import { SerpAPI, Calculator } from "langchain/tools";
- import { BufferMemory } from "langchain/memory";
+ //import { BufferMemory } from "langchain/memory";

  export default {

    // Agent test case
-   resolver: async (parent, args, contextValue, info) => {
+   resolver: async (parent, args, contextValue, _info) => {

      const { config } = contextValue;
+     const env = config.getEnv();
+
+     // example of reading from a predefined config variable
      const openAIApiKey = config.get('openaiApiKey');
-     const serpApiKey = config.get('serpApiKey');
+     // example of reading straight from environment
+     const serpApiKey = env.SERPAPI_API_KEY;

      const model = new OpenAI({ openAIApiKey: openAIApiKey, temperature: 0 });
      const tools = [new SerpAPI( serpApiKey ), new Calculator()];
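Since `serpApiKey` no longer has a convict schema entry (see the `config.js` change above), the key only needs to be present in the process environment when this pathway runs, for example via dotenv, which is already a dev dependency. A hedged sketch, assuming `config.getEnv()` surfaces `process.env`:

```js
// .env (not committed):
//   OPENAI_API_KEY=sk-...
//   SERPAPI_API_KEY=your-serpapi-key

import 'dotenv/config';

// With the environment loaded, the pathway above can read the key directly,
// with no convict schema entry required (assumes getEnv() exposes process.env).
// const serpApiKey = config.getEnv().SERPAPI_API_KEY;
```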
@@ -16,7 +16,7 @@ export default {
16
16
  },
17
17
 
18
18
  // Custom resolver to generate summaries by reprompting if they are too long or too short.
19
- resolver: async (parent, args, contextValue, info) => {
19
+ resolver: async (parent, args, contextValue, _info) => {
20
20
  const { config, pathway, requestState } = contextValue;
21
21
  const originalTargetLength = args.targetLength;
22
22