@aj-archipelago/cortex 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +30 -0
- package/.eslintrc +31 -0
- package/README.md +13 -1
- package/config/default.example.json +70 -0
- package/config.js +5 -6
- package/graphql/chunker.js +1 -1
- package/graphql/graphql.js +1 -1
- package/graphql/parser.js +7 -0
- package/graphql/pathwayPrompter.js +8 -19
- package/graphql/pathwayResolver.js +10 -10
- package/graphql/pathwayResponseParser.js +13 -4
- package/graphql/plugins/localModelPlugin.js +54 -5
- package/graphql/plugins/modelPlugin.js +29 -20
- package/graphql/plugins/openAiCompletionPlugin.js +29 -12
- package/graphql/plugins/openAiWhisperPlugin.js +112 -19
- package/graphql/prompt.js +1 -0
- package/graphql/resolver.js +2 -2
- package/graphql/subscriptions.js +1 -1
- package/helper_apps/MediaFileChunker/blobHandler.js +150 -0
- package/helper_apps/MediaFileChunker/fileChunker.js +123 -0
- package/helper_apps/MediaFileChunker/function.json +20 -0
- package/helper_apps/MediaFileChunker/helper.js +33 -0
- package/helper_apps/MediaFileChunker/index.js +116 -0
- package/helper_apps/MediaFileChunker/localFileHandler.js +36 -0
- package/helper_apps/MediaFileChunker/package-lock.json +2919 -0
- package/helper_apps/MediaFileChunker/package.json +22 -0
- package/helper_apps/MediaFileChunker/redis.js +32 -0
- package/helper_apps/MediaFileChunker/start.js +27 -0
- package/lib/handleBars.js +26 -0
- package/lib/pathwayTools.js +15 -0
- package/lib/redisSubscription.js +51 -0
- package/lib/request.js +4 -5
- package/package.json +9 -6
- package/pathways/lc_test.mjs +9 -5
- package/pathways/summary.js +1 -1
- package/pathways/transcribe.js +2 -1
- package/tests/config.test.js +69 -0
- package/tests/handleBars.test.js +43 -0
- package/tests/mocks.js +39 -0
- package/tests/modelPlugin.test.js +129 -0
- package/tests/pathwayResolver.test.js +77 -0
- package/tests/truncateMessages.test.js +99 -0
- package/lib/fileChunker.js +0 -160
package/.eslintignore
ADDED
@@ -0,0 +1,30 @@
+# Ignore build artifacts
+/dist
+/build
+
+# Ignore node_modules
+/node_modules
+
+# Ignore log files
+*.log
+
+# Ignore any config files
+.env
+.env.*
+
+# Ignore coverage reports
+/coverage
+
+# Ignore documentation
+/docs
+
+# Ignore any generated or bundled files
+*.min.js
+*.bundle.js
+
+# Ignore any files generated by your IDE or text editor
+.idea/
+.vscode/
+*.sublime-*
+*.iml
+*.swp
package/.eslintrc
ADDED
@@ -0,0 +1,31 @@
+{
+  "env": {
+    "browser": true,
+    "es2021": true,
+    "node": true
+  },
+  "extends": [
+    "eslint:recommended"
+  ],
+  "parserOptions": {
+    "ecmaVersion": "latest",
+    "sourceType": "module"
+  },
+  "plugins": [
+    "import"
+  ],
+  "rules": {
+    "import/no-unresolved": "error",
+    "import/no-extraneous-dependencies": ["error", {"devDependencies": true}],
+    "no-unused-vars": ["error", { "argsIgnorePattern": "^_" }]
+  },
+  "settings": {
+    "import/resolver": {
+      "node": {
+        "extensions": [".js"],
+        "moduleDirectory": ["node_modules", "src"]
+      }
+    },
+    "import/core-modules": ["ava"]
+  }
+}
package/README.md
CHANGED
@@ -2,6 +2,18 @@
 Cortex simplifies and accelerates the process of creating applications that harness the power of modern AI models like chatGPT and GPT-4 by providing a structured interface (GraphQL or REST) to a powerful prompt execution environment. This enables complex augmented prompting and abstracts away most of the complexity of managing model connections like chunking input, rate limiting, formatting output, caching, and handling errors.
 ## Why build Cortex?
 Modern AI models are transformational, but a number of complexities emerge when developers start using them to deliver application-ready functions. Most models require precisely formatted, carefully engineered and sequenced prompts to produce consistent results, and the responses are typically largely unstructured text without validation or formatting. Additionally, these models are evolving rapidly, are typically costly and slow to query and implement hard request size and rate restrictions that need to be carefully navigated for optimum throughput. Cortex offers a solution to these problems and provides a simple and extensible package for interacting with NL AI models.
+
+## Okay, but what can I really do with this thing?
+Just about anything! It's kind of an LLM swiss army knife. Here are some ideas:
+* Create custom chat agents with memory and personalization and then expose them through a bunch of different UIs (custom chat portals, Slack, Microsoft Teams, etc. - anything that can be extended and speak to a REST or GraphQL endpoint)
+* Spin up LLM powered automatons with their prompting logic and AI API handling logic all centrally encapsulated.
+* Make LLM chains and agents from LangChain.js available via scalable REST or GraphQL endpoints.
+* Put a REST or GraphQL front end on your locally-run models (e.g. llama.cpp) and use them in concert with other tools.
+* Create modular custom coding assistants (code generation, code reviews, test writing, AI pair programming) and easily integrate them with your existing editing tools.
+* Create powerful AI editing tools (copy editing, paraphrasing, summarization, etc.) for your company and then integrate them with your existing workflow tools without having to build all the LLM-handling logic into those tools.
+* Create cached endpoints for functions with repeated calls so the results return instantly and you don't run up LLM token charges.
+* Route all of your company's LLM access through a single API layer to optimize and monitor usage and centrally control rate limiting and which models are being used.
+
 ## Features
 
 * Simple architecture to build custom functional endpoints (called `pathways`), that implement common NL AI tasks. Default pathways include chat, summarization, translation, paraphrasing, completion, spelling and grammar correction, entity extraction, sentiment analysis, and bias analysis.
@@ -24,7 +36,7 @@ npm install
 export OPENAI_API_KEY=<your key>
 npm start
 ```
-Yup, that's it, at least in the simplest possible case. That will get you access to all of the built in pathways.
+Yup, that's it, at least in the simplest possible case. That will get you access to all of the built in pathways. If you prefer to use npm instead of cloning, we have an npm package too: [@aj-archipelago/cortex](https://www.npmjs.com/package/@aj-archipelago/cortex)
 ## Connecting Applications to Cortex
 Cortex speaks GraphQL and by default it enables the GraphQL playground. If you're just using default options, that's at [http://localhost:4000/graphql](http://localhost:4000/graphql). From there you can begin making requests and test out the pathways (listed under Query) to your heart's content. If GraphQL isn't your thing or if you have a client that would rather have REST that's fine - Cortex speaks REST as well.
 
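The REST side of the README pitch maps onto the per-pathway endpoints that `graphql.js` registers (see the `package/graphql/graphql.js` change further down). A minimal client sketch, assuming Cortex is running locally on port 4000 with `enableRestEndpoints` turned on and that the built-in `summary` pathway accepts a `text` variable; the response shape is not shown in this diff, so treat it as illustrative:

```js
// Hypothetical call against a locally running Cortex instance.
const res = await fetch('http://localhost:4000/rest/summary', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text: 'Long article text to summarize...' }),
});
console.log(await res.json()); // exact shape depends on the pathway's GraphQL type
```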
package/config/default.example.json
ADDED
@@ -0,0 +1,70 @@
+{
+  "defaultModelName": "oai-td3",
+  "models": {
+    "azure-translate": {
+      "type": "AZURE-TRANSLATE",
+      "url": "https://api.cognitive.microsofttranslator.com/translate?api-version=3.0",
+      "headers": {
+        "Ocp-Apim-Subscription-Key": "{{ARCHIPELAGO_TRANSLATE_KEY}}",
+        "Ocp-Apim-Subscription-Region": "eastus",
+        "Content-Type": "application/json"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 2000
+    },
+    "oai-td3": {
+      "type": "OPENAI-COMPLETION",
+      "url": "https://api.openai.com/v1/completions",
+      "headers": {
+        "Authorization": "Bearer {{OPENAI_API_KEY}}",
+        "Content-Type": "application/json"
+      },
+      "params": {
+        "model": "text-davinci-003"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 4096
+    },
+    "oai-gpturbo": {
+      "type": "OPENAI-CHAT",
+      "url": "https://api.openai.com/v1/chat/completions",
+      "headers": {
+        "Authorization": "Bearer {{OPENAI_API_KEY}}",
+        "Content-Type": "application/json"
+      },
+      "params": {
+        "model": "gpt-3.5-turbo"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 8192
+    },
+    "oai-gpt4": {
+      "type": "OPENAI-CHAT",
+      "url": "https://api.openai.com/v1/chat/completions",
+      "headers": {
+        "Authorization": "Bearer {{OPENAI_API_KEY}}",
+        "Content-Type": "application/json"
+      },
+      "params": {
+        "model": "gpt-4"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 8192
+    },
+    "local-llama13B": {
+      "type": "LOCAL-CPP-MODEL",
+      "executablePath": "../llm/llama.cpp/main",
+      "args": [
+        "-m", "../llm/llama.cpp/models/13B/ggml-model-q4_0.bin",
+        "--repeat_penalty", "1.0",
+        "--keep", "0",
+        "-t", "8",
+        "--mlock"
+      ],
+      "requestsPerSecond": 10,
+      "maxTokenLength": 1024
+    }
+  },
+  "enableCache": false,
+  "enableRestEndpoints": false
+}
package/config.js
CHANGED
@@ -1,7 +1,7 @@
 import path from 'path';
 const __dirname = path.dirname(new URL(import.meta.url).pathname);
 import convict from 'convict';
-import
+import HandleBars from './lib/handleBars.js';
 import fs from 'fs';
 
 // Schema for config
@@ -110,11 +110,10 @@ var config = convict({
     default: null,
     env: 'CORTEX_CONFIG_FILE'
   },
-
+  whisperMediaApiUrl: {
     format: String,
-    default: null,
-    env: '
-    sensitive: true
+    default: 'null',
+    env: 'WHISPER_MEDIA_API_URL'
   },
 });
 
@@ -173,7 +172,7 @@ const buildModels = (config) => {
 
   for (const [key, model] of Object.entries(models)) {
     // Compile handlebars templates for models
-    models[key] = JSON.parse(
+    models[key] = JSON.parse(HandleBars.compile(JSON.stringify(model))({ ...config.getEnv(), ...config.getProperties() }))
  }
 
  // Add constructed models to config
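The rewritten `buildModels` line compiles each model entry from `default.example.json` as a Handlebars template, so placeholders like `{{OPENAI_API_KEY}}` are filled from the environment and config properties. A minimal standalone sketch of that substitution, using the `handlebars` package directly rather than the package's own `lib/handleBars.js` wrapper (which this diff shows only being imported):

```js
import Handlebars from 'handlebars';

// One model entry as it appears in config/default.example.json
const model = {
    type: 'OPENAI-COMPLETION',
    url: 'https://api.openai.com/v1/completions',
    headers: { Authorization: 'Bearer {{OPENAI_API_KEY}}' },
};

// Mirrors the buildModels line above: stringify, compile, substitute, re-parse
const compiled = JSON.parse(
    Handlebars.compile(JSON.stringify(model))({ OPENAI_API_KEY: process.env.OPENAI_API_KEY })
);

console.log(compiled.headers.Authorization); // "Bearer sk-..." once OPENAI_API_KEY is set
```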
package/graphql/chunker.js
CHANGED
@@ -43,7 +43,7 @@ const getSemanticChunks = (text, chunkSize) => {
 };
 
 const breakByParagraphs = (str) => breakByRegex(str, /[\r\n]+/, true);
-const breakBySentences = (str) => breakByRegex(str, /(?<=[
+const breakBySentences = (str) => breakByRegex(str, /(?<=[.。؟!?!\n])\s+/, true);
 const breakByWords = (str) => breakByRegex(str, /(\s,;:.+)/);
 
 const createChunks = (tokens) => {
package/graphql/graphql.js
CHANGED
@@ -48,7 +48,7 @@ const buildRestEndpoints = (pathways, app, server, config) => {
 
     app.post(`/rest/${name}`, async (req, res) => {
         const variables = fieldVariableDefs.reduce((acc, variableDef) => {
-            if (
+            if (Object.prototype.hasOwnProperty.call(req.body, variableDef.name)) {
                 acc[variableDef.name] = req.body[variableDef.name];
             }
             return acc;
package/graphql/parser.js
CHANGED
@@ -6,6 +6,7 @@ const regexParser = (text, regex) => {
 // parse numbered list text format into list
 // this supports most common numbered list returns like "1.", "1)", "1-"
 const parseNumberedList = (str) => {
+    // eslint-disable-next-line no-useless-escape
     return regexParser(str, /^\s*[\[\{\(]*\d+[\s.=\-:,;\]\)\}]/gm);
 }
 
@@ -31,8 +32,14 @@ const parseNumberedObjectList = (text, format) => {
     return result;
 }
 
+// parse a comma-separated list text format into list
+const parseCommaSeparatedList = (str) => {
+    return str.split(',').map(s => s.trim()).filter(s => s.length);
+}
+
 export {
     regexParser,
     parseNumberedList,
     parseNumberedObjectList,
+    parseCommaSeparatedList,
 };
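For reference, the new `parseCommaSeparatedList` helper trims each entry and drops empty ones, so trailing commas are harmless. A quick usage sketch (the import path is assumed relative to the package root):

```js
import { parseCommaSeparatedList } from './graphql/parser.js';

parseCommaSeparatedList('apples, oranges , bananas,');
// -> ['apples', 'oranges', 'bananas']
```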
package/graphql/pathwayPrompter.js
CHANGED
@@ -1,23 +1,9 @@
 // PathwayPrompter.js
-import OpenAIChatPlugin from './plugins/
-import OpenAICompletionPlugin from './plugins/
+import OpenAIChatPlugin from './plugins/openAiChatPlugin.js';
+import OpenAICompletionPlugin from './plugins/openAiCompletionPlugin.js';
 import AzureTranslatePlugin from './plugins/azureTranslatePlugin.js';
 import OpenAIWhisperPlugin from './plugins/openAiWhisperPlugin.js';
-import
-
-// register functions that can be called directly in the prompt markdown
-handlebars.registerHelper('stripHTML', function (value) {
-    return value.replace(/<[^>]*>/g, '');
-});
-
-handlebars.registerHelper('now', function () {
-    return new Date().toISOString();
-});
-
-handlebars.registerHelper('toJSON', function (object) {
-    return JSON.stringify(object);
-});
-
+import LocalModelPlugin from './plugins/localModelPlugin.js';
 
 class PathwayPrompter {
     constructor({ config, pathway }) {
@@ -26,7 +12,7 @@ class PathwayPrompter {
         const model = config.get('models')[modelName];
 
         if (!model) {
-            throw new
+            throw new Error(`Model ${modelName} not found in config`);
         }
 
         let plugin;
@@ -44,8 +30,11 @@ class PathwayPrompter {
             case 'OPENAI_WHISPER':
                 plugin = new OpenAIWhisperPlugin(config, pathway);
                 break;
+            case 'LOCAL-CPP-MODEL':
+                plugin = new LocalModelPlugin(config, pathway);
+                break;
             default:
-                throw new
+                throw new Error(`Unsupported model type: ${model.type}`);
         }
 
         this.plugin = plugin;
package/graphql/pathwayResolver.js
CHANGED
@@ -1,4 +1,5 @@
 import { PathwayPrompter } from './pathwayPrompter.js';
+// eslint-disable-next-line import/no-extraneous-dependencies
 import { v4 as uuidv4 } from 'uuid';
 import pubsub from './pubsub.js';
 import { encode } from 'gpt-3-encoder';
@@ -7,13 +8,7 @@ import { PathwayResponseParser } from './pathwayResponseParser.js';
 import { Prompt } from './prompt.js';
 import { getv, setv } from '../lib/keyValueStorageClient.js';
 import { requestState } from './requestState.js';
-
-const MAX_PREVIOUS_RESULT_TOKEN_LENGTH = 1000;
-
-const callPathway = async (config, pathwayName, args, requestState, { text, ...parameters }) => {
-    const pathwayResolver = new PathwayResolver({ config, pathway: config.get(`pathways.${pathwayName}`), args, requestState });
-    return await pathwayResolver.resolve({ text, ...parameters });
-}
+import { callPathway } from '../lib/pathwayTools.js';
 
 class PathwayResolver {
     constructor({ config, pathway, args }) {
@@ -141,6 +136,12 @@ class PathwayResolver {
         return this.responseParser.parse(data);
     }
 
+    // Add a warning and log it
+    logWarning(warning) {
+        this.warnings.push(warning);
+        console.warn(warning);
+    }
+
     // Here we choose how to handle long input - either summarize or chunk
     processInputText(text) {
         let chunkTokenLength = 0;
@@ -153,8 +154,7 @@ class PathwayResolver {
         if (!this.useInputChunking || encoded.length <= chunkTokenLength) { // no chunking, return as is
             if (encoded.length > 0 && encoded.length >= chunkTokenLength) {
                 const warnText = `Truncating long input text. Text length: ${text.length}`;
-                this.
-                console.warn(warnText);
+                this.logWarning(warnText);
             }
             text = this.truncate(text, chunkTokenLength);
             }
             return [text];
@@ -173,7 +173,7 @@ class PathwayResolver {
 
     async summarizeIfEnabled({ text, ...parameters }) {
         if (this.pathway.useInputSummarization) {
-            return await callPathway(this.config, 'summary', this.args,
+            return await callPathway(this.config, 'summary', { ...this.args, ...parameters, targetLength: 0});
         }
         return text;
     }
package/graphql/pathwayResponseParser.js
CHANGED
@@ -1,20 +1,29 @@
-import { parseNumberedList, parseNumberedObjectList } from './parser.js';
+import { parseNumberedList, parseNumberedObjectList, parseCommaSeparatedList } from './parser.js';
 
 class PathwayResponseParser {
     constructor(pathway) {
         this.pathway = pathway;
     }
 
+    isCommaSeparatedList(data) {
+        const commaSeparatedPattern = /^([^,\n]+,)+[^,\n]+$/;
+        return commaSeparatedPattern.test(data.trim());
+    }
+
     parse(data) {
         if (this.pathway.parser) {
             return this.pathway.parser(data);
         }
 
         if (this.pathway.list) {
-            if (this.
-                return
+            if (this.isCommaSeparatedList(data)) {
+                return parseCommaSeparatedList(data);
+            } else {
+                if (this.pathway.format) {
+                    return parseNumberedObjectList(data, this.pathway.format);
+                }
+                return parseNumberedList(data);
             }
-            return parseNumberedList(data)
         }
 
         return data;
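With the change above, a pathway marked `list: true` now branches on the shape of the model's raw output: single-line comma-separated answers go through `parseCommaSeparatedList`, while everything else still falls back to the numbered-list parsers. A rough behavior sketch (import path assumed; the numbered-list result ultimately depends on `regexParser`, which is not shown in this diff):

```js
import { PathwayResponseParser } from './graphql/pathwayResponseParser.js';

const parser = new PathwayResponseParser({ list: true });

parser.parse('red, green, blue');
// isCommaSeparatedList matches -> ['red', 'green', 'blue']

parser.parse('1. red\n2. green\n3. blue');
// contains no commas, so it falls through to parseNumberedList instead
```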
package/graphql/plugins/localModelPlugin.js
CHANGED
@@ -1,20 +1,69 @@
 // localModelPlugin.js
 import ModelPlugin from './modelPlugin.js';
 import { execFileSync } from 'child_process';
+import { encode } from 'gpt-3-encoder';
 
 class LocalModelPlugin extends ModelPlugin {
     constructor(config, pathway) {
         super(config, pathway);
     }
 
-
-
+    // if the input starts with a chatML response, just return that
+    filterFirstResponse(inputString) {
+        const regex = /^(.*?)(?=\n<\|im_end\|>|$)/;
+        const match = inputString.match(regex);
+
+        if (match) {
+            const firstAssistantResponse = match[1];
+            return firstAssistantResponse;
+        } else {
+            return inputString;
+        }
+    }
+
+    getRequestParameters(text, parameters, prompt) {
+        let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+
+        if (modelPromptMessages) {
+            const minMsg = [{ role: "system", content: "" }];
+            const addAssistantTokens = encode(this.messagesToChatML(minMsg, true).replace(this.messagesToChatML(minMsg, false), '')).length;
+            const requestMessages = this.truncateMessagesToTargetLength(modelPromptMessages, (modelTargetTokenLength - addAssistantTokens));
+            modelPromptText = this.messagesToChatML(requestMessages);
+            tokenLength = encode(modelPromptText).length;
+        }
+
+        if (tokenLength > modelTargetTokenLength) {
+            throw new Error(`Input is too long at ${tokenLength} tokens. The target token length for this pathway is ${modelTargetTokenLength} tokens because the response is expected to take up the rest of the ${this.getModelMaxTokenLength()} tokens that the model can handle. You must reduce the size of the prompt to continue.`);
+        }
+
+        const max_tokens = this.getModelMaxTokenLength() - tokenLength;
+
+        return {
+            prompt: modelPromptText,
+            max_tokens: max_tokens,
+            temperature: this.temperature ?? 0.7,
+        };
+    }
+
+    async execute(text, parameters, prompt, _pathwayResolver) {
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
+        const { executablePath, args } = this.model;
+        args.push("--prompt", requestParameters.prompt);
+        //args.push("--max-tokens", requestParameters.max_tokens);
+        //args.push("--temperature", requestParameters.temperature);
 
         try {
-
-
+            console.log(`\x1b[36mRunning local model:\x1b[0m`, executablePath, args);
+            const result = execFileSync(executablePath, args, { encoding: 'utf8' });
+            // Remove only the first occurrence of requestParameters.prompt from the result
+            // Could have used regex here but then would need to escape the prompt
+            const parts = result.split(requestParameters.prompt, 2);
+            const modifiedResult = parts[0] + parts[1];
+            console.log(`\x1b[36mResult:\x1b[0m`, modifiedResult);
+            return this.filterFirstResponse(modifiedResult);
         } catch (error) {
-            console.error(
+            console.error(`\x1b[31mError running local model:\x1b[0m`, error);
             throw error;
         }
     }
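The new `filterFirstResponse` regex keeps only the text before the first `\n<|im_end|>` marker, which matters when a llama.cpp run echoes the rest of a chatML transcript after its answer. A small sketch using the regex from the diff on a made-up output string:

```js
// Hypothetical raw output from the local model
const raw = 'The sky looks blue because of Rayleigh scattering.\n<|im_end|>\n<|im_start|>user\nnext turn...';

const match = raw.match(/^(.*?)(?=\n<\|im_end\|>|$)/);
console.log(match[1]);
// -> 'The sky looks blue because of Rayleigh scattering.'
```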
package/graphql/plugins/modelPlugin.js
CHANGED
@@ -1,5 +1,5 @@
 // ModelPlugin.js
-import
+import HandleBars from '../../lib/handleBars.js';
 
 import { request } from '../../lib/request.js';
 import { encode } from 'gpt-3-encoder';
@@ -40,7 +40,7 @@ class ModelPlugin {
         this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
     }
 
-    truncateMessagesToTargetLength
+    truncateMessagesToTargetLength(messages, targetTokenLength) {
         // Calculate the token length of each message
         const tokenLengths = messages.map((message) => ({
             message,
@@ -58,7 +58,7 @@ class ModelPlugin {
 
         // Remove and/or truncate messages until the target token length is reached
         let index = 0;
-        while (totalTokenLength > targetTokenLength) {
+        while ((totalTokenLength > targetTokenLength) && (index < tokenLengths.length)) {
             const message = tokenLengths[index].message;
 
             // Skip system messages
@@ -79,25 +79,34 @@ class ModelPlugin {
             const otherMessageTokens = totalTokenLength - currentTokenLength;
             const tokensToKeep = targetTokenLength - (otherMessageTokens + emptyContentLength);
 
-
-
-
-
-
-
-
+            if (tokensToKeep <= 0) {
+                // If the message needs to be empty to make the target, remove it entirely
+                totalTokenLength -= currentTokenLength;
+                tokenLengths.splice(index, 1);
+            } else {
+                // Otherwise, update the message and token length
+                const truncatedContent = getFirstNToken(message.content, tokensToKeep);
+                const truncatedMessage = { ...message, content: truncatedContent };
+
+                tokenLengths[index] = {
+                    message: truncatedMessage,
+                    tokenLength: encode(this.messagesToChatML([ truncatedMessage ], false)).length
+                }
 
-
-
-
-
-
+                // calculate the length again to keep us honest
+                totalTokenLength = tokenLengths.reduce(
+                    (sum, { tokenLength }) => sum + tokenLength,
+                    0
+                );
+
+                index++;
+            }
             }
         }
 
         // Return the modified messages array
         return tokenLengths.map(({ message }) => message);
-    }
+    }
 
     //convert a messages array to a simple chatML format
     messagesToChatML(messages, addAssistant = true) {
@@ -118,7 +127,7 @@ class ModelPlugin {
     getCompiledPrompt(text, parameters, prompt) {
         const combinedParameters = { ...this.promptParameters, ...parameters };
         const modelPrompt = this.getModelPrompt(prompt, parameters);
-        const modelPromptText = modelPrompt.prompt ?
+        const modelPromptText = modelPrompt.prompt ? HandleBars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
 
@@ -135,7 +144,7 @@ class ModelPlugin {
 
     getPromptTokenRatio() {
         // TODO: Is this the right order of precedence? inputParameters should maybe be second?
-        return this.promptParameters.inputParameters
+        return this.promptParameters.inputParameters?.tokenRatio ?? this.promptParameters.tokenRatio ?? DEFAULT_PROMPT_TOKEN_RATIO;
     }
 
 
@@ -155,7 +164,7 @@ class ModelPlugin {
         // First run handlebars compile on the pathway messages
         const compiledMessages = modelPrompt.messages.map((message) => {
             if (message.content) {
-                const compileText =
+                const compileText = HandleBars.compile(message.content);
                 return {
                     role: message.role,
                     content: compileText({ ...combinedParameters, text }),
@@ -184,7 +193,7 @@
     }
 
     requestUrl() {
-        const generateUrl =
+        const generateUrl = HandleBars.compile(this.model.url);
         return generateUrl({ ...this.model, ...this.environmentVariables, ...this.config });
     }
 
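The completed `getPromptTokenRatio` and the truncation changes above all feed the same token budget used by the plugins: the prompt gets roughly `maxTokenLength * tokenRatio` tokens and the completion gets whatever remains. Illustrative numbers only; the actual `DEFAULT_PROMPT_TOKEN_RATIO` constant is defined elsewhere in modelPlugin.js and is not shown in this diff:

```js
const maxTokenLength = 4096;   // the model's "maxTokenLength" from config
const tokenRatio = 0.5;        // assumed pathway/prompt tokenRatio for this sketch
const targetPromptTokens = maxTokenLength * tokenRatio; // 2048-token budget for the prompt

const promptTokens = 1500;     // measured with encode() from gpt-3-encoder
const max_tokens = maxTokenLength - promptTokens;       // 2596 tokens left for the completion
```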
package/graphql/plugins/openAiCompletionPlugin.js
CHANGED
@@ -1,15 +1,27 @@
 // OpenAICompletionPlugin.js
+
 import ModelPlugin from './modelPlugin.js';
 
 import { encode } from 'gpt-3-encoder';
 
+// Helper function to truncate the prompt if it is too long
+const truncatePromptIfNecessary = (text, textTokenCount, modelMaxTokenCount, targetTextTokenCount, pathwayResolver) => {
+    const maxAllowedTextTokenCount = textTokenCount + ((modelMaxTokenCount - targetTextTokenCount) * 0.5);
+
+    if (textTokenCount > maxAllowedTextTokenCount) {
+        pathwayResolver.logWarning(`Prompt is too long at ${textTokenCount} tokens (this target token length for this pathway is ${targetTextTokenCount} tokens because the response is expected to take up the rest of the model's max tokens (${modelMaxTokenCount}). Prompt will be truncated.`);
+        return pathwayResolver.truncate(text, maxAllowedTextTokenCount);
+    }
+    return text;
+}
+
 class OpenAICompletionPlugin extends ModelPlugin {
     constructor(config, pathway) {
         super(config, pathway);
     }
 
     // Set up parameters specific to the OpenAI Completion API
-    getRequestParameters(text, parameters, prompt) {
+    getRequestParameters(text, parameters, prompt, pathwayResolver) {
         let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
         const { stream } = parameters;
         let modelPromptMessagesML = '';
@@ -23,12 +35,14 @@ class OpenAICompletionPlugin extends ModelPlugin {
             const requestMessages = this.truncateMessagesToTargetLength(modelPromptMessages, (modelTargetTokenLength - addAssistantTokens));
             modelPromptMessagesML = this.messagesToChatML(requestMessages);
             tokenLength = encode(modelPromptMessagesML).length;
-
-
-
-            }
-
+
+            modelPromptMessagesML = truncatePromptIfNecessary(modelPromptMessagesML, tokenLength, this.getModelMaxTokenLength(), modelTargetTokenLength, pathwayResolver);
+
             const max_tokens = this.getModelMaxTokenLength() - tokenLength;
+
+            if (max_tokens < 0) {
+                throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
+            }
 
             requestParameters = {
                 prompt: modelPromptMessagesML,
@@ -41,11 +55,14 @@ class OpenAICompletionPlugin extends ModelPlugin {
                 stream
             };
         } else {
-
-
-
-
+
+            modelPromptText = truncatePromptIfNecessary(modelPromptText, tokenLength, this.getModelMaxTokenLength(), modelTargetTokenLength, pathwayResolver);
+
             const max_tokens = this.getModelMaxTokenLength() - tokenLength;
+
+            if (max_tokens < 0) {
+                throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
+            }
 
             requestParameters = {
                 prompt: modelPromptText,
@@ -59,9 +76,9 @@ class OpenAICompletionPlugin extends ModelPlugin {
     }
 
     // Execute the request to the OpenAI Completion API
-    async execute(text, parameters, prompt) {
+    async execute(text, parameters, prompt, pathwayResolver) {
         const url = this.requestUrl(text);
-        const requestParameters = this.getRequestParameters(text, parameters, prompt);
+        const requestParameters = this.getRequestParameters(text, parameters, prompt, pathwayResolver);
 
         const data = { ...(this.model.params || {}), ...requestParameters };
         const params = {};