@aj-archipelago/cortex 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +6 -0
- package/graphql/graphql.js +3 -4
- package/graphql/parser.js +1 -21
- package/graphql/pathwayPrompter.js +35 -122
- package/graphql/pathwayResolver.js +109 -35
- package/graphql/plugins/azureTranslatePlugin.js +42 -0
- package/graphql/plugins/modelPlugin.js +164 -0
- package/graphql/plugins/openAiChatPlugin.js +38 -0
- package/graphql/plugins/openAiCompletionPlugin.js +69 -0
- package/graphql/prompt.js +1 -1
- package/graphql/requestState.js +5 -0
- package/graphql/resolver.js +4 -4
- package/graphql/subscriptions.js +15 -2
- package/graphql/typeDef.js +17 -13
- package/lib/request.js +67 -10
- package/lib/requestMonitor.js +43 -0
- package/package.json +14 -5
- package/pathways/basePathway.js +4 -5
- package/pathways/bias.js +1 -0
- package/pathways/paraphrase.js +1 -1
- package/pathways/translate.js +1 -0
- package/tests/chunking.test.js +5 -0
- package/tests/main.test.js +5 -0
- package/tests/translate.test.js +5 -0
package/config.js
CHANGED
@@ -41,6 +41,11 @@ var config = convict({
         default: true,
         env: 'CORTEX_ENABLE_CACHE'
     },
+    enableGraphqlCache: {
+        format: Boolean,
+        default: false,
+        env: 'CORTEX_ENABLE_GRAPHQL_CACHE'
+    },
     defaultModelName: {
         format: String,
         default: null,
@@ -50,6 +55,7 @@ var config = convict({
         format: Object,
         default: {
             "oai-td3": {
+                "type": "OPENAI-COMPLETION",
                 "url": "{{openaiApiUrl}}",
                 "headers": {
                     "Authorization": "Bearer {{openaiApiKey}}",
package/graphql/graphql.js
CHANGED
@@ -17,8 +17,7 @@ const subscriptions = require('./subscriptions');
 const { buildLimiters } = require('../lib/request');
 const { cancelRequestResolver } = require('./resolver');
 const { buildPathways, buildModels } = require('../config');
-
-const requestState = {}; // Stores the state of each request
+const { requestState } = require('./requestState');
 
 const getPlugins = (config) => {
     // server plugins
@@ -28,7 +27,7 @@ const getPlugins = (config) => {
 
     //if cache is enabled and Redis is available, use it
     let cache;
-    if (config.get('
+    if (config.get('enableGraphqlCache') && config.get('storageConnectionString')) {
         cache = new KeyvAdapter(new Keyv(config.get('storageConnectionString'),{
             ssl: true,
             abortConnect: false,
@@ -72,7 +71,7 @@ const getTypedefs = (pathways) => {
     }
 
     type Subscription {
-        requestProgress(
+        requestProgress(requestIds: [String!]): RequestSubscription
     }
     `;
 
package/graphql/parser.js
CHANGED
@@ -1,22 +1,3 @@
-//simples form string single or list return
-const getResponseResult = (data) => {
-    const { choices } = data;
-    if (!choices || !choices.length) {
-        return; //TODO no choices case
-    }
-
-    // if we got a choices array back with more than one choice, return the whole array
-    if (choices.length > 1) {
-        return choices;
-    }
-
-    // otherwise, return the first choice
-    const textResult = choices[0].text && choices[0].text.trim();
-    const messageResult = choices[0].message && choices[0].message.content && choices[0].message.content.trim();
-
-    return messageResult || textResult || null;
-}
-
 //simply trim and parse with given regex
 const regexParser = (text, regex) => {
     return text.trim().split(regex).map(s => s.trim()).filter(s => s.length);
@@ -51,8 +32,7 @@ const parseNumberedObjectList = (text, format) => {
 }
 
 module.exports = {
-    getResponseResult,
     regexParser,
     parseNumberedList,
-    parseNumberedObjectList
+    parseNumberedObjectList,
 };
package/graphql/pathwayPrompter.js
CHANGED
@@ -1,145 +1,58 @@
-
+// PathwayPrompter.js
+const OpenAIChatPlugin = require('./plugins/openAIChatPlugin');
+const OpenAICompletionPlugin = require('./plugins/openAICompletionPlugin');
+const AzureTranslatePlugin = require('./plugins/azureTranslatePlugin');
 const handlebars = require("handlebars");
-const { getResponseResult } = require("./parser");
 const { Exception } = require("handlebars");
-const { encode } = require("gpt-3-encoder");
-
-const DEFAULT_MAX_TOKENS = 4096;
-const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
 
 // register functions that can be called directly in the prompt markdown
-handlebars.registerHelper('stripHTML', function(value) {
+handlebars.registerHelper('stripHTML', function (value) {
     return value.replace(/<[^>]*>/g, '');
-
+});
 
-handlebars.registerHelper('now', function() {
+handlebars.registerHelper('now', function () {
     return new Date().toISOString();
-
-
-class PathwayPrompter {
-    constructor({ config, pathway }) {
-        // If the pathway specifies a model, use that, otherwise use the default
-        this.modelName = pathway.model || config.get('defaultModelName');
-        // Get the model from the config
-        this.model = config.get('models')[this.modelName];
-        // If the model doesn't exist, throw an exception
-        if (!this.model) {
-            throw new Exception(`Model ${this.modelName} not found in config`);
-        }
-        this.environmentVariables = config.getEnv();
-        this.temperature = pathway.temperature;
-        this.pathwayPrompt = pathway.prompt;
-        this.pathwayName = pathway.name;
-        this.promptParameters = {}
-        // Make all of the parameters defined on the pathway itself available to the prompt
-        for (const [k, v] of Object.entries(pathway)) {
-            this.promptParameters[k] = v.default ?? v;
-        }
-        if (pathway.inputParameters) {
-            for (const [k, v] of Object.entries(pathway.inputParameters)) {
-                this.promptParameters[k] = v.default ?? v;
-            }
-        }
-        this.requestCount = 1
-    }
-
-    getModelMaxTokenLength() {
-        return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
-    }
-
-    getPromptTokenRatio() {
-        return this.promptParameters.inputParameters.tokenRatio ?? this.promptParameters.tokenRatio ?? DEFAULT_PROMPT_TOKEN_RATIO;
-    }
-
-    requestUrl() {
-        const generateUrl = handlebars.compile(this.model.url);
-        return generateUrl({ ...this.model, ...this.environmentVariables, ...this.config });
-    }
-
-    requestParameters(text, parameters, prompt) {
-        // the prompt object will either have a messages property or a prompt propery
-        // or it could be a function that returns prompt text
-
-        const combinedParameters = { ...this.promptParameters, ...parameters };
+});
 
-
-
-
-
-        {
-            const compiledMessages = prompt.messages.map((message) => {
-                const compileText = handlebars.compile(message.content);
-                return { role: message.role,
-                    content: compileText({...combinedParameters, text})
-                }
-            })
+handlebars.registerHelper('toJSON', function(object) {
+    return JSON.stringify(object);
+});
+
 
-
-
-            temperature: this.temperature ?? 0.7,
-        }
-    }
+class PathwayPrompter {
+    constructor({ config, pathway }) {
 
-
-
+        const modelName = pathway.model || config.get('defaultModelName');
+        const model = config.get('models')[modelName];
 
-        if (
-
-        }
-        else {
-            promptText = prompt.prompt;
+        if (!model) {
+            throw new Exception(`Model ${modelName} not found in config`);
         }
 
-
-        const constructedPrompt = interpolatePrompt({ ...combinedParameters, text });
-
-        // this prompt could be for either a chat-style conversation or a completion-style
-        // conversation. They require different parameters.
-
-        let params = {};
+        let plugin;
 
-
-
-
-
-
-
-
-
-
-
-
-
-            // "n": 1,
-            // "presence_penalty": 0,
-            // "frequency_penalty": 0,
-            // "best_of": 1,
-        }
+        switch (model.type) {
+            case 'OPENAI-CHAT':
+                plugin = new OpenAIChatPlugin(config, pathway);
+                break;
+            case 'AZURE-TRANSLATE':
+                plugin = new AzureTranslatePlugin(config, pathway);
+                break;
+            case 'OPENAI-COMPLETION':
+                plugin = new OpenAICompletionPlugin(config, pathway);
+                break;
+            default:
+                throw new Exception(`Unsupported model type: ${model.type}`);
         }
 
-
+        this.plugin = plugin;
     }
 
     async execute(text, parameters, prompt) {
-
-
-        const url = this.requestUrl(text);
-        const params = { ...(this.model.params || {}), ...requestParameters }
-        const headers = this.model.headers || {};
-        const data = await request({ url, params, headers }, this.modelName);
-        const modelInput = params.prompt || params.messages[0].content;
-        console.log(`=== ${this.pathwayName}.${this.requestCount++} ===`)
-        console.log(`\x1b[36m${modelInput}\x1b[0m`)
-        console.log(`\x1b[34m> ${getResponseResult(data)}\x1b[0m`)
-
-        if (data.error) {
-            throw new Exception(`An error was returned from the server: ${JSON.stringify(data.error)}`);
-        }
-
-        return getResponseResult(data);
+        return await this.plugin.execute(text, parameters, prompt);
     }
 }
 
 module.exports = {
-
-}
+    PathwayPrompter
+};
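For orientation, the rewritten PathwayPrompter above is now just a dispatcher: it looks up the model named by the pathway (or the default), switches on the model's "type", and forwards execute() to the matching plugin. A minimal usage sketch, assuming a convict-style config and a pathway object shaped like the ones in this package (both placeholders here):

// Illustrative sketch only, not code from the package diff.
// Assumes config.get('models') contains an entry whose "type" is
// OPENAI-CHAT, OPENAI-COMPLETION or AZURE-TRANSLATE.
const { PathwayPrompter } = require('./pathwayPrompter');

async function runPathway(config, pathway, text, parameters, prompt) {
    const prompter = new PathwayPrompter({ config, pathway });
    // Internally this delegates to OpenAIChatPlugin, OpenAICompletionPlugin
    // or AzureTranslatePlugin via this.plugin.execute(...)
    return await prompter.execute(text, parameters, prompt);
}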
package/graphql/pathwayResolver.js
CHANGED
@@ -8,6 +8,7 @@ const { getFirstNToken, getLastNToken, getSemanticChunks } = require('./chunker'
 const { PathwayResponseParser } = require('./pathwayResponseParser');
 const { Prompt } = require('./prompt');
 const { getv, setv } = require('../lib/keyValueStorageClient');
+const { requestState } = require('./requestState');
 
 const MAX_PREVIOUS_RESULT_TOKEN_LENGTH = 1000;
 
@@ -17,9 +18,8 @@ const callPathway = async (config, pathwayName, requestState, { text, ...paramet
 }
 
 class PathwayResolver {
-    constructor({ config, pathway
+    constructor({ config, pathway }) {
         this.config = config;
-        this.requestState = requestState;
         this.pathway = pathway;
         this.useInputChunking = pathway.useInputChunking;
         this.chunkMaxTokenLength = 0;
@@ -48,19 +48,71 @@ class PathwayResolver {
         this.pathwayPrompt = pathway.prompt;
     }
 
-    async
-
-
-
-
-
-
-
-
+    async asyncResolve(args) {
+        // Wait with a sleep promise for the race condition to resolve
+        // const results = await Promise.all([this.promptAndParse(args), await new Promise(resolve => setTimeout(resolve, 250))]);
+        const data = await this.promptAndParse(args);
+        // Process the results for async
+        if(args.async || typeof data === 'string') { // if async flag set or processed async and got string response
+            const { completedCount, totalCount } = requestState[this.requestId];
+            requestState[this.requestId].data = data;
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                    data: JSON.stringify(data),
+                }
+            });
+        } else { //stream
+            for (const handle of data) {
+                handle.on('data', data => {
+                    console.log(data.toString());
+                    const lines = data.toString().split('\n').filter(line => line.trim() !== '');
+                    for (const line of lines) {
+                        const message = line.replace(/^data: /, '');
+                        if (message === '[DONE]') {
+                            // Send stream finished message
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: null,
+                                    progress: 1,
+                                }
+                            });
+                            return; // Stream finished
+                        }
+                        try {
+                            const parsed = JSON.parse(message);
+                            const result = this.pathwayPrompter.plugin.parseResponse(parsed)
+
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: JSON.stringify(result)
+                                }
+                            });
+                        } catch (error) {
+                            console.error('Could not JSON parse stream message', message, error);
+                        }
                     }
                 });
-        });
 
+                // data.on('end', () => {
+                //     console.log("stream done");
+                // });
+            }
+
+        }
+    }
+
+    async resolve(args) {
+        if (args.async || args.stream) {
+            // Asyncronously process the request
+            // this.asyncResolve(args);
+            if (!requestState[this.requestId]) {
+                requestState[this.requestId] = {}
+            }
+            requestState[this.requestId] = { ...requestState[this.requestId], args, resolver: this.asyncResolve.bind(this) };
             return this.requestId;
         }
         else {
@@ -70,7 +122,6 @@ class PathwayResolver {
         }
 
     async promptAndParse(args) {
-
         // Get saved context from contextId or change contextId if needed
         const { contextId } = args;
         this.savedContextId = contextId ? contextId : null;
@@ -98,7 +149,7 @@ class PathwayResolver {
         if (this.pathway.inputChunkSize) {
             chunkMaxChunkTokenLength = Math.min(this.pathway.inputChunkSize, this.chunkMaxTokenLength);
         } else {
-
+            chunkMaxChunkTokenLength = this.chunkMaxTokenLength;
         }
         const encoded = encode(text);
         if (!this.useInputChunking || encoded.length <= chunkMaxChunkTokenLength) { // no chunking, return as is
@@ -106,7 +157,7 @@ class PathwayResolver {
             const warnText = `Your input is possibly too long, truncating! Text length: ${text.length}`;
             this.warnings.push(warnText);
             console.warn(warnText);
-            text = truncate(text, chunkMaxChunkTokenLength);
+            text = this.truncate(text, chunkMaxChunkTokenLength);
         }
         return [text];
     }
@@ -116,7 +167,7 @@ class PathwayResolver {
     }
 
     truncate(str, n) {
-        if (this.pathwayPrompter.promptParameters.truncateFromFront) {
+        if (this.pathwayPrompter.plugin.promptParameters.truncateFromFront) {
             return getFirstNToken(str, n);
         }
         return getLastNToken(str, n);
@@ -124,7 +175,7 @@ class PathwayResolver {
 
     async summarizeIfEnabled({ text, ...parameters }) {
         if (this.pathway.useInputSummarization) {
-            return await callPathway(this.config, 'summary',
+            return await callPathway(this.config, 'summary', requestState, { text, targetLength: 1000, ...parameters });
         }
         return text;
     }
@@ -134,7 +185,7 @@ class PathwayResolver {
         // find the longest prompt
         const maxPromptTokenLength = Math.max(...this.prompts.map(({ prompt }) => prompt ? encode(String(prompt)).length : 0));
         const maxMessagesTokenLength = Math.max(...this.prompts.map(({ messages }) => messages ? messages.reduce((acc, {role, content}) => {
-            return acc + encode(role).length + encode(content).length;
+            return (role && content) ? acc + encode(role).length + encode(content).length : acc;
         }, 0) : 0));
 
         const maxTokenLength = Math.max(maxPromptTokenLength, maxMessagesTokenLength);
@@ -144,8 +195,8 @@ class PathwayResolver {
 
         // the token ratio is the ratio of the total prompt to the result text - both have to be included
        // in computing the max token length
-        const promptRatio = this.pathwayPrompter.getPromptTokenRatio();
-        let maxChunkToken = promptRatio * this.pathwayPrompter.getModelMaxTokenLength() - maxTokenLength;
+        const promptRatio = this.pathwayPrompter.plugin.getPromptTokenRatio();
+        let maxChunkToken = promptRatio * this.pathwayPrompter.plugin.getModelMaxTokenLength() - maxTokenLength;
 
         // if we have to deal with prompts that have both text input
         // and previous result, we need to split the maxChunkToken in half
@@ -160,18 +211,25 @@ class PathwayResolver {
 
     // Process the request and return the result
     async processRequest({ text, ...parameters }) {
-
         text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
         const chunks = this.processInputText(text);
 
         const anticipatedRequestCount = chunks.length * this.prompts.length;
 
-        if ((
+        if ((requestState[this.requestId] || {}).canceled) {
             throw new Error('Request canceled');
         }
 
         // Store the request state
-
+        requestState[this.requestId] = { ...requestState[this.requestId], totalCount: anticipatedRequestCount, completedCount: 0 };
+
+        if (chunks.length > 1) {
+            // stream behaves as async if there are multiple chunks
+            if (parameters.stream) {
+                parameters.async = true;
+                parameters.stream = false;
+            }
+        }
 
         // If pre information is needed, apply current prompt with previous prompt info, only parallelize current call
         if (this.pathway.useParallelChunkProcessing) {
@@ -189,17 +247,31 @@ class PathwayResolver {
         let result = '';
 
         for (let i = 0; i < this.prompts.length; i++) {
+            const currentParameters = { ...parameters, previousResult };
+
+            if (currentParameters.stream) { // stream special flow
+                if (i < this.prompts.length - 1) {
+                    currentParameters.stream = false; // if not the last prompt then don't stream
+                }
+                else {
+                    // use the stream parameter if not async
+                    currentParameters.stream = currentParameters.async ? false : currentParameters.stream;
+                }
+            }
+
             // If the prompt doesn't contain {{text}} then we can skip the chunking, and also give that token space to the previous result
             if (!this.prompts[i].usesTextInput) {
                 // Limit context to it's N + text's characters
                 previousResult = this.truncate(previousResult, 2 * this.chunkMaxTokenLength);
-                result = await this.applyPrompt(this.prompts[i], null,
+                result = await this.applyPrompt(this.prompts[i], null, currentParameters);
             } else {
                 // Limit context to N characters
                 previousResult = this.truncate(previousResult, this.chunkMaxTokenLength);
                 result = await Promise.all(chunks.map(chunk =>
-                    this.applyPrompt(this.prompts[i], chunk,
-
+                    this.applyPrompt(this.prompts[i], chunk, currentParameters)));
+                if (!currentParameters.stream) {
+                    result = result.join("\n\n")
+                }
             }
 
             // If this is any prompt other than the last, use the result as the previous context
@@ -225,20 +297,22 @@ class PathwayResolver {
     }
 
     async applyPrompt(prompt, text, parameters) {
-        if (
+        if (requestState[this.requestId].canceled) {
            return;
        }
        const result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt);
-
+        requestState[this.requestId].completedCount++;
 
-        const { completedCount, totalCount } =
+        const { completedCount, totalCount } = requestState[this.requestId];
 
-
-
-
-
-
-
+        if (completedCount < totalCount) {
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                }
+            });
+        }
 
        if (prompt.saveResultTo) {
            this.savedContext[prompt.saveResultTo] = result;
package/graphql/plugins/azureTranslatePlugin.js
ADDED
@@ -0,0 +1,42 @@
+// AzureTranslatePlugin.js
+const ModelPlugin = require('./modelPlugin');
+const handlebars = require("handlebars");
+
+class AzureTranslatePlugin extends ModelPlugin {
+    constructor(config, modelName, pathway) {
+        super(config, modelName, pathway);
+    }
+
+    // Set up parameters specific to the Azure Translate API
+    requestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const modelPrompt = this.getModelPrompt(prompt, parameters);
+        const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+
+        return {
+            data: [
+                {
+                    Text: modelPromptText,
+                },
+            ],
+            params: {
+                to: combinedParameters.to
+            }
+        };
+    }
+
+    // Execute the request to the Azure Translate API
+    async execute(text, parameters, prompt) {
+        const requestParameters = this.requestParameters(text, parameters, prompt);
+
+        const url = this.requestUrl(text);
+
+        const data = requestParameters.data;
+        const params = requestParameters.params;
+        const headers = this.model.headers || {};
+
+        return this.executeRequest(url, data, params, headers);
+    }
+}
+
+module.exports = AzureTranslatePlugin;
package/graphql/plugins/modelPlugin.js
ADDED
@@ -0,0 +1,164 @@
+// ModelPlugin.js
+const handlebars = require('handlebars');
+const { request } = require("../../lib/request");
+const { encode } = require("gpt-3-encoder");
+
+const DEFAULT_MAX_TOKENS = 4096;
+const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
+
+class ModelPlugin {
+    constructor(config, pathway) {
+        // If the pathway specifies a model, use that, otherwise use the default
+        this.modelName = pathway.model || config.get('defaultModelName');
+        // Get the model from the config
+        this.model = config.get('models')[this.modelName];
+        // If the model doesn't exist, throw an exception
+        if (!this.model) {
+            throw new Error(`Model ${this.modelName} not found in config`);
+        }
+
+        this.config = config;
+        this.environmentVariables = config.getEnv();
+        this.temperature = pathway.temperature;
+        this.pathwayPrompt = pathway.prompt;
+        this.pathwayName = pathway.name;
+        this.promptParameters = {};
+
+        // Make all of the parameters defined on the pathway itself available to the prompt
+        for (const [k, v] of Object.entries(pathway)) {
+            this.promptParameters[k] = v.default ?? v;
+        }
+        if (pathway.inputParameters) {
+            for (const [k, v] of Object.entries(pathway.inputParameters)) {
+                this.promptParameters[k] = v.default ?? v;
+            }
+        }
+
+        this.requestCount = 1;
+        this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
+    }
+
+    getModelMaxTokenLength() {
+        return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
+    }
+
+    getPromptTokenRatio() {
+        // TODO: Is this the right order of precedence? inputParameters should maybe be second?
+        return this.promptParameters.inputParameters.tokenRatio ?? this.promptParameters.tokenRatio ?? DEFAULT_PROMPT_TOKEN_RATIO;
+    }
+
+
+    getModelPrompt(prompt, parameters) {
+        if (typeof(prompt) === 'function') {
+            return prompt(parameters);
+        } else {
+            return prompt;
+        }
+    }
+
+    getModelPromptMessages(modelPrompt, combinedParameters, text) {
+        if (!modelPrompt.messages) {
+            return null;
+        }
+
+        // First run handlebars compile on the pathway messages
+        const compiledMessages = modelPrompt.messages.map((message) => {
+            if (message.content) {
+                const compileText = handlebars.compile(message.content);
+                return {
+                    role: message.role,
+                    content: compileText({ ...combinedParameters, text }),
+                };
+            } else {
+                return message;
+            }
+        });
+
+        // Next add in any parameters that are referenced by name in the array
+        const expandedMessages = compiledMessages.flatMap((message) => {
+            if (typeof message === 'string') {
+                const match = message.match(/{{(.+?)}}/);
+                const placeholder = match ? match[1] : null;
+                if (placeholder === null) {
+                    return message;
+                } else {
+                    return combinedParameters[placeholder] || [];
+                }
+            } else {
+                return [message];
+            }
+        });
+
+        return expandedMessages;
+    }
+
+    requestUrl() {
+        const generateUrl = handlebars.compile(this.model.url);
+        return generateUrl({ ...this.model, ...this.environmentVariables, ...this.config });
+    }
+
+    //simples form string single or list return
+    parseResponse(data) {
+        const { choices } = data;
+        if (!choices || !choices.length) {
+            if (Array.isArray(data) && data.length > 0 && data[0].translations) {
+                return data[0].translations[0].text.trim();
+            } else {
+                return data;
+            }
+        }
+
+        // if we got a choices array back with more than one choice, return the whole array
+        if (choices.length > 1) {
+            return choices;
+        }
+
+        // otherwise, return the first choice
+        const textResult = choices[0].text && choices[0].text.trim();
+        const messageResult = choices[0].message && choices[0].message.content && choices[0].message.content.trim();
+
+        return messageResult ?? textResult ?? null;
+    }
+
+    logMessagePreview(messages) {
+        messages.forEach((message, index) => {
+            const words = message.content.split(" ");
+            const tokenCount = encode(message.content).length;
+            let preview;
+
+            if (index === 0) {
+                preview = message.content;
+            } else {
+                preview = words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
+            }
+
+            console.log(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
+        });
+    }
+
+    async executeRequest(url, data, params, headers) {
+        const responseData = await request({ url, data, params, headers, cache: this.shouldCache }, this.modelName);
+        const modelInput = data.prompt || (data.messages && data.messages[0].content) || data[0].Text || null;
+
+        console.log(`=== ${this.pathwayName}.${this.requestCount++} ===`);
+
+        if (data.messages && data.messages.length > 1) {
+            this.logMessagePreview(data.messages);
+        } else {
+            console.log(`\x1b[36m${modelInput}\x1b[0m`);
+        }
+
+        console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
+
+        if (responseData.error) {
+            throw new Exception(`An error was returned from the server: ${JSON.stringify(responseData.error)}`);
+        }
+
+        return this.parseResponse(responseData);
+    }
+
+}
+
+module.exports = ModelPlugin;
+
+
package/graphql/plugins/openAiChatPlugin.js
ADDED
@@ -0,0 +1,38 @@
+// OpenAIChatPlugin.js
+const ModelPlugin = require('./modelPlugin');
+const handlebars = require("handlebars");
+
+class OpenAIChatPlugin extends ModelPlugin {
+    constructor(config, pathway) {
+        super(config, pathway);
+    }
+
+    // Set up parameters specific to the OpenAI Chat API
+    requestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const modelPrompt = this.getModelPrompt(prompt, parameters);
+        const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+        const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+
+        const { stream } = parameters;
+
+        return {
+            messages: modelPromptMessages || [{ "role": "user", "content": modelPromptText }],
+            temperature: this.temperature ?? 0.7,
+            stream
+        };
+    }
+
+    // Execute the request to the OpenAI Chat API
+    async execute(text, parameters, prompt) {
+        const url = this.requestUrl(text);
+        const requestParameters = this.requestParameters(text, parameters, prompt);
+
+        const data = { ...(this.model.params || {}), ...requestParameters };
+        const params = {};
+        const headers = this.model.headers || {};
+        return this.executeRequest(url, data, params, headers);
+    }
+}
+
+module.exports = OpenAIChatPlugin;
package/graphql/plugins/openAiCompletionPlugin.js
ADDED
@@ -0,0 +1,69 @@
+// OpenAICompletionPlugin.js
+const ModelPlugin = require('./modelPlugin');
+const handlebars = require("handlebars");
+const { encode } = require("gpt-3-encoder");
+
+//convert a messages array to a simple chatML format
+const messagesToChatML = (messages) => {
+    let output = "";
+    if (messages && messages.length) {
+        for (let message of messages) {
+            output += (message.role && message.content) ? `<|im_start|>${message.role}\n${message.content}\n<|im_end|>\n` : `${message}\n`;
+        }
+        // you always want the assistant to respond next so add a
+        // directive for that
+        output += "<|im_start|>assistant\n";
+    }
+    return output;
+}
+
+class OpenAICompletionPlugin extends ModelPlugin {
+    constructor(config, pathway) {
+        super(config, pathway);
+    }
+
+    // Set up parameters specific to the OpenAI Completion API
+    requestParameters(text, parameters, prompt) {
+        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const modelPrompt = this.getModelPrompt(prompt, parameters);
+        const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+        const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+        const modelPromptMessagesML = messagesToChatML(modelPromptMessages);
+
+        const { stream } = parameters;
+
+        if (modelPromptMessagesML) {
+            return {
+                prompt: modelPromptMessagesML,
+                max_tokens: this.getModelMaxTokenLength() - encode(modelPromptMessagesML).length - 1,
+                temperature: this.temperature ?? 0.7,
+                top_p: 0.95,
+                frequency_penalty: 0,
+                presence_penalty: 0,
+                stop: ["<|im_end|>"],
+                stream
+            };
+        } else {
+            return {
+                prompt: modelPromptText,
+                max_tokens: this.getModelMaxTokenLength() - encode(modelPromptText).length - 1,
+                temperature: this.temperature ?? 0.7,
+                stream
+            };
+        }
+    }
+
+    // Execute the request to the OpenAI Completion API
+    async execute(text, parameters, prompt) {
+        const url = this.requestUrl(text);
+        const requestParameters = this.requestParameters(text, parameters, prompt);
+
+        const data = { ...(this.model.params || {}), ...requestParameters };
+        const params = {};
+        const headers = this.model.headers || {};
+        return this.executeRequest(url, data, params, headers);
+    }
+}
+
+module.exports = OpenAICompletionPlugin;
+
package/graphql/prompt.js
CHANGED
@@ -25,7 +25,7 @@ function promptContains(variable, prompt) {
     // if it's an array, it's the messages format
     if (Array.isArray(prompt)) {
         prompt.forEach(p => {
-            while (
+            while (match = p.content && regexp.exec(p.content)) {
                 matches.push(match[1]);
             }
         });
package/graphql/resolver.js
CHANGED
@@ -5,10 +5,10 @@ const { PathwayResolver } = require("./pathwayResolver");
 // (parent, args, contextValue, info)
 const rootResolver = async (parent, args, contextValue, info) => {
     const { config, pathway, requestState } = contextValue;
-    const { temperature } = pathway;
+    const { temperature, enableGraphqlCache } = pathway;
 
-    // Turn
-    if (temperature == 0) {
+    // Turn on graphql caching if enableGraphqlCache true and temperature is 0
+    if (enableGraphqlCache && temperature == 0) { // ||
         info.cacheControl.setCacheHint({ maxAge: 60 * 60 * 24, scope: 'PUBLIC' });
     }
 
@@ -16,7 +16,7 @@ const rootResolver = async (parent, args, contextValue, info) => {
     contextValue.pathwayResolver = pathwayResolver;
 
     // Add request parameters back as debug
-    const requestParameters = pathwayResolver.prompts.map((prompt) => pathwayResolver.pathwayPrompter.requestParameters(args.text, args, prompt));
+    const requestParameters = pathwayResolver.prompts.map((prompt) => pathwayResolver.pathwayPrompter.plugin.requestParameters(args.text, args, prompt));
     const debug = JSON.stringify(requestParameters);
 
     // Execute the request with timeout
package/graphql/subscriptions.js
CHANGED
@@ -4,14 +4,27 @@
 
 const pubsub = require("./pubsub");
 const { withFilter } = require("graphql-subscriptions");
+const { requestState } = require("./requestState");
 
 const subscriptions = {
     requestProgress: {
         subscribe: withFilter(
-            () =>
+            (_, args, __, info) => {
+                const { requestIds } = args;
+                for (const requestId of requestIds) {
+                    if (!requestState[requestId]) {
+                        console.log(`requestProgress, requestId: ${requestId} not found`);
+                    } else {
+                        console.log(`starting async requestProgress, requestId: ${requestId}`);
+                        const { resolver, args } = requestState[requestId];
+                        resolver(args);
+                    }
+                }
+                return pubsub.asyncIterator(['REQUEST_PROGRESS'])
+            },
             (payload, variables) => {
                 return (
-                    payload.requestProgress.requestId
+                    variables.requestIds.includes(payload.requestProgress.requestId)
                 );
             },
         ),
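For context, requestProgress now takes a list of request IDs and starts the stored resolver for each before returning the async iterator. A hypothetical client-side sketch of consuming it; the selection set assumes RequestSubscription exposes the requestId, progress and data fields that the resolvers publish:

// Illustrative only, not code from the package; the requestId would be the
// string returned by a query executed with async: true (see pathwayResolver.resolve).
const REQUEST_PROGRESS = `
    subscription OnRequestProgress($requestIds: [String!]) {
        requestProgress(requestIds: $requestIds) {
            requestId
            progress
            data
        }
    }`;
// Pass { requestIds: [requestId] } as variables with any GraphQL subscription client.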
package/graphql/typeDef.js
CHANGED
@@ -12,10 +12,11 @@ const typeDef = (pathway) => {
     const fieldsStr = !fields ? `` : fields.map(f => `${f}: String`).join('\n ');
 
     const typeName = fields ? `${objName}Result` : `String`;
+    const messageType = `input Message { role: String, content: String }`;
+
     const type = fields ? `type ${typeName} {
         ${fieldsStr}
-    }` : ``;
-
+    }` : ``;
 
     const resultStr = pathway.list ? `[${typeName}]` : typeName;
 
@@ -29,18 +30,21 @@ const typeDef = (pathway) => {
 
 
     const params = { ...defaultInputParameters, ...inputParameters };
-    const paramsStr = Object.entries(params).map(
-        ([key, value]) => `${key}: ${GRAPHQL_TYPE_MAP[typeof (value)]} = ${typeof (value) == `string` ? `"${value}"` : value}`).join('\n');
-
 
-
-
-
-
-
-    }
-
+    const paramsStr = Object.entries(params).map(
+        ([key, value]) => {
+            if (typeof value === 'object' && Array.isArray(value)) {
+                return `${key}: [Message] = []`;
+            } else {
+                return `${key}: ${GRAPHQL_TYPE_MAP[typeof (value)]} = ${typeof (value) === 'string' ? `"${value}"` : value}`;
+            }
+        }
+    ).join('\n');
+
+
+    const definition = `${messageType}\n\n${type}\n\n${responseType}\n\nextend type Query {${name}(${paramsStr}): ${objName}}`;
+    //console.log(definition);
+    return definition;
 }
 
 module.exports = {
package/lib/request.js
CHANGED
@@ -1,34 +1,88 @@
-const axios = require('axios');
 const Bottleneck = require("bottleneck/es5");
+const RequestMonitor = require('./requestMonitor');
+const { config } = require('../config');
+let axios = require('axios');
+
+if (config.get('enableCache')) {
+    // Setup cache
+    const { setupCache } = require('axios-cache-interceptor');
+    axios = setupCache(axios, {
+        // enable cache for all requests by default
+        methods: ['get', 'post', 'put', 'delete', 'patch'],
+        interpretHeader: false,
+        ttl: 1000 * 60 * 60 * 24 * 7, // 7 days
+    });
+}
 
 const limiters = {};
+const monitors = {};
 
 const buildLimiters = (config) => {
     console.log('Building limiters...');
     for (const [name, model] of Object.entries(config.get('models'))) {
+        const rps = model.requestsPerSecond ?? 100;
         limiters[name] = new Bottleneck({
-            minTime: 1000 /
-
-
+            minTime: 1000 / rps,
+            maxConcurrent: rps,
+            reservoir: rps, // Number of tokens available initially
+            reservoirRefreshAmount: rps, // Number of tokens added per interval
+            reservoirRefreshInterval: 1000, // Interval in milliseconds
+        });
+        monitors[name] = new RequestMonitor();
+    }
+}
+
+setInterval(() => {
+    const monitorKeys = Object.keys(monitors);
+
+    // Skip logging if the monitors object does not exist or is empty
+    if (!monitorKeys || monitorKeys.length === 0) {
+        return;
     }
+
+    monitorKeys.forEach((monitorName) => {
+        const monitor = monitors[monitorName];
+        const callRate = monitor.getPeakCallRate();
+        const error429Rate = monitor.getError429Rate();
+        if (callRate > 0) {
+            console.log('------------------------');
+            console.log(`${monitorName} Call rate: ${callRate} calls/sec, 429 errors: ${error429Rate * 100}%`);
+            console.log('------------------------');
+            // Reset the rate monitor to start a new monitoring interval.
+            monitor.reset();
+        }
+    });
+}, 10000); // Log rates every 10 seconds (10000 ms).
+
+const postWithMonitor = async (model, url, data, axiosConfigObj) => {
+    const monitor = monitors[model];
+    monitor.incrementCallCount();
+    return axios.post(url, data, axiosConfigObj);
 }
 
 const MAX_RETRY = 10;
-const postRequest = async ({ url, params, headers }, model) => {
+const postRequest = async ({ url, data, params, headers, cache }, model) => {
     let retry = 0;
     const errors = []
     for (let i = 0; i < MAX_RETRY; i++) {
         try {
             if (i > 0) {
-                console.log(`Retrying request #retry ${i}: ${JSON.stringify(
+                console.log(`Retrying request #retry ${i}: ${JSON.stringify(data)}...`);
                 await new Promise(r => setTimeout(r, 200 * Math.pow(2, i))); // exponential backoff
-            }
+            }
             if (!limiters[model]) {
                 throw new Error(`No limiter for model ${model}!`);
             }
-
+            const axiosConfigObj = { params, headers, cache };
+            if (params.stream || data.stream) {
+                axiosConfigObj.responseType = 'stream';
+            }
+            return await limiters[model].schedule(() => postWithMonitor(model, url, data, axiosConfigObj));
         } catch (e) {
-            console.error(`Failed request with
+            console.error(`Failed request with data ${JSON.stringify(data)}: ${e}`);
+            if (e.response.status === 429) {
+                monitors[model].incrementError429Count();
+            }
             errors.push(e);
         }
     }
@@ -37,7 +91,10 @@ const postRequest = async ({ url, params, headers }, model) => {
 
 const request = async (params, model) => {
     const response = await postRequest(params, model);
-    const { error, data } = response;
+    const { error, data, cached } = response;
+    if (cached) {
+        console.info('/Request served with cached response.');
+    }
     if (error && error.length > 0) {
         const lastError = error[error.length - 1];
         return { error: lastError.toJSON() ?? lastError ?? error };
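For context, buildLimiters above now keys each Bottleneck limiter and RequestMonitor off an optional requestsPerSecond field on the model definition, defaulting to 100. A sketch of a model entry exercising that field; only "type", "url", "headers" and "requestsPerSecond" are keys seen in this diff, and the values are placeholders:

// Hypothetical model config entry for illustration, not from the package.
const models = {
    "oai-td3": {
        "type": "OPENAI-COMPLETION",
        "url": "{{openaiApiUrl}}",
        "headers": { "Authorization": "Bearer {{openaiApiKey}}" },
        "requestsPerSecond": 10 // feeds Bottleneck minTime/reservoir in buildLimiters(); 100 if omitted
    }
};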
package/lib/requestMonitor.js
ADDED
@@ -0,0 +1,43 @@
+class RequestMonitor {
+    constructor() {
+        this.callCount = 0;
+        this.peakCallRate = 0;
+        this.error429Count = 0;
+        this.startTime = new Date();
+    }
+
+    incrementCallCount() {
+        this.callCount++;
+        if (this.getCallRate() > this.peakCallRate) {
+            this.peakCallRate = this.getCallRate();
+        }
+    }
+
+    incrementError429Count() {
+        this.error429Count++;
+    }
+
+    getCallRate() {
+        const currentTime = new Date();
+        const timeElapsed = (currentTime - this.startTime) / 1000; // time elapsed in seconds
+        return timeElapsed < 1 ? this.callCount : this.callCount / timeElapsed;
+    }
+
+    getPeakCallRate() {
+        return this.peakCallRate;
+    }
+
+    getError429Rate() {
+        return this.error429Count / this.callCount;
+    }
+
+    reset() {
+        this.callCount = 0;
+        this.error429Count = 0;
+        this.peakCallRate = 0;
+        this.startTime = new Date();
+    }
+}
+
+module.exports = RequestMonitor;
+
package/package.json
CHANGED
@@ -1,14 +1,22 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "0.0.
-  "description": "
+  "version": "0.0.6",
+  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/aj-archipelago/cortex.git"
   },
   "keywords": [
     "cortex",
-    "
+    "AI",
+    "prompt engineering",
+    "LLM",
+    "OpenAI",
+    "Azure",
+    "GPT-3",
+    "GPT-4",
+    "chatGPT",
+    "GraphQL"
   ],
   "main": "index.js",
   "scripts": {
@@ -22,11 +30,12 @@
     "@apollo/utils.keyvadapter": "^1.1.2",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
-    "apollo-server": "^3.
+    "apollo-server": "^3.12.0",
     "apollo-server-core": "^3.11.1",
     "apollo-server-express": "^3.11.1",
     "apollo-server-plugin-response-cache": "^3.8.1",
-    "axios": "^1.
+    "axios": "^1.3.4",
+    "axios-cache-interceptor": "^1.0.1",
     "bottleneck": "^2.19.5",
     "compromise": "^14.8.1",
     "compromise-paragraphs": "^0.1.0",
package/pathways/basePathway.js
CHANGED
@@ -1,4 +1,3 @@
-const { parseResponse } = require("../graphql/parser");
 const { rootResolver, resolver } = require("../graphql/resolver");
 const { typeDef } = require('../graphql/typeDef')
 
@@ -7,9 +6,9 @@ module.exports = {
     prompt: `{{text}}`,
     defaultInputParameters: {
         text: ``,
-
-
-
+        async: false, // switch to enable async mode
+        contextId: ``, // used to identify the context of the request,
+        stream: false, // switch to enable stream mode
     },
     inputParameters: {},
     typeDef,
@@ -19,5 +18,5 @@ module.exports = {
     useParallelChunkProcessing: false,
     useInputSummarization: false,
     truncateFromFront: false,
-    timeout:
+    timeout: 120, // in seconds
 }
package/pathways/bias.js
CHANGED
package/pathways/paraphrase.js
CHANGED
package/pathways/translate.js
CHANGED
package/tests/chunking.test.js
CHANGED
@@ -4,6 +4,11 @@ jest.setTimeout(1800000);
 
 const testServer = getTestServer();
 
+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('chunking test of translate endpoint with huge text', async () => {
     const response = await testServer.executeOperation({
         query: 'query translate($text: String!) { translate(text: $text) { result } }',
package/tests/main.test.js
CHANGED
@@ -14,6 +14,11 @@ const getTestServer = () => {
 
 const testServer = getTestServer();
 
+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('validates bias endpoint', async () => {
     const response = await testServer.executeOperation({
         query: 'query bias($text: String!) { bias(text: $text) { result } }',
package/tests/translate.test.js
CHANGED
@@ -4,6 +4,11 @@ jest.setTimeout(1800000);
 
 const testServer = getTestServer();
 
+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('test translate endpoint with huge arabic text english translation and check return non-arabic/english', async () => {
     const response = await testServer.executeOperation({
         query: 'query translate($text: String!, $to:String) { translate(text: $text, to:$to) { result } }',