@aj-archipelago/cortex 0.0.5 → 0.0.6
- package/config.js +5 -0
- package/graphql/graphql.js +3 -4
- package/graphql/pathwayResolver.js +104 -30
- package/graphql/plugins/modelPlugin.js +33 -7
- package/graphql/plugins/openAiChatPlugin.js +3 -0
- package/graphql/plugins/openAiCompletionPlugin.js +5 -1
- package/graphql/requestState.js +5 -0
- package/graphql/resolver.js +3 -3
- package/graphql/subscriptions.js +15 -2
- package/lib/request.js +65 -8
- package/lib/requestMonitor.js +43 -0
- package/package.json +14 -5
- package/pathways/basePathway.js +3 -4
- package/pathways/bias.js +1 -0
- package/pathways/translate.js +1 -0
- package/tests/chunking.test.js +5 -0
- package/tests/main.test.js +5 -0
- package/tests/translate.test.js +5 -0
package/config.js
CHANGED
package/graphql/graphql.js
CHANGED

@@ -17,8 +17,7 @@ const subscriptions = require('./subscriptions');
 const { buildLimiters } = require('../lib/request');
 const { cancelRequestResolver } = require('./resolver');
 const { buildPathways, buildModels } = require('../config');
-
-const requestState = {}; // Stores the state of each request
+const { requestState } = require('./requestState');

 const getPlugins = (config) => {
     // server plugins
@@ -28,7 +27,7 @@ const getPlugins = (config) => {

     //if cache is enabled and Redis is available, use it
     let cache;
-    if (config.get('
+    if (config.get('enableGraphqlCache') && config.get('storageConnectionString')) {
         cache = new KeyvAdapter(new Keyv(config.get('storageConnectionString'),{
             ssl: true,
             abortConnect: false,
@@ -72,7 +71,7 @@ const getTypedefs = (pathways) => {
     }

     type Subscription {
-        requestProgress(
+        requestProgress(requestIds: [String!]): RequestSubscription
     }
     `;

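Note: the requestState module introduced here (package/graphql/requestState.js, +5 -0 in the list above) is not expanded in this diff. Given how it is required by graphql.js, pathwayResolver.js, and subscriptions.js, a plausible sketch is nothing more than a shared mutable map keyed by request ID; this is a hypothetical reconstruction, not the published source:

// requestState.js, hypothetical reconstruction of the unexpanded +5/-0 file above.
// A single shared object lets graphql.js, pathwayResolver.js, and subscriptions.js
// coordinate progress counts, cancellation flags, and deferred resolvers per request.
const requestState = {};

module.exports = { requestState };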
package/graphql/pathwayResolver.js
CHANGED

@@ -8,6 +8,7 @@ const { getFirstNToken, getLastNToken, getSemanticChunks } = require('./chunker'
 const { PathwayResponseParser } = require('./pathwayResponseParser');
 const { Prompt } = require('./prompt');
 const { getv, setv } = require('../lib/keyValueStorageClient');
+const { requestState } = require('./requestState');

 const MAX_PREVIOUS_RESULT_TOKEN_LENGTH = 1000;

@@ -17,9 +18,8 @@ const callPathway = async (config, pathwayName, requestState, { text, ...parameters
 }

 class PathwayResolver {
-    constructor({ config, pathway
+    constructor({ config, pathway }) {
         this.config = config;
-        this.requestState = requestState;
         this.pathway = pathway;
         this.useInputChunking = pathway.useInputChunking;
         this.chunkMaxTokenLength = 0;
@@ -48,19 +48,71 @@ class PathwayResolver {
         this.pathwayPrompt = pathway.prompt;
     }

-    async
-
-
-
-
-
-
-
+    async asyncResolve(args) {
+        // Wait with a sleep promise for the race condition to resolve
+        // const results = await Promise.all([this.promptAndParse(args), await new Promise(resolve => setTimeout(resolve, 250))]);
+        const data = await this.promptAndParse(args);
+        // Process the results for async
+        if(args.async || typeof data === 'string') { // if async flag set or processed async and got string response
+            const { completedCount, totalCount } = requestState[this.requestId];
+            requestState[this.requestId].data = data;
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                    data: JSON.stringify(data),
+                }
+            });
+        } else { //stream
+            for (const handle of data) {
+                handle.on('data', data => {
+                    console.log(data.toString());
+                    const lines = data.toString().split('\n').filter(line => line.trim() !== '');
+                    for (const line of lines) {
+                        const message = line.replace(/^data: /, '');
+                        if (message === '[DONE]') {
+                            // Send stream finished message
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: null,
+                                    progress: 1,
+                                }
+                            });
+                            return; // Stream finished
+                        }
+                        try {
+                            const parsed = JSON.parse(message);
+                            const result = this.pathwayPrompter.plugin.parseResponse(parsed)
+
+                            pubsub.publish('REQUEST_PROGRESS', {
+                                requestProgress: {
+                                    requestId: this.requestId,
+                                    data: JSON.stringify(result)
+                                }
+                            });
+                        } catch (error) {
+                            console.error('Could not JSON parse stream message', message, error);
+                        }
                     }
                 });
-            });

+                // data.on('end', () => {
+                //     console.log("stream done");
+                // });
+            }
+
+        }
+    }
+
+    async resolve(args) {
+        if (args.async || args.stream) {
+            // Asyncronously process the request
+            // this.asyncResolve(args);
+            if (!requestState[this.requestId]) {
+                requestState[this.requestId] = {}
+            }
+            requestState[this.requestId] = { ...requestState[this.requestId], args, resolver: this.asyncResolve.bind(this) };
             return this.requestId;
         }
         else {
@@ -70,7 +122,6 @@
     }

     async promptAndParse(args) {
-
         // Get saved context from contextId or change contextId if needed
         const { contextId } = args;
         this.savedContextId = contextId ? contextId : null;
@@ -98,7 +149,7 @@
         if (this.pathway.inputChunkSize) {
             chunkMaxChunkTokenLength = Math.min(this.pathway.inputChunkSize, this.chunkMaxTokenLength);
         } else {
-
+            chunkMaxChunkTokenLength = this.chunkMaxTokenLength;
         }
         const encoded = encode(text);
         if (!this.useInputChunking || encoded.length <= chunkMaxChunkTokenLength) { // no chunking, return as is
@@ -124,7 +175,7 @@

     async summarizeIfEnabled({ text, ...parameters }) {
         if (this.pathway.useInputSummarization) {
-            return await callPathway(this.config, 'summary',
+            return await callPathway(this.config, 'summary', requestState, { text, targetLength: 1000, ...parameters });
         }
         return text;
     }
@@ -160,18 +211,25 @@

     // Process the request and return the result
     async processRequest({ text, ...parameters }) {
-
         text = await this.summarizeIfEnabled({ text, ...parameters }); // summarize if flag enabled
         const chunks = this.processInputText(text);

         const anticipatedRequestCount = chunks.length * this.prompts.length;

-        if ((
+        if ((requestState[this.requestId] || {}).canceled) {
             throw new Error('Request canceled');
         }

         // Store the request state
-
+        requestState[this.requestId] = { ...requestState[this.requestId], totalCount: anticipatedRequestCount, completedCount: 0 };
+
+        if (chunks.length > 1) {
+            // stream behaves as async if there are multiple chunks
+            if (parameters.stream) {
+                parameters.async = true;
+                parameters.stream = false;
+            }
+        }

         // If pre information is needed, apply current prompt with previous prompt info, only parallelize current call
         if (this.pathway.useParallelChunkProcessing) {
@@ -189,17 +247,31 @@
         let result = '';

         for (let i = 0; i < this.prompts.length; i++) {
+            const currentParameters = { ...parameters, previousResult };
+
+            if (currentParameters.stream) { // stream special flow
+                if (i < this.prompts.length - 1) {
+                    currentParameters.stream = false; // if not the last prompt then don't stream
+                }
+                else {
+                    // use the stream parameter if not async
+                    currentParameters.stream = currentParameters.async ? false : currentParameters.stream;
+                }
+            }
+
             // If the prompt doesn't contain {{text}} then we can skip the chunking, and also give that token space to the previous result
             if (!this.prompts[i].usesTextInput) {
                 // Limit context to it's N + text's characters
                 previousResult = this.truncate(previousResult, 2 * this.chunkMaxTokenLength);
-                result = await this.applyPrompt(this.prompts[i], null,
+                result = await this.applyPrompt(this.prompts[i], null, currentParameters);
             } else {
                 // Limit context to N characters
                 previousResult = this.truncate(previousResult, this.chunkMaxTokenLength);
                 result = await Promise.all(chunks.map(chunk =>
-                    this.applyPrompt(this.prompts[i], chunk,
-
+                    this.applyPrompt(this.prompts[i], chunk, currentParameters)));
+                if (!currentParameters.stream) {
+                    result = result.join("\n\n")
+                }
             }

             // If this is any prompt other than the last, use the result as the previous context
@@ -225,20 +297,22 @@
     }

     async applyPrompt(prompt, text, parameters) {
-        if (
+        if (requestState[this.requestId].canceled) {
             return;
         }
         const result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt);
-
+        requestState[this.requestId].completedCount++;

-        const { completedCount, totalCount } =
+        const { completedCount, totalCount } = requestState[this.requestId];

-
-
-
-
-
-
+        if (completedCount < totalCount) {
+            pubsub.publish('REQUEST_PROGRESS', {
+                requestProgress: {
+                    requestId: this.requestId,
+                    progress: completedCount / totalCount,
+                }
+            });
+        }

         if (prompt.saveResultTo) {
             this.savedContext[prompt.saveResultTo] = result;
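Note: the stream branch of asyncResolve above is parsing server-sent events, which is why each line is stripped of a "data: " prefix and a "[DONE]" sentinel ends the stream. A standalone sketch of that framing with illustrative payloads (example values, not taken from this package):

// One network chunk can carry several SSE frames; the same split/strip logic
// used in asyncResolve recovers the JSON messages and the [DONE] sentinel.
const chunk =
    'data: {"choices":[{"delta":{"content":"Hel"}}]}\n\n' +
    'data: {"choices":[{"delta":{"content":"lo"}}]}\n\n' +
    'data: [DONE]\n\n';

const messages = chunk
    .split('\n')
    .filter(line => line.trim() !== '')
    .map(line => line.replace(/^data: /, ''));

console.log(messages); // [ '{"choices":...}', '{"choices":...}', '[DONE]' ]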
package/graphql/plugins/modelPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // ModelPlugin.js
 const handlebars = require('handlebars');
 const { request } = require("../../lib/request");
-const {
+const { encode } = require("gpt-3-encoder");

 const DEFAULT_MAX_TOKENS = 4096;
 const DEFAULT_PROMPT_TOKEN_RATIO = 0.5;
@@ -35,6 +35,7 @@ class ModelPlugin {
         }

         this.requestCount = 1;
+        this.shouldCache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
     }

     getModelMaxTokenLength() {
@@ -102,6 +103,8 @@
         if (!choices || !choices.length) {
             if (Array.isArray(data) && data.length > 0 && data[0].translations) {
                 return data[0].translations[0].text.trim();
+            } else {
+                return data;
             }
         }

@@ -114,20 +117,43 @@
         const textResult = choices[0].text && choices[0].text.trim();
         const messageResult = choices[0].message && choices[0].message.content && choices[0].message.content.trim();

-        return messageResult
+        return messageResult ?? textResult ?? null;
     }

+    logMessagePreview(messages) {
+        messages.forEach((message, index) => {
+            const words = message.content.split(" ");
+            const tokenCount = encode(message.content).length;
+            let preview;
+
+            if (index === 0) {
+                preview = message.content;
+            } else {
+                preview = words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
+            }
+
+            console.log(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
+        });
+    }
+
     async executeRequest(url, data, params, headers) {
-        const responseData = await request({ url, data, params, headers }, this.modelName);
+        const responseData = await request({ url, data, params, headers, cache: this.shouldCache }, this.modelName);
         const modelInput = data.prompt || (data.messages && data.messages[0].content) || data[0].Text || null;
-
-        console.log(
+
+        console.log(`=== ${this.pathwayName}.${this.requestCount++} ===`);
+
+        if (data.messages && data.messages.length > 1) {
+            this.logMessagePreview(data.messages);
+        } else {
+            console.log(`\x1b[36m${modelInput}\x1b[0m`);
+        }
+
         console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
-
+
         if (responseData.error) {
             throw new Exception(`An error was returned from the server: ${JSON.stringify(responseData.error)}`);
         }
-
+
         return this.parseResponse(responseData);
     }

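Note: shouldCache gates the new request-level cache. It requires the global enableCache config flag plus either an explicit per-pathway opt-in or a deterministic pathway (temperature == 0, where repeated calls should produce identical output). A minimal standalone sketch of that decision, with stubbed config/pathway shapes:

// Standalone restatement of the cache decision added to the ModelPlugin constructor.
const shouldCache = (config, pathway) =>
    config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);

console.log(shouldCache({ get: () => true }, { temperature: 0 }));   // true: deterministic
console.log(shouldCache({ get: () => true }, { temperature: 0.7 })); // false: not opted in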
package/graphql/plugins/openAiChatPlugin.js
CHANGED

@@ -14,9 +14,12 @@ class OpenAIChatPlugin extends ModelPlugin {
         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);

+        const { stream } = parameters;
+
         return {
             messages: modelPromptMessages || [{ "role": "user", "content": modelPromptText }],
             temperature: this.temperature ?? 0.7,
+            stream
         };
     }

package/graphql/plugins/openAiCompletionPlugin.js
CHANGED

@@ -30,6 +30,8 @@ class OpenAICompletionPlugin extends ModelPlugin {
         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
         const modelPromptMessagesML = messagesToChatML(modelPromptMessages);

+        const { stream } = parameters;
+
         if (modelPromptMessagesML) {
             return {
                 prompt: modelPromptMessagesML,
@@ -38,13 +40,15 @@
                 top_p: 0.95,
                 frequency_penalty: 0,
                 presence_penalty: 0,
-                stop: ["<|im_end|>"]
+                stop: ["<|im_end|>"],
+                stream
             };
         } else {
             return {
                 prompt: modelPromptText,
                 max_tokens: this.getModelMaxTokenLength() - encode(modelPromptText).length - 1,
                 temperature: this.temperature ?? 0.7,
+                stream
             };
         }
     }
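Note: with the stream passthrough added to both OpenAI plugins, the flag now travels from the GraphQL arguments all the way into the request body; request.js (below) switches axios to responseType: 'stream' when it sees it. An illustrative chat-style body (example values, not from this package):

// Illustrative payload produced by the chat plugin with streaming enabled.
const body = {
    messages: [{ role: 'user', content: 'Translate to French: Hello' }],
    temperature: 0.7,
    stream: true,
};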
package/graphql/resolver.js
CHANGED

@@ -5,10 +5,10 @@ const { PathwayResolver } = require("./pathwayResolver");
 // (parent, args, contextValue, info)
 const rootResolver = async (parent, args, contextValue, info) => {
     const { config, pathway, requestState } = contextValue;
-    const { temperature } = pathway;
+    const { temperature, enableGraphqlCache } = pathway;

-    // Turn
-    if (temperature == 0) {
+    // Turn on graphql caching if enableGraphqlCache true and temperature is 0
+    if (enableGraphqlCache && temperature == 0) { // ||
         info.cacheControl.setCacheHint({ maxAge: 60 * 60 * 24, scope: 'PUBLIC' });
     }

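Note: Apollo response caching is now opt-in per pathway instead of automatic for every temperature == 0 pathway. A hypothetical pathway definition that would still receive the 24-hour PUBLIC cache hint under the new condition:

// Hypothetical pathway satisfying the new cache condition in rootResolver.
module.exports = {
    prompt: `Summarize the following text:\n\n{{text}}`,
    temperature: 0,           // deterministic output, safe to cache
    enableGraphqlCache: true, // explicit opt-in now required
};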
package/graphql/subscriptions.js
CHANGED

@@ -4,14 +4,27 @@

 const pubsub = require("./pubsub");
 const { withFilter } = require("graphql-subscriptions");
+const { requestState } = require("./requestState");

 const subscriptions = {
     requestProgress: {
         subscribe: withFilter(
-            () =>
+            (_, args, __, info) => {
+                const { requestIds } = args;
+                for (const requestId of requestIds) {
+                    if (!requestState[requestId]) {
+                        console.log(`requestProgress, requestId: ${requestId} not found`);
+                    } else {
+                        console.log(`starting async requestProgress, requestId: ${requestId}`);
+                        const { resolver, args } = requestState[requestId];
+                        resolver(args);
+                    }
+                }
+                return pubsub.asyncIterator(['REQUEST_PROGRESS'])
+            },
             (payload, variables) => {
                 return (
-                    payload.requestProgress.requestId
+                    variables.requestIds.includes(payload.requestProgress.requestId)
                 );
             },
         ),
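Note: the subscription now receives the request IDs as an argument and, at subscribe time, fires any deferred resolver parked in requestState, so work only starts once a client is actually listening. A client-side operation for the new signature (field selection inferred from the requestProgress payloads published in pathwayResolver.js; the exact RequestSubscription type is not shown in this diff):

// Client document for the new requestProgress(requestIds: [String!]) field.
const REQUEST_PROGRESS_SUBSCRIPTION = `
    subscription OnRequestProgress($requestIds: [String!]) {
        requestProgress(requestIds: $requestIds) {
            requestId
            progress
            data
        }
    }
`;
// variables: { requestIds: [idReturnedByAnAsyncQuery] }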
package/lib/request.js
CHANGED

@@ -1,20 +1,67 @@
-const axios = require('axios');
 const Bottleneck = require("bottleneck/es5");
+const RequestMonitor = require('./requestMonitor');
+const { config } = require('../config');
+let axios = require('axios');
+
+if (config.get('enableCache')) {
+    // Setup cache
+    const { setupCache } = require('axios-cache-interceptor');
+    axios = setupCache(axios, {
+        // enable cache for all requests by default
+        methods: ['get', 'post', 'put', 'delete', 'patch'],
+        interpretHeader: false,
+        ttl: 1000 * 60 * 60 * 24 * 7, // 7 days
+    });
+}

 const limiters = {};
+const monitors = {};

 const buildLimiters = (config) => {
     console.log('Building limiters...');
     for (const [name, model] of Object.entries(config.get('models'))) {
+        const rps = model.requestsPerSecond ?? 100;
         limiters[name] = new Bottleneck({
-            minTime: 1000 /
-
-
+            minTime: 1000 / rps,
+            maxConcurrent: rps,
+            reservoir: rps, // Number of tokens available initially
+            reservoirRefreshAmount: rps, // Number of tokens added per interval
+            reservoirRefreshInterval: 1000, // Interval in milliseconds
+        });
+        monitors[name] = new RequestMonitor();
+    }
+}
+
+setInterval(() => {
+    const monitorKeys = Object.keys(monitors);
+
+    // Skip logging if the monitors object does not exist or is empty
+    if (!monitorKeys || monitorKeys.length === 0) {
+        return;
     }
+
+    monitorKeys.forEach((monitorName) => {
+        const monitor = monitors[monitorName];
+        const callRate = monitor.getPeakCallRate();
+        const error429Rate = monitor.getError429Rate();
+        if (callRate > 0) {
+            console.log('------------------------');
+            console.log(`${monitorName} Call rate: ${callRate} calls/sec, 429 errors: ${error429Rate * 100}%`);
+            console.log('------------------------');
+            // Reset the rate monitor to start a new monitoring interval.
+            monitor.reset();
+        }
+    });
+}, 10000); // Log rates every 10 seconds (10000 ms).
+
+const postWithMonitor = async (model, url, data, axiosConfigObj) => {
+    const monitor = monitors[model];
+    monitor.incrementCallCount();
+    return axios.post(url, data, axiosConfigObj);
 }

 const MAX_RETRY = 10;
-const postRequest = async ({ url, data, params, headers }, model) => {
+const postRequest = async ({ url, data, params, headers, cache }, model) => {
     let retry = 0;
     const errors = []
     for (let i = 0; i < MAX_RETRY; i++) {
@@ -22,13 +69,20 @@ const postRequest = async ({ url, data, params, headers }, model) => {
         if (i > 0) {
             console.log(`Retrying request #retry ${i}: ${JSON.stringify(data)}...`);
             await new Promise(r => setTimeout(r, 200 * Math.pow(2, i))); // exponential backoff
-        }
+        }
         if (!limiters[model]) {
             throw new Error(`No limiter for model ${model}!`);
         }
-
+        const axiosConfigObj = { params, headers, cache };
+        if (params.stream || data.stream) {
+            axiosConfigObj.responseType = 'stream';
+        }
+        return await limiters[model].schedule(() => postWithMonitor(model, url, data, axiosConfigObj));
     } catch (e) {
         console.error(`Failed request with data ${JSON.stringify(data)}: ${e}`);
+        if (e.response.status === 429) {
+            monitors[model].incrementError429Count();
+        }
         errors.push(e);
     }
 }
@@ -37,7 +91,10 @@ const postRequest = async ({ url, data, params, headers }, model) => {

 const request = async (params, model) => {
     const response = await postRequest(params, model);
-    const { error, data } = response;
+    const { error, data, cached } = response;
+    if (cached) {
+        console.info('/Request served with cached response.');
+    }
     if (error && error.length > 0) {
         const lastError = error[error.length - 1];
         return { error: lastError.toJSON() ?? lastError ?? error };
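Note: every limiter setting above derives from a single requestsPerSecond value (default 100 when a model omits it). Worked through for a model configured with requestsPerSecond: 10:

// Bottleneck options produced by buildLimiters for requestsPerSecond: 10.
const rps = 10;
const options = {
    minTime: 1000 / rps,            // 100 ms minimum spacing between request starts
    maxConcurrent: rps,             // at most 10 requests in flight
    reservoir: rps,                 // 10 tokens available at startup
    reservoirRefreshAmount: rps,    // refill to 10 tokens...
    reservoirRefreshInterval: 1000, // ...once per second
};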
package/lib/requestMonitor.js
ADDED

@@ -0,0 +1,43 @@
+class RequestMonitor {
+    constructor() {
+        this.callCount = 0;
+        this.peakCallRate = 0;
+        this.error429Count = 0;
+        this.startTime = new Date();
+    }
+
+    incrementCallCount() {
+        this.callCount++;
+        if (this.getCallRate() > this.peakCallRate) {
+            this.peakCallRate = this.getCallRate();
+        }
+    }
+
+    incrementError429Count() {
+        this.error429Count++;
+    }
+
+    getCallRate() {
+        const currentTime = new Date();
+        const timeElapsed = (currentTime - this.startTime) / 1000; // time elapsed in seconds
+        return timeElapsed < 1 ? this.callCount : this.callCount / timeElapsed;
+    }
+
+    getPeakCallRate() {
+        return this.peakCallRate;
+    }
+
+    getError429Rate() {
+        return this.error429Count / this.callCount;
+    }
+
+    reset() {
+        this.callCount = 0;
+        this.error429Count = 0;
+        this.peakCallRate = 0;
+        this.startTime = new Date();
+    }
+}
+
+module.exports = RequestMonitor;
+
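Note: RequestMonitor is a plain counter class with no external dependencies, so it can be exercised directly; usage mirroring how request.js drives it:

const RequestMonitor = require('./requestMonitor');

const monitor = new RequestMonitor();
monitor.incrementCallCount();     // record one outgoing request
monitor.incrementError429Count(); // record one rate-limit rejection

console.log(monitor.getPeakCallRate()); // highest calls/sec observed since the last reset()
console.log(monitor.getError429Rate()); // 1 here: one 429 across one call
monitor.reset();                        // start a fresh measurement window

The callRate > 0 guard in the ten-second logger above also sidesteps the zero-division in getError429Rate() for windows with no calls.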
package/package.json
CHANGED

@@ -1,14 +1,22 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "0.0.
-  "description": "
+  "version": "0.0.6",
+  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/aj-archipelago/cortex.git"
   },
   "keywords": [
     "cortex",
-    "
+    "AI",
+    "prompt engineering",
+    "LLM",
+    "OpenAI",
+    "Azure",
+    "GPT-3",
+    "GPT-4",
+    "chatGPT",
+    "GraphQL"
   ],
   "main": "index.js",
   "scripts": {
@@ -22,11 +30,12 @@
     "@apollo/utils.keyvadapter": "^1.1.2",
     "@graphql-tools/schema": "^9.0.12",
     "@keyv/redis": "^2.5.4",
-    "apollo-server": "^3.
+    "apollo-server": "^3.12.0",
     "apollo-server-core": "^3.11.1",
     "apollo-server-express": "^3.11.1",
     "apollo-server-plugin-response-cache": "^3.8.1",
-    "axios": "^1.
+    "axios": "^1.3.4",
+    "axios-cache-interceptor": "^1.0.1",
     "bottleneck": "^2.19.5",
     "compromise": "^14.8.1",
     "compromise-paragraphs": "^0.1.0",
package/pathways/basePathway.js
CHANGED

@@ -1,4 +1,3 @@
-const { parseResponse } = require("../graphql/parser");
 const { rootResolver, resolver } = require("../graphql/resolver");
 const { typeDef } = require('../graphql/typeDef')

@@ -7,9 +6,9 @@ module.exports = {
     prompt: `{{text}}`,
     defaultInputParameters: {
         text: ``,
-
-
-
+        async: false, // switch to enable async mode
+        contextId: ``, // used to identify the context of the request,
+        stream: false, // switch to enable stream mode
     },
     inputParameters: {},
     typeDef,
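Note: because async, contextId, and stream are now default input parameters, every generated pathway query accepts them. A hypothetical client call (argument exposure assumed from how defaultInputParameters feed the generated schema):

// Hypothetical async invocation: the result field would carry the requestId,
// which can then be passed to the requestProgress subscription shown earlier.
const query = `
    query translate($text: String!, $async: Boolean) {
        translate(text: $text, async: $async) { result }
    }
`;
const variables = { text: 'Hello world', async: true };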
package/pathways/bias.js
CHANGED
package/pathways/translate.js
CHANGED
package/tests/chunking.test.js
CHANGED

@@ -4,6 +4,11 @@ jest.setTimeout(1800000);

 const testServer = getTestServer();

+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('chunking test of translate endpoint with huge text', async () => {
     const response = await testServer.executeOperation({
         query: 'query translate($text: String!) { translate(text: $text) { result } }',
package/tests/main.test.js
CHANGED

@@ -14,6 +14,11 @@ const getTestServer = () => {

 const testServer = getTestServer();

+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('validates bias endpoint', async () => {
     const response = await testServer.executeOperation({
         query: 'query bias($text: String!) { bias(text: $text) { result } }',
package/tests/translate.test.js
CHANGED

@@ -4,6 +4,11 @@ jest.setTimeout(1800000);

 const testServer = getTestServer();

+//stop server after all tests
+afterAll(async () => {
+    await testServer.stop();
+});
+
 it('test translate endpoint with huge arabic text english translation and check return non-arabic/english', async () => {
     const response = await testServer.executeOperation({
         query: 'query translate($text: String!, $to:String) { translate(text: $text, to:$to) { result } }',