@aj-archipelago/cortex 1.1.5 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +11 -1
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/cortexRequest.js +10 -0
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +78 -71
- package/lib/requestMonitor.js +19 -9
- package/package.json +3 -2
- package/pathways/basePathway.js +5 -3
- package/pathways/bing.js +12 -0
- package/pathways/index.js +2 -0
- package/server/chunker.js +1 -1
- package/server/graphql.js +1 -1
- package/server/modelExecutor.js +4 -0
- package/server/pathwayResolver.js +3 -3
- package/server/plugins/azureBingPlugin.js +44 -0
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +4 -7
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +22 -18
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
package/config.js
CHANGED
@@ -118,7 +118,7 @@ var config = convict({
         "api-key": "{{AZURE_COGNITIVE_API_KEY}}",
         "Content-Type": "application/json"
       },
-      "requestsPerSecond":
+      "requestsPerSecond": 10
     },
     "oai-embeddings": {
      "type": "OPENAI-EMBEDDINGS",
@@ -146,6 +146,16 @@ var config = convict({
      "maxTokenLength": 128000,
      "supportsStreaming": true
    },
+    "azure-bing": {
+      "type": "AZURE-BING",
+      "url": "https://api.bing.microsoft.com/v7.0/search",
+      "headers": {
+        "Ocp-Apim-Subscription-Key": "{{AZURE_BING_KEY}}",
+        "Content-Type": "application/json"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 200000
+    },
   },
   env: 'CORTEX_MODELS'
 },
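The {{AZURE_BING_KEY}} header value follows the same placeholder convention as the existing {{AZURE_COGNITIVE_API_KEY}} entry, so the subscription key is supplied through the environment rather than hard-coded in config.js. As a rough, hypothetical sketch of what resolving such a placeholder amounts to (this helper is illustrative only and is not Cortex's actual substitution code):

    // hypothetical {{VAR}} resolution from the environment
    const resolvePlaceholders = (value) =>
        value.replace(/{{\s*([A-Z0-9_]+)\s*}}/g, (_, name) => process.env[name] ?? '');

    resolvePlaceholders('{{AZURE_BING_KEY}}'); // -> value of process.env.AZURE_BING_KEY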
package/helper-apps/cortex-whisper-wrapper/app.py
CHANGED

@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
package/lib/cortexRequest.js
CHANGED
@@ -11,6 +11,7 @@ class CortexRequest {
         this._pathwayResolver = pathwayResolver || {};
         this._selectedEndpoint = selectedEndpoint || {};
         this._stream = stream || false;
+        this._method = 'POST';
 
         if (this._pathwayResolver) {
             this._model = this._pathwayResolver.model;
@@ -41,6 +42,15 @@ class CortexRequest {
         this._url = value;
     }
 
+    // method getter and setter
+    get method() {
+        return this._method;
+    }
+
+    set method(value) {
+        this._method = value;
+    }
+
     // data getter and setter
     get data() {
         return this._data;
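The new method property defaults to 'POST' and lets a caller switch a request to GET before it is executed. A minimal sketch of the intended use (instance construction elided; the GET-based plugin here is only an example, not package code):

    // e.g. inside a plugin whose upstream API is GET-based
    cortexRequest.method = 'GET';
    // requestExecutor's requestWithMonitor then issues cortexAxios.get(url, axiosConfigObj)
    // instead of cortexAxios.post(url, data, axiosConfigObj)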
package/lib/encodeCache.js
ADDED

@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
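These exports behave as drop-in, memoized replacements for gpt-3-encoder's encode and decode. A minimal usage sketch:

    import { encode, decode } from './encodeCache.js';

    const tokens = encode('Hello, Cortex!'); // first call tokenizes via gpt-3-encoder and caches the result
    encode('Hello, Cortex!');                // repeat calls for the same string are served from the 1000-entry LRU cache
    const text = decode(tokens);             // decode results are cached too, and the round trip primes the encode cache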
package/lib/fastLruCache.js
ADDED

@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
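A quick sketch of the cache's behavior - get returns -1 on a miss, and put evicts the least recently used entry once capacity is reached:

    import { FastLRUCache } from './fastLruCache.js';

    const cache = new FastLRUCache(2);
    cache.put('a', 1);
    cache.put('b', 2);
    cache.get('a');    // 1 - touching 'a' makes it the most recently used entry
    cache.put('c', 3); // over capacity, so the least recently used key 'b' is evicted
    cache.get('b');    // -1 (miss)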
package/lib/pathwayTools.js
CHANGED
package/lib/requestExecutor.js
CHANGED
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,100 +181,106 @@ setInterval(() => {
             endpointIndex++;
         });
     }
-},
+}, 30000); // Log rates every 30 seconds
 
-const
-
+const requestWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        if (axiosConfigObj?.method == 'GET'){
+            response = await cortexAxios.get(url, axiosConfigObj);
+        } else {
+            response = await cortexAxios.post(url, data, axiosConfigObj);
+        }
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
-const
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
+const makeRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
-        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
+        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream, method} = cortexRequest;
         const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-
-
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
-        const axiosConfigObj = { params, headers, cache };
+        const axiosConfigObj = { params, headers, cache, method };
         const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
         if (streamRequested && model.supportsStreaming) {
             axiosConfigObj.responseType = 'stream';
-            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() =>
+            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => requestWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
         } else {
             if (streamRequested) {
                 logger.info(`>>> [${requestId}] ${model} does not support streaming - sending non-streaming request`);
                 axiosConfigObj.params.stream = false;
                 data.stream = false;
             }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
             const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
             promises = controllers.map((controller, index) =>
                 new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                     setTimeout(async () => {
                         try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                             const endpointName = selectedEndpoint.name || model;
                             if (!selectedEndpoint.limiter) {
                                 throw new Error(`No limiter for endpoint ${endpointName}!`);
                             }
-                            const axiosConfigObj = { params, headers, cache };
+                            const axiosConfigObj = { params, headers, cache, method };
 
                             let response = null;
+                            let duration = null;
 
                             if (!controller.signal?.aborted) {
 
                                 axiosConfigObj.signal = controller.signal;
                                 axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index
-
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                     const header = '>'.repeat(logMessage.length);
                                     logger.info(`\n${header}\n${logMessage}`);
                                 }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () =>
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => requestWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                 if (!controller.signal?.aborted) {
-
                                     logger.debug(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
                                 }
                             }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                         } catch (error) {
                             if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +293,48 @@ const postRequest = async (cortexRequest) => {
                         } finally {
                             controllers.forEach(controller => controller.abort());
                         }
-                    },
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                 })
             );
         }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
         try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
             // if response status is 2xx
             if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
             } else {
                 throw new Error(`Received error response: ${response.status}`);
            }
        } catch (error) {
-
-
-            const status =
-
-            if (status === 429) {
-                selectedEndpoint.monitor.incrementError429Count();
-            }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429
-
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                    }
                } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                    cortexRequest.selectNewEndpoint();
                }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                if (i < MAX_RETRY - 1) {
                    const backoffTime = 200 * Math.pow(2, i);
                    const jitter = backoffTime * 0.2 * Math.random();
                    await new Promise(r => setTimeout(r, backoffTime + jitter));
                } else {
-                    return
+                    return { response, duration };
                }
            } else {
                throw error;
@@ -334,10 +345,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await makeRequest(cortexRequest);
         const requestId = cortexRequest.requestId;
         const { error, data, cached } = response;
         if (cached) {
@@ -347,8 +355,7 @@ const executeRequest = async (cortexRequest) => {
             const lastError = error[error.length - 1];
             return { error: lastError.toJSON() ?? lastError ?? error };
         }
-
-        return data;
+        return { data, duration };
     } catch (error) {
         logger.error(`Error in request: ${error.message || error}`);
         return { error: error };
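For reference, with the default DUPLICATE_REQUEST_AFTER of 10 seconds (the helper receives it in milliseconds), the staggered delays produced by getDuplicateRequestDelay work out to roughly:

    getDuplicateRequestDelay(0, 10000); // 0 ms - the primary request fires immediately
    getDuplicateRequestDelay(1, 10000); // ~10000-12000 ms - first duplicate after about 10s, plus up to 20% jitter
    getDuplicateRequestDelay(2, 10000); // ~30000-36000 ms - second duplicate after about 30s, plus up to 20% jitter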
package/lib/requestMonitor.js
CHANGED
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@ class RequestMonitor {
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@ class RequestMonitor {
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@ class RequestMonitor {
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@ class RequestMonitor {
         return sum / this.callDurations.size();
     }
 
-
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
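The executor's requestWithMonitor (above) relies on these return values to attach a duration to every request. Roughly, the call-tracking flow looks like this (monitor construction is omitted here; startCall, endCall, and incrementErrorCount are the methods shown in the diff):

    const monitor = endpoint.monitor;          // RequestMonitor instance attached to the endpoint
    const callId = monitor.startCall();
    // on success:
    const duration = monitor.endCall(callId);  // returns the call duration in milliseconds
    // or, on a failure such as an HTTP 429:
    const errorDuration = monitor.incrementErrorCount(callId, 429); // records the error (and the 429) and returns the duration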
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.5",
+  "version": "1.1.7",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -52,7 +52,7 @@
     "handlebars": "^4.7.7",
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
-    "langchain": "^0.
+    "langchain": "^0.1.28",
     "mime-types": "^2.1.35",
     "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
@@ -60,6 +60,7 @@
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
package/pathways/basePathway.js
CHANGED
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature:
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature:
+    temperature: 0.9,
 };
 
package/pathways/bing.js
ADDED
package/pathways/index.js
CHANGED
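The basePathway values above are only defaults; an individual pathway module (such as the new bing pathway registered through pathways/index.js) can override them. A hypothetical illustration of overriding the new options (the file name and prompt below are illustrative, not part of this release):

    // pathways/briefSummary.js (hypothetical)
    export default {
        prompt: `Briefly summarize the following text:\n\n{{text}}`,
        joinChunksWith: '\n',           // join chunked results with single newlines instead of blank lines
        enableDuplicateRequests: false, // opt this pathway out of backup requests
    };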
package/server/chunker.js
CHANGED
package/server/graphql.js
CHANGED
package/server/modelExecutor.js
CHANGED
@@ -19,6 +19,7 @@ import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
 import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
 import GeminiChatPlugin from './plugins/geminiChatPlugin.js';
 import GeminiVisionPlugin from './plugins/geminiVisionPlugin.js';
+import AzureBingPlugin from './plugins/azureBingPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -80,6 +81,9 @@ class ModelExecutor {
             case 'GEMINI-VISION':
                 plugin = new GeminiVisionPlugin(pathway, model);
                 break;
+            case 'AZURE-BING':
+                plugin = new AzureBingPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }