@aj-archipelago/cortex 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +3 -3
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/cortexRequest.js +11 -1
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +71 -68
- package/lib/requestMonitor.js +19 -9
- package/package.json +3 -1
- package/pathways/basePathway.js +5 -3
- package/pathways/bias.js +1 -1
- package/pathways/cognitive_insert.js +1 -1
- package/server/chunker.js +1 -1
- package/server/graphql.js +2 -0
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +26 -8
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +192 -0
- package/server/plugins/geminiVisionPlugin.js +102 -0
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +24 -19
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiEmbeddingsPlugin.js +3 -1
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/server/rest.js +11 -5
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
package/config.js
CHANGED
@@ -118,13 +118,13 @@ var config = convict({
             "api-key": "{{AZURE_COGNITIVE_API_KEY}}",
             "Content-Type": "application/json"
         },
-        "requestsPerSecond":
+        "requestsPerSecond": 10
     },
     "oai-embeddings": {
         "type": "OPENAI-EMBEDDINGS",
-        "url": "https://
+        "url": "https://api.openai.com/v1/embeddings",
         "headers": {
-            "
+            "Authorization": "Bearer {{OPENAI_API_KEY}}",
            "Content-Type": "application/json"
        },
        "params": {
package/helper-apps/cortex-whisper-wrapper/app.py
CHANGED
@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
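The new `language` parameter lets callers pin the transcription language instead of relying on Whisper's auto-detection. A rough sketch of a call to the wrapper follows; the host, route, and `fileurl` parameter name are assumptions for illustration — only `language` and `word_timestamps` appear in the diff above.

```js
// Hypothetical request to the cortex-whisper-wrapper helper app (Node 18+, ESM).
// The endpoint URL and the fileurl parameter name are assumed; 'language' is
// passed through to whisper's transcribe() as a decode option by the code above.
const params = new URLSearchParams({
    fileurl: 'https://example.com/audio/interview.mp3', // assumed parameter name
    word_timestamps: 'False',
    language: 'ar',
});

const res = await fetch(`http://localhost:5000/transcribe?${params}`); // assumed URL
console.log(await res.text());
```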
package/lib/cortexRequest.js
CHANGED
@@ -1,7 +1,7 @@
 import { selectEndpoint } from './requestExecutor.js';
 
 class CortexRequest {
-    constructor( { url, data, params, headers, cache, model, pathwayResolver, selectedEndpoint } = {}) {
+    constructor( { url, data, params, headers, cache, model, pathwayResolver, selectedEndpoint, stream } = {}) {
         this._url = url || '';
         this._data = data || {};
         this._params = params || {};
@@ -10,6 +10,7 @@ class CortexRequest {
         this._model = model || '';
         this._pathwayResolver = pathwayResolver || {};
         this._selectedEndpoint = selectedEndpoint || {};
+        this._stream = stream || false;
 
         if (this._pathwayResolver) {
             this._model = this._pathwayResolver.model;
@@ -112,6 +113,15 @@ class CortexRequest {
     set pathwayResolver(value) {
         this._pathwayResolver = value;
     }
+
+    // stream getter and setter
+    get stream() {
+        return this._stream;
+    }
+
+    set stream(value) {
+        this._stream = value;
+    }
 }
 
 export default CortexRequest;
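With `stream` promoted to a first-class field, a plugin can flag streaming on the request object itself rather than burying it in `params` or `data`. A minimal sketch follows; the field values are illustrative only (in Cortex the plugins and pathway resolver normally populate these).

```js
import CortexRequest from './lib/cortexRequest.js';

const cortexRequest = new CortexRequest({
    url: 'https://api.openai.com/v1/chat/completions',        // illustrative endpoint
    data: { messages: [{ role: 'user', content: 'Hello' }] }, // illustrative payload
    headers: { 'Content-Type': 'application/json' },
    stream: true, // new in this release: carried by the request object itself
});

console.log(cortexRequest.stream); // true, via the new getter; requestExecutor reads it as `stream`
```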
package/lib/encodeCache.js
ADDED
@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
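A quick sketch of the new memoized encoder: repeated calls for the same string hit the LRU cache (misses are signalled internally by the `-1` sentinel), and `decode` also primes the encode cache with the reverse mapping.

```js
import { encode, decode } from './lib/encodeCache.js';

const tokens = encode('Hello, world!'); // first call runs gpt-3-encoder and caches the result
const cached = encode('Hello, world!'); // second call is served from the 1000-entry LRU cache

console.log(tokens.length);             // token count, e.g. for context-window budgeting
console.log(decode(tokens));            // 'Hello, world!' - and primes the encode cache too
console.log(cached === tokens);         // true - the cached token array is returned as-is
```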
package/lib/fastLruCache.js
ADDED
@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
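A small usage sketch of the cache itself: `get` returns `-1` on a miss (so it isn't suited to storing `-1` as a value), and once capacity is reached the least recently used entry is evicted.

```js
import { FastLRUCache } from './lib/fastLruCache.js';

const cache = new FastLRUCache(2); // tiny capacity for the demo

cache.put('a', 1);
cache.put('b', 2);
cache.get('a');          // 1 - touching 'a' leaves 'b' as the least recently used entry
cache.put('c', 3);       // capacity reached: 'b' is evicted

console.log(cache.get('b')); // -1 (miss)
console.log(cache.get('a')); // 1
console.log(cache.get('c')); // 3
```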
package/lib/pathwayTools.js
CHANGED
package/lib/requestExecutor.js
CHANGED
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,30 +181,51 @@
             endpointIndex++;
         });
     }
-},
+}, 30000); // Log rates every 30 seconds
 
 const postWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
-
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        response = await cortexAxios.post(url, data, axiosConfigObj);
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
 const postRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
-        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model} = cortexRequest;
+        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
         const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-
-
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
         const axiosConfigObj = { params, headers, cache };
-        const streamRequested = (params?.stream || data?.stream);
+        const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
        if (streamRequested && model.supportsStreaming) {
            axiosConfigObj.responseType = 'stream';
            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
@@ -212,14 +235,20 @@ const postRequest = async (cortexRequest) => {
                 axiosConfigObj.params.stream = false;
                 data.stream = false;
             }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
             const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
             promises = controllers.map((controller, index) =>
                 new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                     setTimeout(async () => {
                         try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                             const endpointName = selectedEndpoint.name || model;
                             if (!selectedEndpoint.limiter) {
                                 throw new Error(`No limiter for endpoint ${endpointName}!`);
@@ -227,52 +256,27 @@
                             const axiosConfigObj = { params, headers, cache };
 
                             let response = null;
+                            let duration = null;
 
                             if (!controller.signal?.aborted) {
 
                                 axiosConfigObj.signal = controller.signal;
                                 axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index
-
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                     const header = '>'.repeat(logMessage.length);
                                     logger.info(`\n${header}\n${logMessage}`);
                                 }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj));
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                 if (!controller.signal?.aborted) {
-
-                                    //logger.info(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                //logger.info(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
+                                    logger.debug(`<<< [${requestId}] received response for request ${index}`);
                                 }
                             }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                         } catch (error) {
                             if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +289,48 @@ const postRequest = async (cortexRequest) => {
                         } finally {
                             controllers.forEach(controller => controller.abort());
                         }
-                    },
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                 })
             );
         }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
         try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
             // if response status is 2xx
             if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
             } else {
                 throw new Error(`Received error response: ${response.status}`);
             }
         } catch (error) {
-
-
-            const status =
-
-            if (status === 429) {
-                selectedEndpoint.monitor.incrementError429Count();
-            }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                 if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429
-
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                     }
                 } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                     cortexRequest.selectNewEndpoint();
                 }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                 if (i < MAX_RETRY - 1) {
                     const backoffTime = 200 * Math.pow(2, i);
                     const jitter = backoffTime * 0.2 * Math.random();
                     await new Promise(r => setTimeout(r, backoffTime + jitter));
                 } else {
-                    return
+                    return { response, duration };
                 }
             } else {
                 throw error;
@@ -334,10 +341,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await postRequest(cortexRequest);
         const requestId = cortexRequest.requestId;
         const { error, data, cached } = response;
         if (cached) {
@@ -347,8 +351,7 @@ const executeRequest = async (cortexRequest) => {
         const lastError = error[error.length - 1];
         return { error: lastError.toJSON() ?? lastError ?? error };
     }
-
-    return data;
+    return { data, duration };
     } catch (error) {
         logger.error(`Error in request: ${error.message || error}`);
         return { error: error };
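The new `getDuplicateRequestDelay` helper staggers backup requests exponentially and adds up to 20% positive jitter, while the retry path keeps its separate 200ms-based exponential backoff. A condensed, standalone restatement of the delay math for illustration (Cortex passes `duplicateRequestAfter` in milliseconds):

```js
// Mirrors getDuplicateRequestDelay from the diff above, runnable on its own.
const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
    const base = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
    const jitter = base * 0.2 * Math.random();
    return Math.max(0, base + jitter);
};

// With the default duplicateRequestAfter of 10s (10000 ms), the jitter-free baselines are:
//   index 0 ->     0 ms (primary request fires immediately)
//   index 1 -> 10000 ms (first backup after ~10s, sent to a newly selected endpoint)
//   index 2 -> 30000 ms (second backup after ~30s)
[0, 1, 2].forEach(i => console.log(i, Math.round(getDuplicateRequestDelay(i, 10000))));
```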
package/lib/requestMonitor.js
CHANGED
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@
         return sum / this.callDurations.size();
     }
 
-
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
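A sketch of the reshaped monitor API as requestExecutor.js now uses it; the direct instantiation and import path here are assumptions (in Cortex each endpoint owns its own monitor), but the method signatures match the diff above.

```js
import RequestMonitor from './lib/requestMonitor.js'; // assumed default export

const monitor = new RequestMonitor(); // constructor arguments, if any, assumed optional

const callId = monitor.startCall();
// ... perform the HTTP request ...
const succeeded = true; // placeholder for the real outcome

const durationMs = succeeded
    ? monitor.endCall(callId)                   // success: returns the measured duration
    : monitor.incrementErrorCount(callId, 429); // failure: records the error (429s also feed
                                                // error429Count) and still returns the duration

console.log(`call took ${durationMs} ms`);
```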
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.4",
+  "version": "1.1.6",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -53,12 +53,14 @@
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
     "langchain": "^0.0.47",
+    "mime-types": "^2.1.35",
     "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
     "winston": "^3.11.0",
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
package/pathways/basePathway.js
CHANGED
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature:
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature:
+    temperature: 0.9,
 };
 
package/pathways/bias.js
CHANGED
@@ -6,5 +6,5 @@ export default {
     // Uncomment the following line to enable caching for this prompt, if desired.
     // enableCache: true,
 
-    prompt: `{{text}}\n\nIs the above text written objectively? Why or why not, explain with details:\n
+    prompt: `{{text}}\n\nIs the above text written objectively? Why or why not, explain with details:\n`,
 };
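Pathways such as bias.js inherit the defaults shown in basePathway.js above, so the new fields can be overridden per pathway. A hypothetical pathway module illustrating that (the prompt text and values are invented for the example):

```js
// e.g. pathways/summarize_no_backup.js (hypothetical)
export default {
    prompt: `{{text}}\n\nSummarize the above text in three sentences:\n`,

    // overrides of the new/changed basePathway defaults
    joinChunksWith: '\n',           // join chunked results with single newlines instead of '\n\n'
    enableDuplicateRequests: false, // opt this pathway out of backup requests entirely
    temperature: 0.2,               // favor more deterministic output than the 0.9 default
};
```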
package/server/chunker.js
CHANGED
package/server/graphql.js
CHANGED
package/server/modelExecutor.js
CHANGED
@@ -17,6 +17,8 @@ import OpenAiEmbeddingsPlugin from './plugins/openAiEmbeddingsPlugin.js';
 import OpenAIImagePlugin from './plugins/openAiImagePlugin.js';
 import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
 import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
+import GeminiChatPlugin from './plugins/geminiChatPlugin.js';
+import GeminiVisionPlugin from './plugins/geminiVisionPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -72,6 +74,12 @@ class ModelExecutor {
             case 'OPENAI-VISION':
                 plugin = new OpenAIVisionPlugin(pathway, model);
                 break;
+            case 'GEMINI-CHAT':
+                plugin = new GeminiChatPlugin(pathway, model);
+                break;
+            case 'GEMINI-VISION':
+                plugin = new GeminiVisionPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }
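With the two Gemini plugins wired into the switch above, any model whose `type` is `GEMINI-CHAT` or `GEMINI-VISION` is routed to them. A hypothetical model entry in the shape of the config.js models shown earlier; the URL, header name, and environment variable are assumptions, not values shipped by the package:

```js
// Hypothetical model definition; ModelExecutor selects GeminiChatPlugin from model.type.
const geminiChatModel = {
    type: 'GEMINI-CHAT',
    url: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent', // assumed endpoint
    headers: {
        'x-goog-api-key': '{{GEMINI_API_KEY}}', // assumed header and env var
        'Content-Type': 'application/json',
    },
    requestsPerSecond: 10,
};

export default geminiChatModel;
```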
|