@aj-archipelago/cortex 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +68 -65
- package/lib/requestMonitor.js +19 -9
- package/package.json +2 -1
- package/pathways/basePathway.js +5 -3
- package/server/chunker.js +1 -1
- package/server/graphql.js +1 -1
- package/server/pathwayResolver.js +3 -3
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +4 -7
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +22 -18
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
package/config.js
CHANGED

package/helper-apps/cortex-whisper-wrapper/app.py
CHANGED

@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
package/lib/encodeCache.js
ADDED

@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
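A minimal usage sketch of the new cached tokenizer wrappers (the import path assumes a caller at the package root): a repeated encode of the same string is served from the LRU cache instead of being re-tokenized, and decode seeds the encode cache with the reverse mapping.

    import { encode, decode } from './lib/encodeCache.js';

    const prompt = 'The quick brown fox jumps over the lazy dog.';

    const tokens = encode(prompt);      // first call tokenizes with gpt-3-encoder and caches the result
    const tokensAgain = encode(prompt); // second call is an O(1) cache hit (same stored array)
    const text = decode(tokens);        // also stores text -> tokens in the encode cache

    console.log(tokens.length, tokensAgain === tokens, text === prompt);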
package/lib/fastLruCache.js
ADDED

@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
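A short sketch of the eviction behavior, mirroring the unit tests added further below: the least-recently-used key is dropped once capacity is exceeded, and a miss returns -1.

    import { FastLRUCache } from './lib/fastLruCache.js';

    const cache = new FastLRUCache(2); // capacity of two entries
    cache.put('a', 1);
    cache.put('b', 2);
    cache.get('a');                    // touching 'a' makes 'b' the least recently used entry
    cache.put('c', 3);                 // evicts 'b'

    console.log(cache.get('b'));       // -1 (evicted)
    console.log(cache.get('a'));       // 1
    console.log(cache.get('c'));       // 3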
package/lib/pathwayTools.js
CHANGED
package/lib/requestExecutor.js
CHANGED
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,30 +181,51 @@ setInterval(() => {
             endpointIndex++;
         });
     }
-},
+}, 30000); // Log rates every 30 seconds
 
 const postWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
-
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        response = await cortexAxios.post(url, data, axiosConfigObj);
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
 const postRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
        const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-
-
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
        const axiosConfigObj = { params, headers, cache };
        const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
        if (streamRequested && model.supportsStreaming) {
            axiosConfigObj.responseType = 'stream';
            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
@@ -212,14 +235,20 @@ const postRequest = async (cortexRequest) => {
                axiosConfigObj.params.stream = false;
                data.stream = false;
            }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
            const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
            promises = controllers.map((controller, index) =>
                new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                    setTimeout(async () => {
                        try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                            const endpointName = selectedEndpoint.name || model;
                            if (!selectedEndpoint.limiter) {
                                throw new Error(`No limiter for endpoint ${endpointName}!`);
@@ -227,52 +256,27 @@ const postRequest = async (cortexRequest) => {
                            const axiosConfigObj = { params, headers, cache };
 
                            let response = null;
+                            let duration = null;
 
                            if (!controller.signal?.aborted) {
 
                                axiosConfigObj.signal = controller.signal;
                                axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index
-
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                    const header = '>'.repeat(logMessage.length);
                                    logger.info(`\n${header}\n${logMessage}`);
                                }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj));
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                if (!controller.signal?.aborted) {
-
                                    logger.debug(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
                                }
                            }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                        } catch (error) {
                            if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +289,48 @@ const postRequest = async (cortexRequest) => {
                        } finally {
                            controllers.forEach(controller => controller.abort());
                        }
-                    },
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                })
            );
        }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
        try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
            // if response status is 2xx
            if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
            } else {
                throw new Error(`Received error response: ${response.status}`);
            }
        } catch (error) {
-
-
-            const status =
-
-            if (status === 429) {
-                selectedEndpoint.monitor.incrementError429Count();
-            }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429
-
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                    }
                } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                    cortexRequest.selectNewEndpoint();
                }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                if (i < MAX_RETRY - 1) {
                    const backoffTime = 200 * Math.pow(2, i);
                    const jitter = backoffTime * 0.2 * Math.random();
                    await new Promise(r => setTimeout(r, backoffTime + jitter));
                } else {
-                    return
+                    return { response, duration };
                }
            } else {
                throw error;
@@ -334,10 +341,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await postRequest(cortexRequest);
        const requestId = cortexRequest.requestId;
        const { error, data, cached } = response;
        if (cached) {
@@ -347,8 +351,7 @@ const executeRequest = async (cortexRequest) => {
            const lastError = error[error.length - 1];
            return { error: lastError.toJSON() ?? lastError ?? error };
        }
-
-        return data;
+        return { data, duration };
    } catch (error) {
        logger.error(`Error in request: ${error.message || error}`);
        return { error: error };
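Both the duplicate-request schedule and the retry backoff above grow exponentially with up to 20% added jitter. A small worked sketch of the timings those formulas produce, assuming the default duplicateRequestAfter of 10 seconds:

    const after = 10 * 1000; // DUPLICATE_REQUEST_AFTER in ms
    for (let index = 0; index < 3; index++) {
        // duplicate request i fires after roughly after * (2^i - 1) ms
        const base = after * Math.pow(2, index) - after; // 0ms, 10000ms, 30000ms
        console.log(`duplicate ${index}: ~${base}ms (+ up to ${base * 0.2}ms jitter)`);
    }

    for (let i = 0; i < 4; i++) {
        // retry i waits 200 * 2^i ms before the next attempt
        const backoff = 200 * Math.pow(2, i); // 200ms, 400ms, 800ms, 1600ms
        console.log(`retry ${i + 1}: ~${backoff}ms (+ up to ${backoff * 0.2}ms jitter)`);
    }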
package/lib/requestMonitor.js
CHANGED
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@ class RequestMonitor {
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@ class RequestMonitor {
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@ class RequestMonitor {
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@ class RequestMonitor {
         return sum / this.callDurations.size();
     }
 
-
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
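The practical effect of the monitor changes: incrementErrorCount now accepts an optional callId and HTTP status, folds 429s into the dedicated counter, and returns the call duration by delegating to endCall. A minimal sketch (the import form is assumed; only methods visible in this diff are used):

    import { RequestMonitor } from './lib/requestMonitor.js';

    const monitor = new RequestMonitor();

    const callId = monitor.startCall();
    // ... the request comes back with a 429 ...
    const duration = monitor.incrementErrorCount(callId, 429);

    console.log(duration);                  // elapsed ms for the failed call
    console.log(monitor.getError429Rate()); // 429s are still tracked separately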
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.5",
+  "version": "1.1.6",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -60,6 +60,7 @@
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
package/pathways/basePathway.js
CHANGED
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature:
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature:
+    temperature: 0.9,
 };
 
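These defaults can be overridden per pathway; the new enableDuplicateRequests flag lets an individual pathway opt out of backup requests entirely. A hypothetical pathway definition (the name and prompt are placeholders):

    // pathways/summarize.js (illustrative)
    export default {
        prompt: `Summarize the following text:\n\n{{text}}`,
        useInputChunking: true,
        enableDuplicateRequests: false, // opt this pathway out of backup requests
        duplicateRequestAfter: 20,      // seconds before a backup request would be sent
        timeout: 180,
    };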
package/server/chunker.js
CHANGED
package/server/graphql.js
CHANGED
package/server/pathwayResolver.js
CHANGED

@@ -2,7 +2,7 @@ import { ModelExecutor } from './modelExecutor.js';
 import { modelEndpoints } from '../lib/requestExecutor.js';
 // eslint-disable-next-line import/no-extraneous-dependencies
 import { v4 as uuidv4 } from 'uuid';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../lib/encodeCache.js';
 import { getFirstNToken, getLastNToken, getSemanticChunks } from './chunker.js';
 import { PathwayResponseParser } from './pathwayResponseParser.js';
 import { Prompt } from './prompt.js';
@@ -339,7 +339,7 @@ class PathwayResolver {
             const data = await Promise.all(chunks.map(chunk =>
                 this.applyPromptsSerially(chunk, parameters)));
             // Join the chunks with newlines
-            return data.join("\n\n");
+            return data.join(this.pathway.joinChunksWith || "\n\n");
         } else {
             // Apply prompts one by one, serially, across all chunks
             // This is the default processing mode and will make previousResult available at the object level
@@ -373,7 +373,7 @@ class PathwayResolver {
             if (result.length === 1) {
                 result = result[0];
             } else if (!currentParameters.stream) {
-                result = result.join("\n\n");
+                result = result.join(this.pathway.joinChunksWith || "\n\n");
             }
         }
 
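The resolver now joins per-chunk results with the pathway's joinChunksWith value, falling back to the previous hard-coded blank line. A tiny sketch of the effect (the chunk results are placeholders):

    const chunkResults = ['First chunk summary.', 'Second chunk summary.'];

    // previous behavior: always a blank line between chunk results
    const legacy = chunkResults.join("\n\n");

    // new behavior: the pathway chooses the separator, e.g. joinChunksWith: ' '
    const pathway = { joinChunksWith: ' ' };
    const joined = chunkResults.join(pathway.joinChunksWith || "\n\n");

    console.log(joined); // "First chunk summary. Second chunk summary."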
package/server/plugins/azureCognitivePlugin.js
CHANGED

@@ -6,6 +6,7 @@ import path from 'path';
 import { config } from '../../config.js';
 import { axios } from '../../lib/requestExecutor.js';
 import logger from '../../lib/logger.js';
+import { getSemanticChunks } from '../chunker.js';
 
 const API_URL = config.get('whisperMediaApiUrl');
 
@@ -37,7 +38,8 @@ class AzureCognitivePlugin extends ModelPlugin {
         const data = {};
 
         if (mode == 'delete') {
-
+            let searchUrl = this.ensureMode(this.requestUrl(text), 'search');
+            searchUrl = this.ensureIndex(searchUrl, indexName);
             let searchQuery = `owner:${savedContextId}`;
 
             if (docId) {
@@ -155,6 +157,7 @@ class AzureCognitivePlugin extends ModelPlugin {
         const headers = cortexRequest.headers;
 
         const { file } = parameters;
+        const fileData = { value: [] };
         if(file){
             let url = file;
             //if not txt file, use helper app to convert to txt
@@ -177,11 +180,13 @@ class AzureCognitivePlugin extends ModelPlugin {
                 throw Error(`No data can be extracted out of file!`);
             }
 
-
-
+            const chunkTokenLength = this.promptParameters.inputChunkSize || 1000;
+            const chunks = getSemanticChunks(data, chunkTokenLength);
 
-
-
+            for (const text of chunks) {
+                const { data: singleData } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest)
+                fileData.value.push(singleData.value[0]);
+            }
         }
 
         const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
@@ -195,7 +200,7 @@ class AzureCognitivePlugin extends ModelPlugin {
 
         // execute the request
         cortexRequest.url = url;
-        cortexRequest.data = data;
+        cortexRequest.data = (mode === 'index' && fileData.value.length>0) ? fileData : data;
         cortexRequest.params = params;
         cortexRequest.headers = headers;
         const result = await this.executeRequest(cortexRequest);
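The indexing path now splits extracted file text into semantic chunks and builds one index document per chunk instead of a single oversized one. A sketch of that loop in isolation; buildDocument is a hypothetical stand-in for the per-chunk body produced by getRequestParameters, and the import path assumes a caller at the package root:

    import { randomUUID } from 'crypto';
    import { getSemanticChunks } from './server/chunker.js';

    // Hypothetical stand-in for the per-chunk index document the plugin builds
    const buildDocument = (text, owner) => ({ id: randomUUID(), owner, content: text });

    const extractedText = 'A very long document extracted from an uploaded file ...';
    const chunkTokenLength = 1000; // falls back to 1000 when inputChunkSize is not set

    const fileData = { value: [] };
    for (const chunk of getSemanticChunks(extractedText, chunkTokenLength)) {
        fileData.value.push(buildDocument(chunk, 'some-context-id'));
    }
    // fileData is then sent as the index payload instead of the single-document body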
package/server/plugins/azureTranslatePlugin.js
CHANGED

@@ -45,8 +45,6 @@ class AzureTranslatePlugin extends ModelPlugin {
 
     // Override the logging function to display the request and response
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const modelInput = data[0].Text;
 
         logger.debug(`${modelInput}`);
package/server/plugins/geminiChatPlugin.js
CHANGED

@@ -1,6 +1,5 @@
 // geminiChatPlugin.js
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import logger from '../../lib/logger.js';
 
 const mergeResults = (data) => {
@@ -148,8 +147,6 @@ class GeminiChatPlugin extends ModelPlugin {
 
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const messages = data && data.contents;
 
         if (messages && messages.length > 1) {
@@ -162,10 +159,10 @@ class GeminiChatPlugin extends ModelPlugin {
                     return acc;
                 } , '');
                 const words = messageContent.split(" ");
-                const
+                const { length, units } = this.getLength(messageContent);
                 const preview = words.length < 41 ? messageContent : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
 
-                logger.debug(`
+                logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
             });
         } else if (messages && messages.length === 1) {
             logger.debug(`${messages[0].parts[0].text}`);
@@ -180,8 +177,8 @@ class GeminiChatPlugin extends ModelPlugin {
             logger.warn(`!!! response was blocked because the input or response potentially violates policies`);
             logger.debug(`Safety Ratings: ${JSON.stringify(safetyRatings, null, 2)}`);
         }
-        const
-        logger.info(`[response received containing ${
+        const { length, units } = this.getLength(mergedResult);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${mergedResult}`);
     }
 
package/server/plugins/localModelPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // localModelPlugin.js
 import ModelPlugin from './modelPlugin.js';
 import { execFileSync } from 'child_process';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../../lib/encodeCache.js';
 import logger from '../../lib/logger.js';
 
 class LocalModelPlugin extends ModelPlugin {
package/server/plugins/modelPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // ModelPlugin.js
 import HandleBars from '../../lib/handleBars.js';
 import { executeRequest } from '../../lib/requestExecutor.js';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../../lib/encodeCache.js';
 import { getFirstNToken } from '../chunker.js';
 import logger, { obscureUrlParams } from '../../lib/logger.js';
 import { config } from '../../config.js';
@@ -32,7 +32,6 @@ class ModelPlugin {
         }
 
         this.requestCount = 0;
-        this.lastRequestStartTime = new Date();
     }
 
     truncateMessagesToTargetLength(messages, targetTokenLength) {
@@ -221,7 +220,6 @@ class ModelPlugin {
     // Default simple logging
     logRequestStart() {
         this.requestCount++;
-        this.lastRequestStartTime = new Date();
         const logMessage = `>>> [${this.requestId}: ${this.pathwayName}.${this.requestCount}] request`;
         const header = '>'.repeat(logMessage.length);
         logger.info(`${header}`);
@@ -229,28 +227,32 @@ class ModelPlugin {
         logger.info(`>>> Making API request to ${obscureUrlParams(this.url)}`);
     }
 
-    logAIRequestFinished() {
-        const
-        const timeElapsed = (currentTime - this.lastRequestStartTime) / 1000;
-        const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${timeElapsed}s - data:`;
+    logAIRequestFinished(requestDuration) {
+        const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${requestDuration}ms - data:`;
         const header = '<'.repeat(logMessage.length);
         logger.info(`${header}`);
         logger.info(`${logMessage}`);
     }
 
+    getLength(data) {
+        const isProd = config.get('env') === 'production';
+        const length = isProd ? data.length : encode(data).length;
+        const units = isProd ? 'characters' : 'tokens';
+        return {length, units};
+    }
+
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
         const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
 
         if (modelInput) {
-            const
-            logger.info(`[request sent containing ${
+            const { length, units } = this.getLength(modelInput);
+            logger.info(`[request sent containing ${length} ${units}]`);
             logger.debug(`${modelInput}`);
         }
 
-        const responseText = JSON.stringify(
-        const
-        logger.info(`[response received containing ${
+        const responseText = JSON.stringify(responseData);
+        const { length, units } = this.getLength(responseText);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${responseText}`);
 
         prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
@@ -267,16 +269,18 @@ class ModelPlugin {
         cortexRequest.cache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
         this.logRequestStart();
 
-        const responseData = await executeRequest(cortexRequest);
+        const { data: responseData, duration: requestDuration } = await executeRequest(cortexRequest);
 
-
-
+        const errorData = Array.isArray(responseData) ? responseData[0] : responseData;
         if (errorData && errorData.error) {
             throw new Error(`Server error: ${JSON.stringify(errorData.error)}`);
         }
 
-        this.
-
+        this.logAIRequestFinished(requestDuration);
+        const parsedData = this.parseResponse(responseData);
+        this.logRequestData(data, parsedData, prompt);
+
+        return parsedData;
     } catch (error) {
         // Log the error and continue
         logger.error(error.message || error);
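The new getLength helper trades precision for speed in production: outside production it reports exact token counts via the cached encoder, while in production it falls back to character counts so logging never tokenizes large payloads. A standalone sketch of the same logic, with a plain environment variable standing in for config.get('env'):

    import { encode } from './lib/encodeCache.js';

    const isProd = process.env.NODE_ENV === 'production'; // stand-in for config.get('env')

    const getLength = (data) => {
        const length = isProd ? data.length : encode(data).length;
        const units = isProd ? 'characters' : 'tokens';
        return { length, units };
    };

    const { length, units } = getLength('How many tokens is this sentence?');
    console.log(`[request sent containing ${length} ${units}]`);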
package/server/plugins/openAiChatPlugin.js
CHANGED

@@ -1,6 +1,5 @@
 // OpenAIChatPlugin.js
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import logger from '../../lib/logger.js';
 
 class OpenAIChatPlugin extends ModelPlugin {
@@ -105,28 +104,28 @@ class OpenAIChatPlugin extends ModelPlugin {
 
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const { stream, messages } = data;
         if (messages && messages.length > 1) {
             logger.info(`[chat request sent containing ${messages.length} messages]`);
-            let
+            let totalLength = 0;
+            let totalUnits;
             messages.forEach((message, index) => {
                 //message.content string or array
                 const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
                 const words = content.split(" ");
-                const
+                const { length, units } = this.getLength(content);
                 const preview = words.length < 41 ? content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
 
-                logger.debug(`
-
+                logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
+                totalLength += length;
+                totalUnits = units;
             });
-            logger.info(`[chat request contained ${
+            logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
         } else {
             const message = messages[0];
             const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
-            const
-            logger.info(`[request sent containing ${
+            const { length, units } = this.getLength(content);
+            logger.info(`[request sent containing ${length} ${units}]`);
             logger.debug(`${content}`);
         }
 
@@ -134,8 +133,8 @@ class OpenAIChatPlugin extends ModelPlugin {
             logger.info(`[response received as an SSE stream]`);
         } else {
             const responseText = this.parseResponse(responseData);
-            const
-            logger.info(`[response received containing ${
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
             logger.debug(`${responseText}`);
         }
 
package/server/plugins/openAiCompletionPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // OpenAICompletionPlugin.js
 
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../../lib/encodeCache.js';
 import logger from '../../lib/logger.js';
 
 // Helper function to truncate the prompt if it is too long
@@ -104,21 +104,20 @@ class OpenAICompletionPlugin extends ModelPlugin {
 
     // Override the logging function to log the prompt and response
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const stream = data.stream;
         const modelInput = data.prompt;
 
-        const
-
+        const { length, units } = this.getLength(modelInput);
+
+        logger.info(`[request sent containing ${length} ${units}]`);
         logger.debug(`${modelInput}`);
 
         if (stream) {
             logger.info(`[response received as an SSE stream]`);
         } else {
             const responseText = this.parseResponse(responseData);
-            const
-            logger.info(`[response received containing ${
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
             logger.debug(`${responseText}`);
         }
 
package/server/plugins/openAiWhisperPlugin.js
CHANGED

@@ -201,6 +201,9 @@ class OpenAIWhisperPlugin extends ModelPlugin {
         const processTS = async (uri) => {
             try {
                 const tsparams = { fileurl:uri };
+
+                const { language } = parameters;
+                if(language) tsparams.language = language;
                 if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
                 if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
                 if(maxLineCount) tsparams.max_line_count = maxLineCount;
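With this plugin change and the Python wrapper change above, a transcription request can pin the language instead of relying on Whisper's auto-detection. A hypothetical call shape, assuming the wrapper is reached over plain HTTP; the endpoint URL, route, and method are placeholders not shown in this diff:

    import axios from 'axios';

    const whisperWrapperUrl = 'http://localhost:5000/transcribe'; // placeholder endpoint

    const tsparams = { fileurl: 'https://example.com/media/interview.mp3' };
    const language = 'ar';                      // e.g. force Arabic
    if (language) tsparams.language = language; // mirrors the new plugin logic
    tsparams.word_timestamps = 'True';          // parsed as a bool by the wrapper

    const { data: transcript } = await axios.get(whisperWrapperUrl, { params: tsparams });
    console.log(transcript);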
package/server/plugins/palmChatPlugin.js
CHANGED

@@ -1,6 +1,5 @@
 // palmChatPlugin.js
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import HandleBars from '../../lib/handleBars.js';
 import logger from '../../lib/logger.js';
 
@@ -181,22 +180,20 @@ class PalmChatPlugin extends ModelPlugin {
 
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const instances = data && data.instances;
         const messages = instances && instances[0] && instances[0].messages;
         const { context, examples } = instances && instances [0] || {};
 
         if (context) {
-            const
-            logger.info(`[chat request contains context information of length ${
-            logger.debug(`
+            const { length, units } = this.getLength(context);
+            logger.info(`[chat request contains context information of length ${length} ${units}]`)
+            logger.debug(`context: ${context}`);
         }
 
         if (examples && examples.length) {
             logger.info(`[chat request contains ${examples.length} examples]`);
             examples.forEach((example, index) => {
-                logger.debug(`
+                logger.debug(`example ${index + 1}: input: "${example.input.content}", output: "${example.output.content}"`);
             });
         }
 
@@ -204,10 +201,10 @@ class PalmChatPlugin extends ModelPlugin {
             logger.info(`[chat request contains ${messages.length} messages]`);
             messages.forEach((message, index) => {
                 const words = message.content.split(" ");
-                const
+                const { length, units } = this.getLength(message.content);
                 const preview = words.length < 41 ? message.content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
 
-                logger.debug(`
+                logger.debug(`message ${index + 1}: author: ${message.author}, ${units}: ${length}, content: "${preview}"`);
             });
         } else if (messages && messages.length === 1) {
             logger.debug(`${messages[0].content}`);
@@ -216,8 +213,8 @@ class PalmChatPlugin extends ModelPlugin {
         const safetyAttributes = this.getSafetyAttributes(responseData);
 
         const responseText = this.parseResponse(responseData);
-        const
-        logger.info(`[response received containing ${
+        const { length, units } = this.getLength(responseText);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${responseText}`);
 
         if (safetyAttributes) {
package/server/plugins/palmCompletionPlugin.js
CHANGED

@@ -1,7 +1,6 @@
 // palmCompletionPlugin.js
 
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import logger from '../../lib/logger.js';
 
 // PalmCompletionPlugin class for handling requests and responses to the PaLM API Text Completion API
@@ -107,22 +106,20 @@ class PalmCompletionPlugin extends ModelPlugin {
 
     // Override the logging function to log the prompt and response
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const safetyAttributes = this.getSafetyAttributes(responseData);
 
         const instances = data && data.instances;
         const modelInput = instances && instances[0] && instances[0].prompt;
 
         if (modelInput) {
-            const
-            logger.info(`[request sent containing ${
+            const { length, units } = this.getLength(modelInput);
+            logger.info(`[request sent containing ${length} ${units}]`);
             logger.debug(`${modelInput}`);
         }
 
         const responseText = this.parseResponse(responseData);
-        const
-        logger.info(`[response received containing ${
+        const { length, units } = this.getLength(responseText);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${responseText}`);
 
         if (safetyAttributes) {
package/tests/chunkfunction.test.js
CHANGED

@@ -1,7 +1,6 @@
 import test from 'ava';
 import { getSemanticChunks, determineTextFormat } from '../server/chunker.js';
-
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../lib/encodeCache.js';
 
 const testText = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
 
package/tests/encodeCache.test.js
ADDED

@@ -0,0 +1,92 @@
+import test from 'ava';
+import { faker } from '@faker-js/faker';
+import { performance } from 'perf_hooks';
+import { encode, decode } from '../lib/encodeCache.js';
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+
+// Test the accuracy of the cached encoding and decoding
+test('cached encode and decode are reversible', t => {
+    const original = faker.lorem.paragraph(50);
+    const encoded = encode(original);
+    const decoded = decode(encoded);
+    t.is(decoded, original);
+})
+
+// Test whether the cached encoding and decoding is identical to the gpt3-encoder
+test('cached encode and decode are identical to noncached', t => {
+    const original = faker.lorem.paragraph(50);
+    const encoded = encode(original);
+    const gpt3Encoded = gpt3Encode(original);
+    t.deepEqual(encoded, gpt3Encoded);
+
+    const decoded = decode(encoded);
+    const gpt3Decoded = gpt3Decode(encoded);
+    t.is(decoded, gpt3Decoded);
+})
+
+// Test whether decoding adds the encoded value to the encode cache
+// the only way to tell is if the encode is faster after the cached decode
+test('decode operation adds to encode cache', t => {
+    const original = faker.lorem.paragraph(50);
+    const encodedOriginal = gpt3Encode(original);
+
+    const startEncode = performance.now();
+    const encoded = encode(original);
+    const endEncode = performance.now();
+    const encodeTime = endEncode - startEncode;
+    console.log("pre-decode encode time", encodeTime);
+
+    t.deepEqual(encoded, encodedOriginal);
+
+    const original2 = faker.lorem.paragraph(50);
+    const encodedOriginal2 = gpt3Encode(original2);
+    const decodedOriginal2 = decode(encodedOriginal2);
+    const startEncode2 = performance.now();
+    const encoded2 = encode(original2);
+    const endEncode2 = performance.now();
+    const encodeTime2 = endEncode2 - startEncode2;
+    console.log("post-decode encode time", encodeTime2);
+
+    t.deepEqual(encoded2, encodedOriginal2);
+    t.true(encodeTime2 <= encodeTime);
+})
+
+
+// Test encode and decode caching
+test('caching', t => {
+    const original = faker.lorem.paragraph(50);
+    const startEncode1 = performance.now();
+    const encoded1 = encode(original);
+    const endEncode1 = performance.now();
+    const encodeTime1 = endEncode1 - startEncode1;
+
+    const original2 = faker.lorem.paragraph(50);
+    const encodedOriginal2 = gpt3Encode(original2);
+    const startDecode1 = performance.now();
+    const decoded1 = decode(encodedOriginal2);
+    const endDecode1 = performance.now();
+    const decodeTime1 = endDecode1 - startDecode1;
+
+    t.deepEqual(encoded1, gpt3Encode(original));
+    t.is(decoded1, original2);
+
+    console.log('uncached encode time', encodeTime1);
+    console.log('uncached decode time', decodeTime1);
+
+    // Second time encoding and decoding, it should be from the cache
+    const startEncode2 = performance.now();
+    const encoded2 = encode(original);
+    const endEncode2 = performance.now();
+    const encodeTime2 = endEncode2 - startEncode2;
+
+    const startDecode2 = performance.now();
+    const decoded2 = decode(encodedOriginal2);
+    const endDecode2 = performance.now();
+    const decodeTime2 = endDecode2 - startDecode2;
+
+    console.log('cached encode time', encodeTime2);
+    console.log('cached decode time', decodeTime2);
+
+    t.true(encodeTime2 <= encodeTime1);
+    t.true(decodeTime2 <= decodeTime1);
+});
package/tests/fastLruCache.test.js
ADDED

@@ -0,0 +1,29 @@
+import test from 'ava';
+import { FastLRUCache } from '../lib/fastLruCache.js';
+
+test('FastLRUCache - get and put', t => {
+    const cache = new FastLRUCache(2);
+
+    cache.put(1, 1);
+    cache.put(2, 2);
+
+    t.is(cache.get(1), 1); // returns 1
+    cache.put(3, 3); // evicts key 2
+    t.is(cache.get(2), -1); // returns -1 (not found)
+    cache.put(4, 4); // evicts key 1
+    t.is(cache.get(1), -1); // returns -1 (not found)
+    t.is(cache.get(3), 3); // returns 3
+    t.is(cache.get(4), 4); // returns 4
+});
+
+test('FastLRUCache - get non-existent key', t => {
+    const cache = new FastLRUCache(2);
+    t.is(cache.get(99), -1); // returns -1 (not found)
+});
+
+test('FastLRUCache - update value of existing key', t => {
+    const cache = new FastLRUCache(2);
+    cache.put(1, 1);
+    cache.put(1, 100);
+    t.is(cache.get(1), 100); // returns updated value 100
+});
package/tests/requestMonitor.test.js
CHANGED

@@ -37,7 +37,7 @@ test('RequestMonitor: getAverageCallDuration', async t => {
 test('RequestMonitor: incrementError429Count', t => {
     const rm = new RequestMonitor();
 
-    rm.incrementError429Count();
+    rm.incrementErrorCount(null, 429);
 
     t.is(rm.error429Count.size(), 1);
 });
@@ -74,7 +74,7 @@ test('RequestMonitor: getError429Rate', t => {
 
     rm.startCall();
     rm.endCall();
-    rm.incrementError429Count();
+    rm.incrementErrorCount(null, 429);
 
     t.is(rm.getError429Rate(), 1);
 });
@@ -84,7 +84,7 @@ test('RequestMonitor: reset', t => {
 
     rm.startCall();
     rm.endCall();
-    rm.incrementError429Count();
+    rm.incrementErrorCount(null, 429);
 
     rm.reset();
 
package/tests/truncateMessages.test.js
CHANGED

@@ -1,7 +1,7 @@
 // ModelPlugin.test.js
 import test from 'ava';
 import ModelPlugin from '../server/plugins/modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../lib/encodeCache.js';
 import { mockPathwayResolverString } from './mocks.js';
 
 const { config, pathway, modelName, model } = mockPathwayResolverString;