@aj-archipelago/cortex 1.1.5 → 1.1.6

package/config.js CHANGED
@@ -118,7 +118,7 @@ var config = convict({
118
118
  "api-key": "{{AZURE_COGNITIVE_API_KEY}}",
119
119
  "Content-Type": "application/json"
120
120
  },
121
- "requestsPerSecond": 6
121
+ "requestsPerSecond": 10
122
122
  },
123
123
  "oai-embeddings": {
124
124
  "type": "OPENAI-EMBEDDINGS",
@@ -38,9 +38,14 @@ def transcribe(params):
38
38
  if 'word_timestamps' in params: #parse as bool
39
39
  word_timestamps = False if params['word_timestamps'] == 'False' else True
40
40
 
41
+ decode_options = {}
42
+ if 'language' in params:
43
+ decode_options["language"] = params["language"]
44
+ print(f"Transcription language set as {decode_options['language']}")
45
+
41
46
  print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
42
47
  start_time = time.time()
43
- result = model.transcribe(fileurl, word_timestamps=word_timestamps)
48
+ result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
44
49
  end_time = time.time()
45
50
  execution_time = end_time - start_time
46
51
  print("Transcribe execution time:", execution_time, "seconds")
@@ -0,0 +1,38 @@
1
+ import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
2
+ import { FastLRUCache } from './fastLruCache.js';
3
+
4
+ class EncodeCache {
5
+ constructor() {
6
+ this.encodeCache = new FastLRUCache(1000);
7
+ this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
8
+ }
9
+
10
+ encode(value) {
11
+ if (this.encodeCache.get(value) !== -1) {
12
+ return this.encodeCache.get(value);
13
+ }
14
+ const encoded = gpt3Encode(value);
15
+ this.encodeCache.put(value, encoded);
16
+ return encoded;
17
+ }
18
+
19
+ decode(value) {
20
+ if (this.decodeCache.get(value) !== -1) {
21
+ return this.decodeCache.get(value);
22
+ }
23
+ const decoded = gpt3Decode(value);
24
+ this.decodeCache.put(value, decoded);
25
+ if (this.encodeCache.get(decoded) === -1) {
26
+ this.encodeCache.put(decoded, value);
27
+ }
28
+ return decoded;
29
+ }
30
+ }
31
+
32
+ // Create one instance of the cache
33
+ const cache = new EncodeCache();
34
+
35
+ // Make sure the instance is bound to the methods, so
36
+ // references to 'this' are correct
37
+ export const encode = cache.encode.bind(cache);
38
+ export const decode = cache.decode.bind(cache);
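This new module (imported elsewhere in this diff as '../lib/encodeCache.js') memoizes gpt-3-encoder calls behind LRU caches and exports bound encode/decode functions as a drop-in replacement for the library's own exports. A minimal usage sketch under that assumption (illustrative, not part of the diff):

// Illustrative only; the relative import path is an assumption
import { encode, decode } from './lib/encodeCache.js';

const tokens = encode('hello world');        // first call: computed by gpt-3-encoder, then cached
const cached = encode('hello world');        // second call: served from the encode LRU cache
console.log(tokens.length, decode(tokens));  // token count and the round-tripped string

Note that FastLRUCache signals a miss with -1, so this scheme relies on encode and decode never producing -1 as a value, which holds here because they return token arrays and strings.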
@@ -0,0 +1,82 @@
1
+ // This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
2
+
3
+ class Node {
4
+ constructor(key, value) {
5
+ this.key = key;
6
+ this.value = value;
7
+ this.next = null;
8
+ this.prev = null;
9
+ }
10
+ }
11
+
12
+ class FastLRUCache {
13
+ constructor(capacity) {
14
+ this.capacity = capacity;
15
+ this.cache = new Map();
16
+ this.head = null;
17
+ this.tail = null;
18
+ }
19
+
20
+ get(key) {
21
+ if (!this.cache.has(key)) {
22
+ return -1;
23
+ }
24
+ const node = this.cache.get(key);
25
+ this.moveToEnd(node);
26
+ return node.value;
27
+ }
28
+
29
+ put(key, value) {
30
+ if (this.cache.has(key)) {
31
+ const node = this.cache.get(key);
32
+ node.value = value;
33
+ this.moveToEnd(node);
34
+ } else {
35
+ const node = new Node(key, value);
36
+ if (this.cache.size >= this.capacity) {
37
+ this.cache.delete(this.head.key);
38
+ this.shiftHeadToNext();
39
+ }
40
+ this.cache.set(key, node);
41
+ this.addNodeToTail(node);
42
+ }
43
+ }
44
+
45
+ addNodeToTail(node) {
46
+ if (!this.tail) {
47
+ this.head = node;
48
+ this.tail = node;
49
+ } else {
50
+ node.prev = this.tail;
51
+ this.tail.next = node;
52
+ this.tail = node;
53
+ }
54
+ }
55
+
56
+ moveToEnd(node) {
57
+ if (node === this.tail) {
58
+ return;
59
+ }
60
+ if (node === this.head) {
61
+ this.shiftHeadToNext();
62
+ } else {
63
+ node.prev.next = node.next;
64
+ node.next.prev = node.prev;
65
+ }
66
+ node.prev = this.tail;
67
+ node.next = null;
68
+ this.tail.next = node;
69
+ this.tail = node;
70
+ }
71
+
72
+ shiftHeadToNext() {
73
+ this.head = this.head.next;
74
+ if (this.head) {
75
+ this.head.prev = null;
76
+ } else {
77
+ this.tail = null;
78
+ }
79
+ }
80
+ }
81
+
82
+ export { FastLRUCache };
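The cache pairs a Map (key to node) with a doubly linked list ordered from least recently used at the head to most recently used at the tail, so both get and put run in O(1). A short eviction sketch, mirroring the unit tests added later in this diff (illustrative, not part of the package):

import { FastLRUCache } from './lib/fastLruCache.js';  // path assumed

const cache = new FastLRUCache(2);
cache.put('a', 1);
cache.put('b', 2);
cache.get('a');               // touching 'a' makes it most recently used
cache.put('c', 3);            // over capacity: evicts 'b', the least recently used key
console.log(cache.get('b'));  // -1 (miss)
console.log(cache.get('a'));  // 1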
@@ -1,5 +1,5 @@
1
1
  // pathwayTools.js
2
- import { encode , decode } from 'gpt-3-encoder';
2
+ import { encode, decode } from '../lib/encodeCache.js';
3
3
  import { config } from '../config.js';
4
4
 
5
5
  // callPathway - call a pathway from another pathway
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
57
57
 
58
58
  endpoint.limiter.on('failed', (error, info) => {
59
59
  if (error.name === 'CanceledError') {
60
- logger.debug(`Request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
60
+ logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
61
+ endpoint.monitor.incrementErrorCount();
61
62
  } else {
62
- logger.error(`Request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error}`);
63
+ logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
63
64
  }
64
65
  });
65
66
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
154
155
  });
155
156
  }
156
157
 
158
+ //log statistics about active endpoints
157
159
  setInterval(() => {
158
160
  // Iterate over each model
159
161
  for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,30 +181,51 @@ setInterval(() => {
179
181
  endpointIndex++;
180
182
  });
181
183
  }
182
- }, 10000); // Log rates every 10 seconds (10000 ms).
184
+ }, 30000); // Log rates every 30 seconds
183
185
 
184
186
  const postWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
185
- return cortexAxios.post(url, data, axiosConfigObj);
187
+ const callId = endpoint?.monitor?.startCall();
188
+ let response;
189
+ try {
190
+ response = await cortexAxios.post(url, data, axiosConfigObj);
191
+ } catch (error) {
192
+ // throw new error with duration as part of the error data
193
+ throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
194
+ }
195
+ let duration;
196
+ if (response.status >= 200 && response.status < 300) {
197
+ duration = endpoint?.monitor?.endCall(callId);
198
+ } else {
199
+ duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
200
+ }
201
+
202
+ return { response, duration };
186
203
  }
187
204
 
188
205
  const MAX_RETRY = 10; // retries for error handling
189
206
  const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
190
207
  const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
191
208
 
209
+ const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
210
+ const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
211
+ const jitter = duplicateRequestTime * 0.2 * Math.random();
212
+ const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
213
+ return duplicateRequestTimeout;
214
+ }
215
+
192
216
  const postRequest = async (cortexRequest) => {
193
217
  let promises = [];
218
+ // retry certain errors up to MAX_RETRY times
194
219
  for (let i = 0; i < MAX_RETRY; i++) {
195
220
  const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
196
221
  const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
197
- let maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
198
- let duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
199
-
200
- if (enableDuplicateRequests) {
201
- //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
202
- }
222
+ const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
223
+ const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
203
224
 
204
225
  const axiosConfigObj = { params, headers, cache };
205
226
  const streamRequested = (stream || params?.stream || data?.stream);
227
+ // if we're using streaming, duplicate requests are
228
+ // not supported, so we just push one promise into the array
206
229
  if (streamRequested && model.supportsStreaming) {
207
230
  axiosConfigObj.responseType = 'stream';
208
231
  promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
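The new getDuplicateRequestDelay helper spaces duplicate requests exponentially: for request index n the base delay is duplicateRequestAfter * (2^n - 1), plus up to 20% random jitter. A worked sketch of the schedule with the default DUPLICATE_REQUEST_AFTER of 10 seconds (jitter omitted):

// Worked example of the delay formula introduced in this hunk (milliseconds)
const duplicateRequestAfter = 10 * 1000;   // default DUPLICATE_REQUEST_AFTER, converted to ms
for (let index = 0; index < 3; index++) {  // MAX_DUPLICATE_REQUESTS = 3
  const base = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
  console.log(index, base);                // 0 -> 0 ms, 1 -> 10000 ms, 2 -> 30000 ms
}
// The helper then adds base * 0.2 * Math.random() of jitter and clamps the result at 0.

So the original request fires immediately, the first duplicate roughly 10 seconds later, and the second duplicate roughly 30 seconds after the original.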
@@ -212,14 +235,20 @@ const postRequest = async (cortexRequest) => {
212
235
  axiosConfigObj.params.stream = false;
213
236
  data.stream = false;
214
237
  }
238
+ // if we're not streaming, we push at least one promise
239
+ // into the array, but if we're supporting duplicate
240
+ // requests we push one for each potential duplicate,
241
+ // heading to a new endpoint (if available) and
242
+ // staggered by a jittered amount of time
215
243
  const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
216
244
  promises = controllers.map((controller, index) =>
217
245
  new Promise((resolve, reject) => {
218
- const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
219
- const jitter = duplicateRequestTime * 0.2 * Math.random();
220
- const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
221
246
  setTimeout(async () => {
222
247
  try {
248
+ if (index > 0) {
249
+ cortexRequest.selectNewEndpoint();
250
+ }
251
+ const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
223
252
  const endpointName = selectedEndpoint.name || model;
224
253
  if (!selectedEndpoint.limiter) {
225
254
  throw new Error(`No limiter for endpoint ${endpointName}!`);
@@ -227,52 +256,27 @@ const postRequest = async (cortexRequest) => {
227
256
  const axiosConfigObj = { params, headers, cache };
228
257
 
229
258
  let response = null;
259
+ let duration = null;
230
260
 
231
261
  if (!controller.signal?.aborted) {
232
262
 
233
263
  axiosConfigObj.signal = controller.signal;
234
264
  axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
235
265
 
236
- if (index === 0) {
237
- //logger.info(`>>> [${requestId}] sending request to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`);
238
- } else {
239
- if (model.supportsStreaming) {
240
- axiosConfigObj.responseType = 'stream';
241
- axiosConfigObj.cache = false;
242
- }
243
- const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
266
+ if (index > 0) {
267
+ const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
244
268
  const header = '>'.repeat(logMessage.length);
245
269
  logger.info(`\n${header}\n${logMessage}`);
246
270
  }
247
271
 
248
- response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj));
272
+ ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
249
273
 
250
274
  if (!controller.signal?.aborted) {
251
-
252
275
  logger.debug(`<<< [${requestId}] received response for request ${index}`);
253
-
254
- if (axiosConfigObj.responseType === 'stream') {
255
- // Buffering and collecting the stream data
256
- logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
257
- response = await new Promise((resolve, reject) => {
258
- let responseData = '';
259
- response.data.on('data', (chunk) => {
260
- responseData += chunk;
261
- logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
262
- });
263
- response.data.on('end', () => {
264
- response.data = JSON.parse(responseData);
265
- resolve(response);
266
- });
267
- response.data.on('error', (error) => {
268
- reject(error);
269
- });
270
- });
271
- }
272
276
  }
273
277
  }
274
278
 
275
- resolve(response);
279
+ resolve({ response, duration });
276
280
 
277
281
  } catch (error) {
278
282
  if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +289,48 @@ const postRequest = async (cortexRequest) => {
285
289
  } finally {
286
290
  controllers.forEach(controller => controller.abort());
287
291
  }
288
- }, duplicateRequestTimeout);
292
+ }, getDuplicateRequestDelay(index, duplicateRequestAfter));
289
293
  })
290
294
  );
291
295
  }
292
296
 
297
+ // no requests have been made yet, but the promises array
298
+ // is full, so now we execute them in parallel
293
299
  try {
294
- const response = await Promise.race(promises);
300
+ const { response, duration } = await Promise.race(promises);
295
301
 
296
302
  // if response status is 2xx
297
303
  if (response.status >= 200 && response.status < 300) {
298
- return response;
304
+ return { response, duration };
299
305
  } else {
300
306
  throw new Error(`Received error response: ${response.status}`);
301
307
  }
302
308
  } catch (error) {
303
- if (error.response) {
304
- selectedEndpoint.monitor.incrementErrorCount();
305
- const status = error.response.status;
306
-
307
- if (status === 429) {
308
- selectedEndpoint.monitor.incrementError429Count();
309
- }
310
-
309
+ const { response, duration } = error;
310
+ if (response) {
311
+ const status = response.status;
312
+ // if there is only one endpoint, only retry select error codes
311
313
  if (cortexRequest.model.endpoints.length === 1) {
312
- if (status !== 429) {
313
- return error.response;
314
+ if (status !== 429 &&
315
+ status !== 408 &&
316
+ status !== 502 &&
317
+ status !== 503 &&
318
+ status !== 504) {
319
+ return { response, duration };
314
320
  }
315
321
  } else {
316
- // if there are multiple endpoints, retry everything
322
+ // if there are multiple endpoints, retry everything as it
323
+ // could be going to a different host
317
324
  cortexRequest.selectNewEndpoint();
318
325
  }
319
326
 
320
- logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
327
+ logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
321
328
  if (i < MAX_RETRY - 1) {
322
329
  const backoffTime = 200 * Math.pow(2, i);
323
330
  const jitter = backoffTime * 0.2 * Math.random();
324
331
  await new Promise(r => setTimeout(r, backoffTime + jitter));
325
332
  } else {
326
- return error.response;
333
+ return { response, duration };
327
334
  }
328
335
  } else {
329
336
  throw error;
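Two behaviors interact in this retry block. The retry policy is now status-aware: with a single endpoint, only 429, 408, 502, 503, and 504 responses are retried and anything else is returned immediately, while with multiple endpoints every error response triggers selectNewEndpoint() and a retry. The wait between retries is the pre-existing exponential backoff with 20% jitter; a worked sketch of those base delays (illustrative, jitter omitted):

// Worked example of the retry backoff used above (milliseconds, jitter omitted)
for (let i = 0; i < 4; i++) {
  console.log(i, 200 * Math.pow(2, i));  // 0 -> 200, 1 -> 400, 2 -> 800, 3 -> 1600
}
// Each wait adds up to 20% random jitter; the loop gives up after MAX_RETRY (10) attempts.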
@@ -334,10 +341,7 @@ const postRequest = async (cortexRequest) => {
334
341
 
335
342
  const executeRequest = async (cortexRequest) => {
336
343
  try {
337
- const endpoint = cortexRequest.selectedEndpoint;
338
- const callId = endpoint?.monitor?.startCall();
339
- const response = await postRequest(cortexRequest);
340
- endpoint?.monitor?.endCall(callId);
344
+ const { response, duration } = await postRequest(cortexRequest);
341
345
  const requestId = cortexRequest.requestId;
342
346
  const { error, data, cached } = response;
343
347
  if (cached) {
@@ -347,8 +351,7 @@ const executeRequest = async (cortexRequest) => {
347
351
  const lastError = error[error.length - 1];
348
352
  return { error: lastError.toJSON() ?? lastError ?? error };
349
353
  }
350
- //logger.info(`<<< [${requestId}] response: ${data.choices[0].delta || data.choices[0]}`)
351
- return data;
354
+ return { data, duration };
352
355
  } catch (error) {
353
356
  logger.error(`Error in request: ${error.message || error}`);
354
357
  return { error: error };
@@ -1,5 +1,4 @@
1
1
  import { v4 as uuidv4 } from 'uuid';
2
- // eslint-disable-next-line import/no-extraneous-dependencies
3
2
  import { Deque } from '@datastructures-js/deque';
4
3
 
5
4
  class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
20
19
  return this.healthy;
21
20
  }
22
21
 
22
+ removeOldCallStarts() {
23
+ const currentTime = new Date();
24
+ for (const [callId, startTime] of this.callStartTimes) {
25
+ if (currentTime - startTime > this.ageOutTime) {
26
+ this.callStartTimes.delete(callId);
27
+ }
28
+ }
29
+ }
30
+
23
31
  removeOldCallStats(dq, timeProperty) {
24
32
  const currentTime = new Date();
25
33
  while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
28
36
  }
29
37
 
30
38
  maintain() {
39
+ this.removeOldCallStarts();
31
40
  this.removeOldCallStats(this.callCount);
32
41
  if (this.callCount.size() === 0) {
33
42
  this.peakCallRate = 0;
@@ -36,7 +45,7 @@ class RequestMonitor {
36
45
  this.removeOldCallStats(this.error429Count);
37
46
  this.removeOldCallStats(this.errorCount);
38
47
 
39
- if (this.getErrorRate() > 0.3) {
48
+ if (this.getErrorRate() > 0.1) {
40
49
  this.healthy = false;
41
50
  } else {
42
51
  this.healthy = true;
@@ -55,10 +64,11 @@ class RequestMonitor {
55
64
  endCall(callId) {
56
65
  const endTime = new Date();
57
66
  const startTime = this.callStartTimes.get(callId);
67
+ let callDuration = null;
58
68
 
59
69
  if (startTime) {
70
+ callDuration = (endTime - startTime);
60
71
  this.callStartTimes.delete(callId);
61
- const callDuration = endTime - startTime;
62
72
  this.callDurations.pushBack({endTime, callDuration});
63
73
 
64
74
  // Keep the callDurations length to 5
@@ -73,6 +83,7 @@ class RequestMonitor {
73
83
  }
74
84
 
75
85
  this.maintain();
86
+ return callDuration;
76
87
  }
77
88
 
78
89
  getAverageCallDuration() {
@@ -84,14 +95,13 @@ class RequestMonitor {
84
95
  return sum / this.callDurations.size();
85
96
  }
86
97
 
87
- incrementError429Count() {
88
- this.error429Count.pushBack(new Date());
89
- this.maintain();
90
- }
91
-
92
- incrementErrorCount() {
98
+ incrementErrorCount(callId, status) {
93
99
  this.errorCount.pushBack(new Date());
100
+ if (status === 429) {
101
+ this.error429Count.pushBack(new Date());
102
+ }
94
103
  this.maintain();
104
+ return callId ? this.endCall(callId) : null;
95
105
  }
96
106
 
97
107
  getCallRate() {
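RequestMonitor now folds 429 tracking into incrementErrorCount(callId, status) and returns the call duration from both endCall and incrementErrorCount, which is how postWithMonitor can report latency even for failed calls. A hedged usage sketch; the export style and import path for RequestMonitor are assumptions, since they are not shown in this diff:

// Hypothetical usage; import path and default export are assumptions
import RequestMonitor from './lib/requestMonitor.js';

const monitor = new RequestMonitor();
const callId = monitor.startCall();
try {
  // ... perform the request ...
  const duration = monitor.endCall(callId);  // duration in ms for a successful call
  console.log(`succeeded in ${duration}ms`);
} catch (err) {
  // a 429 status is also counted toward error429Count; a duration is still returned
  const duration = monitor.incrementErrorCount(callId, err?.response?.status || null);
  console.log(`failed after ${duration}ms`);
}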
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.1.5",
3
+ "version": "1.1.6",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
@@ -60,6 +60,7 @@
60
60
  "ws": "^8.12.0"
61
61
  },
62
62
  "devDependencies": {
63
+ "@faker-js/faker": "^8.4.1",
63
64
  "ava": "^5.2.0",
64
65
  "dotenv": "^16.0.3",
65
66
  "eslint": "^8.38.0",
@@ -14,19 +14,21 @@ export default {
14
14
  typeDef,
15
15
  rootResolver,
16
16
  resolver,
17
- inputFormat: 'text', // text or html - changes the behavior of the input chunking
17
+ inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
18
18
  useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
19
19
  useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
20
+ joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
20
21
  useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
21
22
  truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
22
23
  timeout: 120, // seconds, cancels the pathway after this many seconds
24
+ enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
23
25
  duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
24
26
  // override the default execution of the pathway
25
- // callback signature: excuteOverride({args: object, runAllPrompts: function})
27
+ // callback signature: executeOverride({args: object, runAllPrompts: function})
26
28
  // args: the input arguments to the pathway
27
29
  // runAllPrompts: a function that runs all prompts in the pathway and returns the result
28
30
  executePathway: undefined,
29
31
  // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
30
- temperature: undefined,
32
+ temperature: 0.9,
31
33
  };
32
34
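The two new defaults here, joinChunksWith and enableDuplicateRequests, are consumed by the PathwayResolver and request-executor changes elsewhere in this diff. A hedged sketch of a pathway definition that overrides them (the option names come from this hunk; the module shape is otherwise assumed):

// Hypothetical pathway module exercising the new options (other pathway fields omitted)
export default {
  useInputChunking: true,          // split input into chunks to fit the context window
  joinChunksWith: '\n\n',          // string used to join per-chunk results
  enableDuplicateRequests: true,   // send a backup request if the first one stalls
  duplicateRequestAfter: 10,       // seconds before the first duplicate request
  timeout: 120,                    // seconds before the pathway is cancelled
  temperature: 0.9,
};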
 
package/server/chunker.js CHANGED
@@ -1,4 +1,4 @@
1
- import { encode, decode } from 'gpt-3-encoder';
1
+ import { encode, decode } from '../lib/encodeCache.js';
2
2
  import cheerio from 'cheerio';
3
3
 
4
4
  const getLastNToken = (text, maxTokenLen) => {
package/server/graphql.js CHANGED
@@ -131,7 +131,7 @@ const build = async (config) => {
131
131
 
132
132
  const app = express();
133
133
 
134
- app.use(express.json({ limit: '50mb' }));
134
+ app.use(express.json({ limit: '200mb' }));
135
135
 
136
136
  const httpServer = http.createServer(app);
137
137
 
@@ -2,7 +2,7 @@ import { ModelExecutor } from './modelExecutor.js';
2
2
  import { modelEndpoints } from '../lib/requestExecutor.js';
3
3
  // eslint-disable-next-line import/no-extraneous-dependencies
4
4
  import { v4 as uuidv4 } from 'uuid';
5
- import { encode } from 'gpt-3-encoder';
5
+ import { encode } from '../lib/encodeCache.js';
6
6
  import { getFirstNToken, getLastNToken, getSemanticChunks } from './chunker.js';
7
7
  import { PathwayResponseParser } from './pathwayResponseParser.js';
8
8
  import { Prompt } from './prompt.js';
@@ -339,7 +339,7 @@ class PathwayResolver {
339
339
  const data = await Promise.all(chunks.map(chunk =>
340
340
  this.applyPromptsSerially(chunk, parameters)));
341
341
  // Join the chunks with newlines
342
- return data.join("\n\n");
342
+ return data.join(this.pathway.joinChunksWith || "\n\n");
343
343
  } else {
344
344
  // Apply prompts one by one, serially, across all chunks
345
345
  // This is the default processing mode and will make previousResult available at the object level
@@ -373,7 +373,7 @@ class PathwayResolver {
373
373
  if (result.length === 1) {
374
374
  result = result[0];
375
375
  } else if (!currentParameters.stream) {
376
- result = result.join("\n\n");
376
+ result = result.join(this.pathway.joinChunksWith || "\n\n");
377
377
  }
378
378
  }
379
379
 
@@ -6,6 +6,7 @@ import path from 'path';
6
6
  import { config } from '../../config.js';
7
7
  import { axios } from '../../lib/requestExecutor.js';
8
8
  import logger from '../../lib/logger.js';
9
+ import { getSemanticChunks } from '../chunker.js';
9
10
 
10
11
  const API_URL = config.get('whisperMediaApiUrl');
11
12
 
@@ -37,7 +38,8 @@ class AzureCognitivePlugin extends ModelPlugin {
37
38
  const data = {};
38
39
 
39
40
  if (mode == 'delete') {
40
- const searchUrl = this.ensureMode(this.requestUrl(text), 'search');
41
+ let searchUrl = this.ensureMode(this.requestUrl(text), 'search');
42
+ searchUrl = this.ensureIndex(searchUrl, indexName);
41
43
  let searchQuery = `owner:${savedContextId}`;
42
44
 
43
45
  if (docId) {
@@ -155,6 +157,7 @@ class AzureCognitivePlugin extends ModelPlugin {
155
157
  const headers = cortexRequest.headers;
156
158
 
157
159
  const { file } = parameters;
160
+ const fileData = { value: [] };
158
161
  if(file){
159
162
  let url = file;
160
163
  //if not txt file, use helper app to convert to txt
@@ -177,11 +180,13 @@ class AzureCognitivePlugin extends ModelPlugin {
177
180
  throw Error(`No data can be extracted out of file!`);
178
181
  }
179
182
 
180
- return await callPathway('cognitive_insert', {...parameters, file:null, text:data });
181
- }
183
+ const chunkTokenLength = this.promptParameters.inputChunkSize || 1000;
184
+ const chunks = getSemanticChunks(data, chunkTokenLength);
182
185
 
183
- if (mode === 'index' && (!text || !text.trim()) ){
184
- return; // nothing to index
186
+ for (const text of chunks) {
187
+ const { data: singleData } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest)
188
+ fileData.value.push(singleData.value[0]);
189
+ }
185
190
  }
186
191
 
187
192
  const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
@@ -195,7 +200,7 @@ class AzureCognitivePlugin extends ModelPlugin {
195
200
 
196
201
  // execute the request
197
202
  cortexRequest.url = url;
198
- cortexRequest.data = data;
203
+ cortexRequest.data = (mode === 'index' && fileData.value.length>0) ? fileData : data;
199
204
  cortexRequest.params = params;
200
205
  cortexRequest.headers = headers;
201
206
  const result = await this.executeRequest(cortexRequest);
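For index mode, extracted file content is now split with getSemanticChunks (at inputChunkSize tokens, defaulting to 1000) and each chunk becomes its own index document before the combined fileData payload is sent. A minimal sketch of the chunking call as used above (illustrative; the text value is made up):

// Illustrative use of the chunker call from this hunk
import { getSemanticChunks } from './server/chunker.js';  // path assumed

const longText = 'First paragraph of an extracted document...\n\nSecond paragraph...';
const chunks = getSemanticChunks(longText, 1000);  // chunks of roughly 1000 tokens each
console.log(`split into ${chunks.length} chunk(s)`);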
@@ -45,8 +45,6 @@ class AzureTranslatePlugin extends ModelPlugin {
45
45
 
46
46
  // Override the logging function to display the request and response
47
47
  logRequestData(data, responseData, prompt) {
48
- this.logAIRequestFinished();
49
-
50
48
  const modelInput = data[0].Text;
51
49
 
52
50
  logger.debug(`${modelInput}`);
@@ -1,6 +1,5 @@
1
1
  // geminiChatPlugin.js
2
2
  import ModelPlugin from './modelPlugin.js';
3
- import { encode } from 'gpt-3-encoder';
4
3
  import logger from '../../lib/logger.js';
5
4
 
6
5
  const mergeResults = (data) => {
@@ -148,8 +147,6 @@ class GeminiChatPlugin extends ModelPlugin {
148
147
 
149
148
  // Override the logging function to display the messages and responses
150
149
  logRequestData(data, responseData, prompt) {
151
- this.logAIRequestFinished();
152
-
153
150
  const messages = data && data.contents;
154
151
 
155
152
  if (messages && messages.length > 1) {
@@ -162,10 +159,10 @@ class GeminiChatPlugin extends ModelPlugin {
162
159
  return acc;
163
160
  } , '');
164
161
  const words = messageContent.split(" ");
165
- const tokenCount = encode(messageContent).length;
162
+ const { length, units } = this.getLength(messageContent);
166
163
  const preview = words.length < 41 ? messageContent : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
167
164
 
168
- logger.debug(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
165
+ logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
169
166
  });
170
167
  } else if (messages && messages.length === 1) {
171
168
  logger.debug(`${messages[0].parts[0].text}`);
@@ -180,8 +177,8 @@ class GeminiChatPlugin extends ModelPlugin {
180
177
  logger.warn(`!!! response was blocked because the input or response potentially violates policies`);
181
178
  logger.debug(`Safety Ratings: ${JSON.stringify(safetyRatings, null, 2)}`);
182
179
  }
183
- const responseTokens = encode(mergedResult).length;
184
- logger.info(`[response received containing ${responseTokens} tokens]`);
180
+ const { length, units } = this.getLength(mergedResult);
181
+ logger.info(`[response received containing ${length} ${units}]`);
185
182
  logger.debug(`${mergedResult}`);
186
183
  }
187
184
 
@@ -1,7 +1,7 @@
1
1
  // localModelPlugin.js
2
2
  import ModelPlugin from './modelPlugin.js';
3
3
  import { execFileSync } from 'child_process';
4
- import { encode } from 'gpt-3-encoder';
4
+ import { encode } from '../../lib/encodeCache.js';
5
5
  import logger from '../../lib/logger.js';
6
6
 
7
7
  class LocalModelPlugin extends ModelPlugin {
@@ -1,7 +1,7 @@
1
1
  // ModelPlugin.js
2
2
  import HandleBars from '../../lib/handleBars.js';
3
3
  import { executeRequest } from '../../lib/requestExecutor.js';
4
- import { encode } from 'gpt-3-encoder';
4
+ import { encode } from '../../lib/encodeCache.js';
5
5
  import { getFirstNToken } from '../chunker.js';
6
6
  import logger, { obscureUrlParams } from '../../lib/logger.js';
7
7
  import { config } from '../../config.js';
@@ -32,7 +32,6 @@ class ModelPlugin {
32
32
  }
33
33
 
34
34
  this.requestCount = 0;
35
- this.lastRequestStartTime = new Date();
36
35
  }
37
36
 
38
37
  truncateMessagesToTargetLength(messages, targetTokenLength) {
@@ -221,7 +220,6 @@ class ModelPlugin {
221
220
  // Default simple logging
222
221
  logRequestStart() {
223
222
  this.requestCount++;
224
- this.lastRequestStartTime = new Date();
225
223
  const logMessage = `>>> [${this.requestId}: ${this.pathwayName}.${this.requestCount}] request`;
226
224
  const header = '>'.repeat(logMessage.length);
227
225
  logger.info(`${header}`);
@@ -229,28 +227,32 @@ class ModelPlugin {
229
227
  logger.info(`>>> Making API request to ${obscureUrlParams(this.url)}`);
230
228
  }
231
229
 
232
- logAIRequestFinished() {
233
- const currentTime = new Date();
234
- const timeElapsed = (currentTime - this.lastRequestStartTime) / 1000;
235
- const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${timeElapsed}s - data:`;
230
+ logAIRequestFinished(requestDuration) {
231
+ const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${requestDuration}ms - data:`;
236
232
  const header = '<'.repeat(logMessage.length);
237
233
  logger.info(`${header}`);
238
234
  logger.info(`${logMessage}`);
239
235
  }
240
236
 
237
+ getLength(data) {
238
+ const isProd = config.get('env') === 'production';
239
+ const length = isProd ? data.length : encode(data).length;
240
+ const units = isProd ? 'characters' : 'tokens';
241
+ return {length, units};
242
+ }
243
+
241
244
  logRequestData(data, responseData, prompt) {
242
- this.logAIRequestFinished();
243
245
  const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
244
246
 
245
247
  if (modelInput) {
246
- const inputTokens = encode(modelInput).length;
247
- logger.info(`[request sent containing ${inputTokens} tokens]`);
248
+ const { length, units } = this.getLength(modelInput);
249
+ logger.info(`[request sent containing ${length} ${units}]`);
248
250
  logger.debug(`${modelInput}`);
249
251
  }
250
252
 
251
- const responseText = JSON.stringify(this.parseResponse(responseData));
252
- const responseTokens = encode(responseText).length;
253
- logger.info(`[response received containing ${responseTokens} tokens]`);
253
+ const responseText = JSON.stringify(responseData);
254
+ const { length, units } = this.getLength(responseText);
255
+ logger.info(`[response received containing ${length} ${units}]`);
254
256
  logger.debug(`${responseText}`);
255
257
 
256
258
  prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
@@ -267,16 +269,18 @@ class ModelPlugin {
267
269
  cortexRequest.cache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
268
270
  this.logRequestStart();
269
271
 
270
- const responseData = await executeRequest(cortexRequest);
272
+ const { data: responseData, duration: requestDuration } = await executeRequest(cortexRequest);
271
273
 
272
- let errorData = Array.isArray(responseData) ? responseData[0] : responseData;
273
-
274
+ const errorData = Array.isArray(responseData) ? responseData[0] : responseData;
274
275
  if (errorData && errorData.error) {
275
276
  throw new Error(`Server error: ${JSON.stringify(errorData.error)}`);
276
277
  }
277
278
 
278
- this.logRequestData(data, responseData, prompt);
279
- return this.parseResponse(responseData);
279
+ this.logAIRequestFinished(requestDuration);
280
+ const parsedData = this.parseResponse(responseData);
281
+ this.logRequestData(data, parsedData, prompt);
282
+
283
+ return parsedData;
280
284
  } catch (error) {
281
285
  // Log the error and continue
282
286
  logger.error(error.message || error);
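getLength keeps token-accurate logging outside production but falls back to cheap character counts in production, which is why the surrounding log lines now report a unit ('tokens' or 'characters') alongside the number. A standalone illustration of the branch (the config lookup and cached encoder are stand-ins here):

// Illustration of the getLength() branch added above
import { encode } from './lib/encodeCache.js';            // path assumed

const isProd = process.env.NODE_ENV === 'production';     // stand-in for config.get('env')
const text = 'The quick brown fox jumps over the lazy dog';
const length = isProd ? text.length : encode(text).length;
const units = isProd ? 'characters' : 'tokens';
console.log(`[request sent containing ${length} ${units}]`);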
@@ -1,6 +1,5 @@
1
1
  // OpenAIChatPlugin.js
2
2
  import ModelPlugin from './modelPlugin.js';
3
- import { encode } from 'gpt-3-encoder';
4
3
  import logger from '../../lib/logger.js';
5
4
 
6
5
  class OpenAIChatPlugin extends ModelPlugin {
@@ -105,28 +104,28 @@ class OpenAIChatPlugin extends ModelPlugin {
105
104
 
106
105
  // Override the logging function to display the messages and responses
107
106
  logRequestData(data, responseData, prompt) {
108
- this.logAIRequestFinished();
109
-
110
107
  const { stream, messages } = data;
111
108
  if (messages && messages.length > 1) {
112
109
  logger.info(`[chat request sent containing ${messages.length} messages]`);
113
- let totalTokens = 0;
110
+ let totalLength = 0;
111
+ let totalUnits;
114
112
  messages.forEach((message, index) => {
115
113
  //message.content string or array
116
114
  const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
117
115
  const words = content.split(" ");
118
- const tokenCount = encode(content).length;
116
+ const { length, units } = this.getLength(content);
119
117
  const preview = words.length < 41 ? content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
120
118
 
121
- logger.debug(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
122
- totalTokens += tokenCount;
119
+ logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
120
+ totalLength += length;
121
+ totalUnits = units;
123
122
  });
124
- logger.info(`[chat request contained ${totalTokens} tokens]`);
123
+ logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
125
124
  } else {
126
125
  const message = messages[0];
127
126
  const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
128
- const tokenCount = encode(content).length;
129
- logger.info(`[request sent containing ${tokenCount} tokens]`);
127
+ const { length, units } = this.getLength(content);
128
+ logger.info(`[request sent containing ${length} ${units}]`);
130
129
  logger.debug(`${content}`);
131
130
  }
132
131
 
@@ -134,8 +133,8 @@ class OpenAIChatPlugin extends ModelPlugin {
134
133
  logger.info(`[response received as an SSE stream]`);
135
134
  } else {
136
135
  const responseText = this.parseResponse(responseData);
137
- const responseTokens = encode(responseText).length;
138
- logger.info(`[response received containing ${responseTokens} tokens]`);
136
+ const { length, units } = this.getLength(responseText);
137
+ logger.info(`[response received containing ${length} ${units}]`);
139
138
  logger.debug(`${responseText}`);
140
139
  }
141
140
 
@@ -1,7 +1,7 @@
1
1
  // OpenAICompletionPlugin.js
2
2
 
3
3
  import ModelPlugin from './modelPlugin.js';
4
- import { encode } from 'gpt-3-encoder';
4
+ import { encode } from '../../lib/encodeCache.js';
5
5
  import logger from '../../lib/logger.js';
6
6
 
7
7
  // Helper function to truncate the prompt if it is too long
@@ -104,21 +104,20 @@ class OpenAICompletionPlugin extends ModelPlugin {
104
104
 
105
105
  // Override the logging function to log the prompt and response
106
106
  logRequestData(data, responseData, prompt) {
107
- this.logAIRequestFinished();
108
-
109
107
  const stream = data.stream;
110
108
  const modelInput = data.prompt;
111
109
 
112
- const modelInputTokens = encode(modelInput).length;
113
- logger.info(`[request sent containing ${modelInputTokens} tokens]`);
110
+ const { length, units } = this.getLength(modelInput);
111
+
112
+ logger.info(`[request sent containing ${length} ${units}]`);
114
113
  logger.debug(`${modelInput}`);
115
114
 
116
115
  if (stream) {
117
116
  logger.info(`[response received as an SSE stream]`);
118
117
  } else {
119
118
  const responseText = this.parseResponse(responseData);
120
- const responseTokens = encode(responseText).length;
121
- logger.info(`[response received containing ${responseTokens} tokens]`);
119
+ const { length, units } = this.getLength(responseText);
120
+ logger.info(`[response received containing ${length} ${units}]`);
122
121
  logger.debug(`${responseText}`);
123
122
  }
124
123
 
@@ -201,6 +201,9 @@ class OpenAIWhisperPlugin extends ModelPlugin {
201
201
  const processTS = async (uri) => {
202
202
  try {
203
203
  const tsparams = { fileurl:uri };
204
+
205
+ const { language } = parameters;
206
+ if(language) tsparams.language = language;
204
207
  if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
205
208
  if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
206
209
  if(maxLineCount) tsparams.max_line_count = maxLineCount;
@@ -1,6 +1,5 @@
1
1
  // palmChatPlugin.js
2
2
  import ModelPlugin from './modelPlugin.js';
3
- import { encode } from 'gpt-3-encoder';
4
3
  import HandleBars from '../../lib/handleBars.js';
5
4
  import logger from '../../lib/logger.js';
6
5
 
@@ -181,22 +180,20 @@ class PalmChatPlugin extends ModelPlugin {
181
180
 
182
181
  // Override the logging function to display the messages and responses
183
182
  logRequestData(data, responseData, prompt) {
184
- this.logAIRequestFinished();
185
-
186
183
  const instances = data && data.instances;
187
184
  const messages = instances && instances[0] && instances[0].messages;
188
185
  const { context, examples } = instances && instances [0] || {};
189
186
 
190
187
  if (context) {
191
- const contextLength = encode(context).length;
192
- logger.info(`[chat request contains context information of length ${contextLength} tokens]`)
193
- logger.debug(`Context: ${context}`);
188
+ const { length, units } = this.getLength(context);
189
+ logger.info(`[chat request contains context information of length ${length} ${units}]`)
190
+ logger.debug(`context: ${context}`);
194
191
  }
195
192
 
196
193
  if (examples && examples.length) {
197
194
  logger.info(`[chat request contains ${examples.length} examples]`);
198
195
  examples.forEach((example, index) => {
199
- logger.debug(`Example ${index + 1}: Input: "${example.input.content}", Output: "${example.output.content}"`);
196
+ logger.debug(`example ${index + 1}: input: "${example.input.content}", output: "${example.output.content}"`);
200
197
  });
201
198
  }
202
199
 
@@ -204,10 +201,10 @@ class PalmChatPlugin extends ModelPlugin {
204
201
  logger.info(`[chat request contains ${messages.length} messages]`);
205
202
  messages.forEach((message, index) => {
206
203
  const words = message.content.split(" ");
207
- const tokenCount = encode(message.content).length;
204
+ const { length, units } = this.getLength(message.content);
208
205
  const preview = words.length < 41 ? message.content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
209
206
 
210
- logger.debug(`Message ${index + 1}: Author: ${message.author}, Tokens: ${tokenCount}, Content: "${preview}"`);
207
+ logger.debug(`message ${index + 1}: author: ${message.author}, ${units}: ${length}, content: "${preview}"`);
211
208
  });
212
209
  } else if (messages && messages.length === 1) {
213
210
  logger.debug(`${messages[0].content}`);
@@ -216,8 +213,8 @@ class PalmChatPlugin extends ModelPlugin {
216
213
  const safetyAttributes = this.getSafetyAttributes(responseData);
217
214
 
218
215
  const responseText = this.parseResponse(responseData);
219
- const responseTokens = encode(responseText).length;
220
- logger.info(`[response received containing ${responseTokens} tokens]`);
216
+ const { length, units } = this.getLength(responseText);
217
+ logger.info(`[response received containing ${length} ${units}]`);
221
218
  logger.debug(`${responseText}`);
222
219
 
223
220
  if (safetyAttributes) {
@@ -1,7 +1,6 @@
1
1
  // palmCompletionPlugin.js
2
2
 
3
3
  import ModelPlugin from './modelPlugin.js';
4
- import { encode } from 'gpt-3-encoder';
5
4
  import logger from '../../lib/logger.js';
6
5
 
7
6
  // PalmCompletionPlugin class for handling requests and responses to the PaLM API Text Completion API
@@ -107,22 +106,20 @@ class PalmCompletionPlugin extends ModelPlugin {
107
106
 
108
107
  // Override the logging function to log the prompt and response
109
108
  logRequestData(data, responseData, prompt) {
110
- this.logAIRequestFinished();
111
-
112
109
  const safetyAttributes = this.getSafetyAttributes(responseData);
113
110
 
114
111
  const instances = data && data.instances;
115
112
  const modelInput = instances && instances[0] && instances[0].prompt;
116
113
 
117
114
  if (modelInput) {
118
- const inputTokens = encode(modelInput).length;
119
- logger.info(`[request sent containing ${inputTokens} tokens]`);
115
+ const { length, units } = this.getLength(modelInput);
116
+ logger.info(`[request sent containing ${length} ${units}]`);
120
117
  logger.debug(`${modelInput}`);
121
118
  }
122
119
 
123
120
  const responseText = this.parseResponse(responseData);
124
- const responseTokens = encode(responseText).length;
125
- logger.info(`[response received containing ${responseTokens} tokens]`);
121
+ const { length, units } = this.getLength(responseText);
122
+ logger.info(`[response received containing ${length} ${units}]`);
126
123
  logger.debug(`${responseText}`);
127
124
 
128
125
  if (safetyAttributes) {
@@ -1,7 +1,6 @@
1
1
  import test from 'ava';
2
2
  import { getSemanticChunks, determineTextFormat } from '../server/chunker.js';
3
-
4
- import { encode } from 'gpt-3-encoder';
3
+ import { encode } from '../lib/encodeCache.js';
5
4
 
6
5
  const testText = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
7
6
 
@@ -0,0 +1,92 @@
1
+ import test from 'ava';
2
+ import { faker } from '@faker-js/faker';
3
+ import { performance } from 'perf_hooks';
4
+ import { encode, decode } from '../lib/encodeCache.js';
5
+ import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
6
+
7
+ // Test the accuracy of the cached encoding and decoding
8
+ test('cached encode and decode are reversible', t => {
9
+ const original = faker.lorem.paragraph(50);
10
+ const encoded = encode(original);
11
+ const decoded = decode(encoded);
12
+ t.is(decoded, original);
13
+ })
14
+
15
+ // Test whether the cached encoding and decoding is identical to the gpt3-encoder
16
+ test('cached encode and decode are identical to noncached', t => {
17
+ const original = faker.lorem.paragraph(50);
18
+ const encoded = encode(original);
19
+ const gpt3Encoded = gpt3Encode(original);
20
+ t.deepEqual(encoded, gpt3Encoded);
21
+
22
+ const decoded = decode(encoded);
23
+ const gpt3Decoded = gpt3Decode(encoded);
24
+ t.is(decoded, gpt3Decoded);
25
+ })
26
+
27
+ // Test whether decoding adds the encoded value to the encode cache
28
+ // the only way to tell is if the encode is faster after the cached decode
29
+ test('decode operation adds to encode cache', t => {
30
+ const original = faker.lorem.paragraph(50);
31
+ const encodedOriginal = gpt3Encode(original);
32
+
33
+ const startEncode = performance.now();
34
+ const encoded = encode(original);
35
+ const endEncode = performance.now();
36
+ const encodeTime = endEncode - startEncode;
37
+ console.log("pre-decode encode time", encodeTime);
38
+
39
+ t.deepEqual(encoded, encodedOriginal);
40
+
41
+ const original2 = faker.lorem.paragraph(50);
42
+ const encodedOriginal2 = gpt3Encode(original2);
43
+ const decodedOriginal2 = decode(encodedOriginal2);
44
+ const startEncode2 = performance.now();
45
+ const encoded2 = encode(original2);
46
+ const endEncode2 = performance.now();
47
+ const encodeTime2 = endEncode2 - startEncode2;
48
+ console.log("post-decode encode time", encodeTime2);
49
+
50
+ t.deepEqual(encoded2, encodedOriginal2);
51
+ t.true(encodeTime2 <= encodeTime);
52
+ })
53
+
54
+
55
+ // Test encode and decode caching
56
+ test('caching', t => {
57
+ const original = faker.lorem.paragraph(50);
58
+ const startEncode1 = performance.now();
59
+ const encoded1 = encode(original);
60
+ const endEncode1 = performance.now();
61
+ const encodeTime1 = endEncode1 - startEncode1;
62
+
63
+ const original2 = faker.lorem.paragraph(50);
64
+ const encodedOriginal2 = gpt3Encode(original2);
65
+ const startDecode1 = performance.now();
66
+ const decoded1 = decode(encodedOriginal2);
67
+ const endDecode1 = performance.now();
68
+ const decodeTime1 = endDecode1 - startDecode1;
69
+
70
+ t.deepEqual(encoded1, gpt3Encode(original));
71
+ t.is(decoded1, original2);
72
+
73
+ console.log('uncached encode time', encodeTime1);
74
+ console.log('uncached decode time', decodeTime1);
75
+
76
+ // Second time encoding and decoding, it should be from the cache
77
+ const startEncode2 = performance.now();
78
+ const encoded2 = encode(original);
79
+ const endEncode2 = performance.now();
80
+ const encodeTime2 = endEncode2 - startEncode2;
81
+
82
+ const startDecode2 = performance.now();
83
+ const decoded2 = decode(encodedOriginal2);
84
+ const endDecode2 = performance.now();
85
+ const decodeTime2 = endDecode2 - startDecode2;
86
+
87
+ console.log('cached encode time', encodeTime2);
88
+ console.log('cached decode time', decodeTime2);
89
+
90
+ t.true(encodeTime2 <= encodeTime1);
91
+ t.true(decodeTime2 <= decodeTime1);
92
+ });
@@ -0,0 +1,29 @@
1
+ import test from 'ava';
2
+ import { FastLRUCache } from '../lib/fastLruCache.js';
3
+
4
+ test('FastLRUCache - get and put', t => {
5
+ const cache = new FastLRUCache(2);
6
+
7
+ cache.put(1, 1);
8
+ cache.put(2, 2);
9
+
10
+ t.is(cache.get(1), 1); // returns 1
11
+ cache.put(3, 3); // evicts key 2
12
+ t.is(cache.get(2), -1); // returns -1 (not found)
13
+ cache.put(4, 4); // evicts key 1
14
+ t.is(cache.get(1), -1); // returns -1 (not found)
15
+ t.is(cache.get(3), 3); // returns 3
16
+ t.is(cache.get(4), 4); // returns 4
17
+ });
18
+
19
+ test('FastLRUCache - get non-existent key', t => {
20
+ const cache = new FastLRUCache(2);
21
+ t.is(cache.get(99), -1); // returns -1 (not found)
22
+ });
23
+
24
+ test('FastLRUCache - update value of existing key', t => {
25
+ const cache = new FastLRUCache(2);
26
+ cache.put(1, 1);
27
+ cache.put(1, 100);
28
+ t.is(cache.get(1), 100); // returns updated value 100
29
+ });
@@ -37,7 +37,7 @@ test('RequestMonitor: getAverageCallDuration', async t => {
37
37
  test('RequestMonitor: incrementError429Count', t => {
38
38
  const rm = new RequestMonitor();
39
39
 
40
- rm.incrementError429Count();
40
+ rm.incrementErrorCount(null, 429);
41
41
 
42
42
  t.is(rm.error429Count.size(), 1);
43
43
  });
@@ -74,7 +74,7 @@ test('RequestMonitor: getError429Rate', t => {
74
74
 
75
75
  rm.startCall();
76
76
  rm.endCall();
77
- rm.incrementError429Count();
77
+ rm.incrementErrorCount(null, 429);
78
78
 
79
79
  t.is(rm.getError429Rate(), 1);
80
80
  });
@@ -84,7 +84,7 @@ test('RequestMonitor: reset', t => {
84
84
 
85
85
  rm.startCall();
86
86
  rm.endCall();
87
- rm.incrementError429Count();
87
+ rm.incrementErrorCount(null, 429);
88
88
 
89
89
  rm.reset();
90
90
 
@@ -1,7 +1,7 @@
1
1
  // ModelPlugin.test.js
2
2
  import test from 'ava';
3
3
  import ModelPlugin from '../server/plugins/modelPlugin.js';
4
- import { encode } from 'gpt-3-encoder';
4
+ import { encode } from '../lib/encodeCache.js';
5
5
  import { mockPathwayResolverString } from './mocks.js';
6
6
 
7
7
  const { config, pathway, modelName, model } = mockPathwayResolverString;