@aj-archipelago/cortex 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +68 -65
- package/lib/requestMonitor.js +19 -9
- package/package.json +2 -1
- package/pathways/basePathway.js +5 -3
- package/server/chunker.js +1 -1
- package/server/graphql.js +1 -1
- package/server/pathwayResolver.js +3 -3
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +4 -7
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +22 -18
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
package/config.js
CHANGED

package/helper-apps/cortex-whisper-wrapper/app.py
CHANGED

@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
package/lib/encodeCache.js
ADDED

@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
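A minimal usage sketch of the new cached tokenizer wrappers (the import path assumes a caller at the package root): a repeated encode of the same string is served from the LRU cache instead of being re-tokenized, and decode seeds the encode cache with the reverse mapping.

    import { encode, decode } from './lib/encodeCache.js';

    const prompt = 'The quick brown fox jumps over the lazy dog.';

    const tokens = encode(prompt);      // first call tokenizes with gpt-3-encoder and caches the result
    const tokensAgain = encode(prompt); // second call is an O(1) cache hit (same stored array)
    const text = decode(tokens);        // also stores text -> tokens in the encode cache

    console.log(tokens.length, tokensAgain === tokens, text === prompt);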
package/lib/fastLruCache.js
ADDED

@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
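A short sketch of the eviction behavior, mirroring the unit tests added further below: the least-recently-used key is dropped once capacity is exceeded, and a miss returns -1.

    import { FastLRUCache } from './lib/fastLruCache.js';

    const cache = new FastLRUCache(2); // capacity of two entries
    cache.put('a', 1);
    cache.put('b', 2);
    cache.get('a');                    // touching 'a' makes 'b' the least recently used entry
    cache.put('c', 3);                 // evicts 'b'

    console.log(cache.get('b'));       // -1 (evicted)
    console.log(cache.get('a'));       // 1
    console.log(cache.get('c'));       // 3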
package/lib/pathwayTools.js
CHANGED
package/lib/requestExecutor.js
CHANGED
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,30 +181,51 @@ setInterval(() => {
             endpointIndex++;
         });
     }
-},
+}, 30000); // Log rates every 30 seconds
 
 const postWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
-
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        response = await cortexAxios.post(url, data, axiosConfigObj);
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
 const postRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
        const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-
-
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
        const axiosConfigObj = { params, headers, cache };
        const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
        if (streamRequested && model.supportsStreaming) {
            axiosConfigObj.responseType = 'stream';
            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
@@ -212,14 +235,20 @@ const postRequest = async (cortexRequest) => {
                axiosConfigObj.params.stream = false;
                data.stream = false;
            }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
            const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
            promises = controllers.map((controller, index) =>
                new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                    setTimeout(async () => {
                        try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                            const endpointName = selectedEndpoint.name || model;
                            if (!selectedEndpoint.limiter) {
                                throw new Error(`No limiter for endpoint ${endpointName}!`);
@@ -227,52 +256,27 @@ const postRequest = async (cortexRequest) => {
                            const axiosConfigObj = { params, headers, cache };
 
                            let response = null;
+                            let duration = null;
 
                            if (!controller.signal?.aborted) {
 
                                axiosConfigObj.signal = controller.signal;
                                axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index
-
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                    const header = '>'.repeat(logMessage.length);
                                    logger.info(`\n${header}\n${logMessage}`);
                                }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj));
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                if (!controller.signal?.aborted) {
-
                                    logger.debug(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
                                }
                            }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                        } catch (error) {
                            if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +289,48 @@ const postRequest = async (cortexRequest) => {
                        } finally {
                            controllers.forEach(controller => controller.abort());
                        }
-                    },
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                })
            );
        }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
        try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
            // if response status is 2xx
            if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
            } else {
                throw new Error(`Received error response: ${response.status}`);
            }
        } catch (error) {
-
-
-            const status =
-
-            if (status === 429) {
-                selectedEndpoint.monitor.incrementError429Count();
-            }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429
-
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                    }
                } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                    cortexRequest.selectNewEndpoint();
                }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                if (i < MAX_RETRY - 1) {
                    const backoffTime = 200 * Math.pow(2, i);
                    const jitter = backoffTime * 0.2 * Math.random();
                    await new Promise(r => setTimeout(r, backoffTime + jitter));
                } else {
-                    return
+                    return { response, duration };
                }
            } else {
                throw error;
@@ -334,10 +341,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await postRequest(cortexRequest);
        const requestId = cortexRequest.requestId;
        const { error, data, cached } = response;
        if (cached) {
@@ -347,8 +351,7 @@ const executeRequest = async (cortexRequest) => {
            const lastError = error[error.length - 1];
            return { error: lastError.toJSON() ?? lastError ?? error };
        }
-
-        return data;
+        return { data, duration };
    } catch (error) {
        logger.error(`Error in request: ${error.message || error}`);
        return { error: error };
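Both the duplicate-request schedule and the retry backoff above grow exponentially with up to 20% added jitter. A small worked sketch of the timings those formulas produce, assuming the default duplicateRequestAfter of 10 seconds:

    const after = 10 * 1000; // DUPLICATE_REQUEST_AFTER in ms
    for (let index = 0; index < 3; index++) {
        // duplicate request i fires after roughly after * (2^i - 1) ms
        const base = after * Math.pow(2, index) - after; // 0ms, 10000ms, 30000ms
        console.log(`duplicate ${index}: ~${base}ms (+ up to ${base * 0.2}ms jitter)`);
    }

    for (let i = 0; i < 4; i++) {
        // retry i waits 200 * 2^i ms before the next attempt
        const backoff = 200 * Math.pow(2, i); // 200ms, 400ms, 800ms, 1600ms
        console.log(`retry ${i + 1}: ~${backoff}ms (+ up to ${backoff * 0.2}ms jitter)`);
    }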
package/lib/requestMonitor.js
CHANGED
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@ class RequestMonitor {
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@ class RequestMonitor {
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@ class RequestMonitor {
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@ class RequestMonitor {
         return sum / this.callDurations.size();
     }
 
-
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
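The practical effect of the monitor changes: incrementErrorCount now accepts an optional callId and HTTP status, folds 429s into the dedicated counter, and returns the call duration by delegating to endCall. A minimal sketch (the import form is assumed; only methods visible in this diff are used):

    import { RequestMonitor } from './lib/requestMonitor.js';

    const monitor = new RequestMonitor();

    const callId = monitor.startCall();
    // ... the request comes back with a 429 ...
    const duration = monitor.incrementErrorCount(callId, 429);

    console.log(duration);                  // elapsed ms for the failed call
    console.log(monitor.getError429Rate()); // 429s are still tracked separately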
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.5",
+  "version": "1.1.6",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -60,6 +60,7 @@
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
package/pathways/basePathway.js
CHANGED
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature:
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature:
+    temperature: 0.9,
 };
 
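These defaults can be overridden per pathway; the new enableDuplicateRequests flag lets an individual pathway opt out of backup requests entirely. A hypothetical pathway definition (the name and prompt are placeholders):

    // pathways/summarize.js (illustrative)
    export default {
        prompt: `Summarize the following text:\n\n{{text}}`,
        useInputChunking: true,
        enableDuplicateRequests: false, // opt this pathway out of backup requests
        duplicateRequestAfter: 20,      // seconds before a backup request would be sent
        timeout: 180,
    };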
package/server/chunker.js
CHANGED
package/server/graphql.js
CHANGED
package/server/pathwayResolver.js
CHANGED

@@ -2,7 +2,7 @@ import { ModelExecutor } from './modelExecutor.js';
 import { modelEndpoints } from '../lib/requestExecutor.js';
 // eslint-disable-next-line import/no-extraneous-dependencies
 import { v4 as uuidv4 } from 'uuid';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../lib/encodeCache.js';
 import { getFirstNToken, getLastNToken, getSemanticChunks } from './chunker.js';
 import { PathwayResponseParser } from './pathwayResponseParser.js';
 import { Prompt } from './prompt.js';
@@ -339,7 +339,7 @@ class PathwayResolver {
             const data = await Promise.all(chunks.map(chunk =>
                 this.applyPromptsSerially(chunk, parameters)));
             // Join the chunks with newlines
-            return data.join("\n\n");
+            return data.join(this.pathway.joinChunksWith || "\n\n");
         } else {
             // Apply prompts one by one, serially, across all chunks
             // This is the default processing mode and will make previousResult available at the object level
@@ -373,7 +373,7 @@ class PathwayResolver {
             if (result.length === 1) {
                 result = result[0];
             } else if (!currentParameters.stream) {
-                result = result.join("\n\n");
+                result = result.join(this.pathway.joinChunksWith || "\n\n");
             }
         }
 
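The resolver now joins per-chunk results with the pathway's joinChunksWith value, falling back to the previous hard-coded blank line. A tiny sketch of the effect (the chunk results are placeholders):

    const chunkResults = ['First chunk summary.', 'Second chunk summary.'];

    // previous behavior: always a blank line between chunk results
    const legacy = chunkResults.join("\n\n");

    // new behavior: the pathway chooses the separator, e.g. joinChunksWith: ' '
    const pathway = { joinChunksWith: ' ' };
    const joined = chunkResults.join(pathway.joinChunksWith || "\n\n");

    console.log(joined); // "First chunk summary. Second chunk summary."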
package/server/plugins/azureCognitivePlugin.js
CHANGED

@@ -6,6 +6,7 @@ import path from 'path';
 import { config } from '../../config.js';
 import { axios } from '../../lib/requestExecutor.js';
 import logger from '../../lib/logger.js';
+import { getSemanticChunks } from '../chunker.js';
 
 const API_URL = config.get('whisperMediaApiUrl');
 
@@ -37,7 +38,8 @@ class AzureCognitivePlugin extends ModelPlugin {
         const data = {};
 
         if (mode == 'delete') {
-
+            let searchUrl = this.ensureMode(this.requestUrl(text), 'search');
+            searchUrl = this.ensureIndex(searchUrl, indexName);
             let searchQuery = `owner:${savedContextId}`;
 
             if (docId) {
@@ -155,6 +157,7 @@ class AzureCognitivePlugin extends ModelPlugin {
         const headers = cortexRequest.headers;
 
         const { file } = parameters;
+        const fileData = { value: [] };
         if(file){
             let url = file;
             //if not txt file, use helper app to convert to txt
@@ -177,11 +180,13 @@ class AzureCognitivePlugin extends ModelPlugin {
                 throw Error(`No data can be extracted out of file!`);
             }
 
-
-
+            const chunkTokenLength = this.promptParameters.inputChunkSize || 1000;
+            const chunks = getSemanticChunks(data, chunkTokenLength);
 
-
-
+            for (const text of chunks) {
+                const { data: singleData } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest)
+                fileData.value.push(singleData.value[0]);
+            }
         }
 
         const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
@@ -195,7 +200,7 @@ class AzureCognitivePlugin extends ModelPlugin {
 
         // execute the request
         cortexRequest.url = url;
-        cortexRequest.data = data;
+        cortexRequest.data = (mode === 'index' && fileData.value.length>0) ? fileData : data;
         cortexRequest.params = params;
         cortexRequest.headers = headers;
         const result = await this.executeRequest(cortexRequest);
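The indexing path now splits extracted file text into semantic chunks and builds one index document per chunk instead of a single oversized one. A sketch of that loop in isolation; buildDocument is a hypothetical stand-in for the per-chunk body produced by getRequestParameters, and the import path assumes a caller at the package root:

    import { randomUUID } from 'crypto';
    import { getSemanticChunks } from './server/chunker.js';

    // Hypothetical stand-in for the per-chunk index document the plugin builds
    const buildDocument = (text, owner) => ({ id: randomUUID(), owner, content: text });

    const extractedText = 'A very long document extracted from an uploaded file ...';
    const chunkTokenLength = 1000; // falls back to 1000 when inputChunkSize is not set

    const fileData = { value: [] };
    for (const chunk of getSemanticChunks(extractedText, chunkTokenLength)) {
        fileData.value.push(buildDocument(chunk, 'some-context-id'));
    }
    // fileData is then sent as the index payload instead of the single-document body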
package/server/plugins/azureTranslatePlugin.js
CHANGED

@@ -45,8 +45,6 @@ class AzureTranslatePlugin extends ModelPlugin {
 
     // Override the logging function to display the request and response
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const modelInput = data[0].Text;
 
         logger.debug(`${modelInput}`);
package/server/plugins/geminiChatPlugin.js
CHANGED

@@ -1,6 +1,5 @@
 // geminiChatPlugin.js
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import logger from '../../lib/logger.js';
 
 const mergeResults = (data) => {
@@ -148,8 +147,6 @@ class GeminiChatPlugin extends ModelPlugin {
 
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const messages = data && data.contents;
 
         if (messages && messages.length > 1) {
@@ -162,10 +159,10 @@ class GeminiChatPlugin extends ModelPlugin {
                     return acc;
                 } , '');
                 const words = messageContent.split(" ");
-                const
+                const { length, units } = this.getLength(messageContent);
                 const preview = words.length < 41 ? messageContent : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
 
-                logger.debug(`
+                logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
             });
         } else if (messages && messages.length === 1) {
             logger.debug(`${messages[0].parts[0].text}`);
@@ -180,8 +177,8 @@ class GeminiChatPlugin extends ModelPlugin {
             logger.warn(`!!! response was blocked because the input or response potentially violates policies`);
             logger.debug(`Safety Ratings: ${JSON.stringify(safetyRatings, null, 2)}`);
         }
-        const
-        logger.info(`[response received containing ${
+        const { length, units } = this.getLength(mergedResult);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${mergedResult}`);
     }
 
package/server/plugins/localModelPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // localModelPlugin.js
 import ModelPlugin from './modelPlugin.js';
 import { execFileSync } from 'child_process';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../../lib/encodeCache.js';
 import logger from '../../lib/logger.js';
 
 class LocalModelPlugin extends ModelPlugin {
package/server/plugins/modelPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // ModelPlugin.js
 import HandleBars from '../../lib/handleBars.js';
 import { executeRequest } from '../../lib/requestExecutor.js';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../../lib/encodeCache.js';
 import { getFirstNToken } from '../chunker.js';
 import logger, { obscureUrlParams } from '../../lib/logger.js';
 import { config } from '../../config.js';
@@ -32,7 +32,6 @@ class ModelPlugin {
         }
 
         this.requestCount = 0;
-        this.lastRequestStartTime = new Date();
     }
 
     truncateMessagesToTargetLength(messages, targetTokenLength) {
@@ -221,7 +220,6 @@ class ModelPlugin {
     // Default simple logging
     logRequestStart() {
         this.requestCount++;
-        this.lastRequestStartTime = new Date();
         const logMessage = `>>> [${this.requestId}: ${this.pathwayName}.${this.requestCount}] request`;
         const header = '>'.repeat(logMessage.length);
         logger.info(`${header}`);
@@ -229,28 +227,32 @@ class ModelPlugin {
         logger.info(`>>> Making API request to ${obscureUrlParams(this.url)}`);
     }
 
-    logAIRequestFinished() {
-        const
-        const timeElapsed = (currentTime - this.lastRequestStartTime) / 1000;
-        const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${timeElapsed}s - data:`;
+    logAIRequestFinished(requestDuration) {
+        const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${requestDuration}ms - data:`;
         const header = '<'.repeat(logMessage.length);
         logger.info(`${header}`);
         logger.info(`${logMessage}`);
     }
 
+    getLength(data) {
+        const isProd = config.get('env') === 'production';
+        const length = isProd ? data.length : encode(data).length;
+        const units = isProd ? 'characters' : 'tokens';
+        return {length, units};
+    }
+
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
         const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
 
         if (modelInput) {
-            const
-            logger.info(`[request sent containing ${
+            const { length, units } = this.getLength(modelInput);
+            logger.info(`[request sent containing ${length} ${units}]`);
             logger.debug(`${modelInput}`);
         }
 
-        const responseText = JSON.stringify(
-        const
-        logger.info(`[response received containing ${
+        const responseText = JSON.stringify(responseData);
+        const { length, units } = this.getLength(responseText);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${responseText}`);
 
         prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
@@ -267,16 +269,18 @@ class ModelPlugin {
         cortexRequest.cache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
         this.logRequestStart();
 
-        const responseData = await executeRequest(cortexRequest);
+        const { data: responseData, duration: requestDuration } = await executeRequest(cortexRequest);
 
-
-
+        const errorData = Array.isArray(responseData) ? responseData[0] : responseData;
         if (errorData && errorData.error) {
             throw new Error(`Server error: ${JSON.stringify(errorData.error)}`);
         }
 
-        this.
-
+        this.logAIRequestFinished(requestDuration);
+        const parsedData = this.parseResponse(responseData);
+        this.logRequestData(data, parsedData, prompt);
+
+        return parsedData;
     } catch (error) {
         // Log the error and continue
         logger.error(error.message || error);
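The new getLength helper trades precision for speed in production: outside production it reports exact token counts via the cached encoder, while in production it falls back to character counts so logging never tokenizes large payloads. A standalone sketch of the same logic, with a plain environment variable standing in for config.get('env'):

    import { encode } from './lib/encodeCache.js';

    const isProd = process.env.NODE_ENV === 'production'; // stand-in for config.get('env')

    const getLength = (data) => {
        const length = isProd ? data.length : encode(data).length;
        const units = isProd ? 'characters' : 'tokens';
        return { length, units };
    };

    const { length, units } = getLength('How many tokens is this sentence?');
    console.log(`[request sent containing ${length} ${units}]`);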
package/server/plugins/openAiChatPlugin.js
CHANGED

@@ -1,6 +1,5 @@
 // OpenAIChatPlugin.js
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import logger from '../../lib/logger.js';
 
 class OpenAIChatPlugin extends ModelPlugin {
@@ -105,28 +104,28 @@ class OpenAIChatPlugin extends ModelPlugin {
 
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const { stream, messages } = data;
         if (messages && messages.length > 1) {
             logger.info(`[chat request sent containing ${messages.length} messages]`);
-            let
+            let totalLength = 0;
+            let totalUnits;
             messages.forEach((message, index) => {
                 //message.content string or array
                 const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
                 const words = content.split(" ");
-                const
+                const { length, units } = this.getLength(content);
                 const preview = words.length < 41 ? content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
 
-                logger.debug(`
-
+                logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
+                totalLength += length;
+                totalUnits = units;
             });
-            logger.info(`[chat request contained ${
+            logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
         } else {
             const message = messages[0];
             const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
-            const
-            logger.info(`[request sent containing ${
+            const { length, units } = this.getLength(content);
+            logger.info(`[request sent containing ${length} ${units}]`);
             logger.debug(`${content}`);
         }
 
@@ -134,8 +133,8 @@ class OpenAIChatPlugin extends ModelPlugin {
             logger.info(`[response received as an SSE stream]`);
         } else {
             const responseText = this.parseResponse(responseData);
-            const
-            logger.info(`[response received containing ${
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
             logger.debug(`${responseText}`);
         }
 
package/server/plugins/openAiCompletionPlugin.js
CHANGED

@@ -1,7 +1,7 @@
 // OpenAICompletionPlugin.js
 
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../../lib/encodeCache.js';
 import logger from '../../lib/logger.js';
 
 // Helper function to truncate the prompt if it is too long
@@ -104,21 +104,20 @@ class OpenAICompletionPlugin extends ModelPlugin {
 
     // Override the logging function to log the prompt and response
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const stream = data.stream;
         const modelInput = data.prompt;
 
-        const
-
+        const { length, units } = this.getLength(modelInput);
+
+        logger.info(`[request sent containing ${length} ${units}]`);
         logger.debug(`${modelInput}`);
 
         if (stream) {
             logger.info(`[response received as an SSE stream]`);
         } else {
             const responseText = this.parseResponse(responseData);
-            const
-            logger.info(`[response received containing ${
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
             logger.debug(`${responseText}`);
         }
 
package/server/plugins/openAiWhisperPlugin.js
CHANGED

@@ -201,6 +201,9 @@ class OpenAIWhisperPlugin extends ModelPlugin {
         const processTS = async (uri) => {
             try {
                 const tsparams = { fileurl:uri };
+
+                const { language } = parameters;
+                if(language) tsparams.language = language;
                 if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
                 if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
                 if(maxLineCount) tsparams.max_line_count = maxLineCount;
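With this plugin change and the Python wrapper change above, a transcription request can pin the language instead of relying on Whisper's auto-detection. A hypothetical call shape, assuming the wrapper is reached over plain HTTP; the endpoint URL, route, and method are placeholders not shown in this diff:

    import axios from 'axios';

    const whisperWrapperUrl = 'http://localhost:5000/transcribe'; // placeholder endpoint

    const tsparams = { fileurl: 'https://example.com/media/interview.mp3' };
    const language = 'ar';                      // e.g. force Arabic
    if (language) tsparams.language = language; // mirrors the new plugin logic
    tsparams.word_timestamps = 'True';          // parsed as a bool by the wrapper

    const { data: transcript } = await axios.get(whisperWrapperUrl, { params: tsparams });
    console.log(transcript);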
package/server/plugins/palmChatPlugin.js
CHANGED

@@ -1,6 +1,5 @@
 // palmChatPlugin.js
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import HandleBars from '../../lib/handleBars.js';
 import logger from '../../lib/logger.js';
 
@@ -181,22 +180,20 @@ class PalmChatPlugin extends ModelPlugin {
 
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const instances = data && data.instances;
         const messages = instances && instances[0] && instances[0].messages;
         const { context, examples } = instances && instances [0] || {};
 
         if (context) {
-            const
-            logger.info(`[chat request contains context information of length ${
-            logger.debug(`
+            const { length, units } = this.getLength(context);
+            logger.info(`[chat request contains context information of length ${length} ${units}]`)
+            logger.debug(`context: ${context}`);
         }
 
         if (examples && examples.length) {
             logger.info(`[chat request contains ${examples.length} examples]`);
             examples.forEach((example, index) => {
-                logger.debug(`
+                logger.debug(`example ${index + 1}: input: "${example.input.content}", output: "${example.output.content}"`);
             });
         }
 
@@ -204,10 +201,10 @@ class PalmChatPlugin extends ModelPlugin {
             logger.info(`[chat request contains ${messages.length} messages]`);
             messages.forEach((message, index) => {
                 const words = message.content.split(" ");
-                const
+                const { length, units } = this.getLength(message.content);
                 const preview = words.length < 41 ? message.content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
 
-                logger.debug(`
+                logger.debug(`message ${index + 1}: author: ${message.author}, ${units}: ${length}, content: "${preview}"`);
             });
         } else if (messages && messages.length === 1) {
             logger.debug(`${messages[0].content}`);
@@ -216,8 +213,8 @@ class PalmChatPlugin extends ModelPlugin {
         const safetyAttributes = this.getSafetyAttributes(responseData);
 
         const responseText = this.parseResponse(responseData);
-        const
-        logger.info(`[response received containing ${
+        const { length, units } = this.getLength(responseText);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${responseText}`);
 
         if (safetyAttributes) {
package/server/plugins/palmCompletionPlugin.js
CHANGED

@@ -1,7 +1,6 @@
 // palmCompletionPlugin.js
 
 import ModelPlugin from './modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
 import logger from '../../lib/logger.js';
 
 // PalmCompletionPlugin class for handling requests and responses to the PaLM API Text Completion API
@@ -107,22 +106,20 @@ class PalmCompletionPlugin extends ModelPlugin {
 
     // Override the logging function to log the prompt and response
     logRequestData(data, responseData, prompt) {
-        this.logAIRequestFinished();
-
         const safetyAttributes = this.getSafetyAttributes(responseData);
 
         const instances = data && data.instances;
         const modelInput = instances && instances[0] && instances[0].prompt;
 
         if (modelInput) {
-            const
-            logger.info(`[request sent containing ${
+            const { length, units } = this.getLength(modelInput);
+            logger.info(`[request sent containing ${length} ${units}]`);
             logger.debug(`${modelInput}`);
         }
 
         const responseText = this.parseResponse(responseData);
-        const
-        logger.info(`[response received containing ${
+        const { length, units } = this.getLength(responseText);
+        logger.info(`[response received containing ${length} ${units}]`);
         logger.debug(`${responseText}`);
 
         if (safetyAttributes) {
package/tests/chunkfunction.test.js
CHANGED

@@ -1,7 +1,6 @@
 import test from 'ava';
 import { getSemanticChunks, determineTextFormat } from '../server/chunker.js';
-
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../lib/encodeCache.js';
 
 const testText = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id erat sem. Phasellus ac dapibus purus, in fermentum nunc. Mauris quis rutrum magna. Quisque rutrum, augue vel blandit posuere, augue magna convallis turpis, nec elementum augue mauris sit amet nunc. Aenean sit amet leo est. Nunc ante ex, blandit et felis ut, iaculis lacinia est. Phasellus dictum orci id libero ullamcorper tempor.
 
package/tests/encodeCache.test.js
ADDED

@@ -0,0 +1,92 @@
+import test from 'ava';
+import { faker } from '@faker-js/faker';
+import { performance } from 'perf_hooks';
+import { encode, decode } from '../lib/encodeCache.js';
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+
+// Test the accuracy of the cached encoding and decoding
+test('cached encode and decode are reversible', t => {
+    const original = faker.lorem.paragraph(50);
+    const encoded = encode(original);
+    const decoded = decode(encoded);
+    t.is(decoded, original);
+})
+
+// Test whether the cached encoding and decoding is identical to the gpt3-encoder
+test('cached encode and decode are identical to noncached', t => {
+    const original = faker.lorem.paragraph(50);
+    const encoded = encode(original);
+    const gpt3Encoded = gpt3Encode(original);
+    t.deepEqual(encoded, gpt3Encoded);
+
+    const decoded = decode(encoded);
+    const gpt3Decoded = gpt3Decode(encoded);
+    t.is(decoded, gpt3Decoded);
+})
+
+// Test whether decoding adds the encoded value to the encode cache
+// the only way to tell is if the encode is faster after the cached decode
+test('decode operation adds to encode cache', t => {
+    const original = faker.lorem.paragraph(50);
+    const encodedOriginal = gpt3Encode(original);
+
+    const startEncode = performance.now();
+    const encoded = encode(original);
+    const endEncode = performance.now();
+    const encodeTime = endEncode - startEncode;
+    console.log("pre-decode encode time", encodeTime);
+
+    t.deepEqual(encoded, encodedOriginal);
+
+    const original2 = faker.lorem.paragraph(50);
+    const encodedOriginal2 = gpt3Encode(original2);
+    const decodedOriginal2 = decode(encodedOriginal2);
+    const startEncode2 = performance.now();
+    const encoded2 = encode(original2);
+    const endEncode2 = performance.now();
+    const encodeTime2 = endEncode2 - startEncode2;
+    console.log("post-decode encode time", encodeTime2);
+
+    t.deepEqual(encoded2, encodedOriginal2);
+    t.true(encodeTime2 <= encodeTime);
+})
+
+
+// Test encode and decode caching
+test('caching', t => {
+    const original = faker.lorem.paragraph(50);
+    const startEncode1 = performance.now();
+    const encoded1 = encode(original);
+    const endEncode1 = performance.now();
+    const encodeTime1 = endEncode1 - startEncode1;
+
+    const original2 = faker.lorem.paragraph(50);
+    const encodedOriginal2 = gpt3Encode(original2);
+    const startDecode1 = performance.now();
+    const decoded1 = decode(encodedOriginal2);
+    const endDecode1 = performance.now();
+    const decodeTime1 = endDecode1 - startDecode1;
+
+    t.deepEqual(encoded1, gpt3Encode(original));
+    t.is(decoded1, original2);
+
+    console.log('uncached encode time', encodeTime1);
+    console.log('uncached decode time', decodeTime1);
+
+    // Second time encoding and decoding, it should be from the cache
+    const startEncode2 = performance.now();
+    const encoded2 = encode(original);
+    const endEncode2 = performance.now();
+    const encodeTime2 = endEncode2 - startEncode2;
+
+    const startDecode2 = performance.now();
+    const decoded2 = decode(encodedOriginal2);
+    const endDecode2 = performance.now();
+    const decodeTime2 = endDecode2 - startDecode2;
+
+    console.log('cached encode time', encodeTime2);
+    console.log('cached decode time', decodeTime2);
+
+    t.true(encodeTime2 <= encodeTime1);
+    t.true(decodeTime2 <= decodeTime1);
+});
package/tests/fastLruCache.test.js
ADDED

@@ -0,0 +1,29 @@
+import test from 'ava';
+import { FastLRUCache } from '../lib/fastLruCache.js';
+
+test('FastLRUCache - get and put', t => {
+    const cache = new FastLRUCache(2);
+
+    cache.put(1, 1);
+    cache.put(2, 2);
+
+    t.is(cache.get(1), 1); // returns 1
+    cache.put(3, 3); // evicts key 2
+    t.is(cache.get(2), -1); // returns -1 (not found)
+    cache.put(4, 4); // evicts key 1
+    t.is(cache.get(1), -1); // returns -1 (not found)
+    t.is(cache.get(3), 3); // returns 3
+    t.is(cache.get(4), 4); // returns 4
+});
+
+test('FastLRUCache - get non-existent key', t => {
+    const cache = new FastLRUCache(2);
+    t.is(cache.get(99), -1); // returns -1 (not found)
+});
+
+test('FastLRUCache - update value of existing key', t => {
+    const cache = new FastLRUCache(2);
+    cache.put(1, 1);
+    cache.put(1, 100);
+    t.is(cache.get(1), 100); // returns updated value 100
+});
package/tests/requestMonitor.test.js
CHANGED

@@ -37,7 +37,7 @@ test('RequestMonitor: getAverageCallDuration', async t => {
 test('RequestMonitor: incrementError429Count', t => {
     const rm = new RequestMonitor();
 
-    rm.incrementError429Count();
+    rm.incrementErrorCount(null, 429);
 
     t.is(rm.error429Count.size(), 1);
 });
@@ -74,7 +74,7 @@ test('RequestMonitor: getError429Rate', t => {
 
     rm.startCall();
     rm.endCall();
-    rm.incrementError429Count();
+    rm.incrementErrorCount(null, 429);
 
     t.is(rm.getError429Rate(), 1);
 });
@@ -84,7 +84,7 @@ test('RequestMonitor: reset', t => {
 
     rm.startCall();
     rm.endCall();
-    rm.incrementError429Count();
+    rm.incrementErrorCount(null, 429);
 
     rm.reset();
 
package/tests/truncateMessages.test.js
CHANGED

@@ -1,7 +1,7 @@
 // ModelPlugin.test.js
 import test from 'ava';
 import ModelPlugin from '../server/plugins/modelPlugin.js';
-import { encode } from 'gpt-3-encoder';
+import { encode } from '../lib/encodeCache.js';
 import { mockPathwayResolverString } from './mocks.js';
 
 const { config, pathway, modelName, model } = mockPathwayResolverString;