@aj-archipelago/cortex 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +3 -3
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/cortexRequest.js +11 -1
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +71 -68
- package/lib/requestMonitor.js +19 -9
- package/package.json +3 -1
- package/pathways/basePathway.js +5 -3
- package/pathways/bias.js +1 -1
- package/pathways/cognitive_insert.js +1 -1
- package/server/chunker.js +1 -1
- package/server/graphql.js +2 -0
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +26 -8
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +192 -0
- package/server/plugins/geminiVisionPlugin.js +102 -0
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +24 -19
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiEmbeddingsPlugin.js +3 -1
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/server/rest.js +11 -5
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
package/config.js
CHANGED
@@ -118,13 +118,13 @@ var config = convict({
             "api-key": "{{AZURE_COGNITIVE_API_KEY}}",
             "Content-Type": "application/json"
         },
-        "requestsPerSecond":
+        "requestsPerSecond": 10
     },
     "oai-embeddings": {
         "type": "OPENAI-EMBEDDINGS",
-        "url": "https://
+        "url": "https://api.openai.com/v1/embeddings",
         "headers": {
-            "
+            "Authorization": "Bearer {{OPENAI_API_KEY}}",
            "Content-Type": "application/json"
        },
        "params": {
package/helper-apps/cortex-whisper-wrapper/app.py
CHANGED
@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
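The new `language` parameter lets callers pin the transcription language instead of relying on Whisper's auto-detection. A rough sketch of a call to the wrapper follows; the host, route, and `fileurl` parameter name are assumptions for illustration — only `language` and `word_timestamps` appear in the diff above.

```js
// Hypothetical request to the cortex-whisper-wrapper helper app (Node 18+, ESM).
// The endpoint URL and the fileurl parameter name are assumed; 'language' is
// passed through to whisper's transcribe() as a decode option by the code above.
const params = new URLSearchParams({
    fileurl: 'https://example.com/audio/interview.mp3', // assumed parameter name
    word_timestamps: 'False',
    language: 'ar',
});

const res = await fetch(`http://localhost:5000/transcribe?${params}`); // assumed URL
console.log(await res.text());
```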
package/lib/cortexRequest.js
CHANGED
@@ -1,7 +1,7 @@
 import { selectEndpoint } from './requestExecutor.js';
 
 class CortexRequest {
-    constructor( { url, data, params, headers, cache, model, pathwayResolver, selectedEndpoint } = {}) {
+    constructor( { url, data, params, headers, cache, model, pathwayResolver, selectedEndpoint, stream } = {}) {
         this._url = url || '';
         this._data = data || {};
         this._params = params || {};
@@ -10,6 +10,7 @@ class CortexRequest {
         this._model = model || '';
         this._pathwayResolver = pathwayResolver || {};
         this._selectedEndpoint = selectedEndpoint || {};
+        this._stream = stream || false;
 
         if (this._pathwayResolver) {
             this._model = this._pathwayResolver.model;
@@ -112,6 +113,15 @@ class CortexRequest {
     set pathwayResolver(value) {
         this._pathwayResolver = value;
     }
+
+    // stream getter and setter
+    get stream() {
+        return this._stream;
+    }
+
+    set stream(value) {
+        this._stream = value;
+    }
 }
 
 export default CortexRequest;
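With `stream` promoted to a first-class field, a plugin can flag streaming on the request object itself rather than burying it in `params` or `data`. A minimal sketch follows; the field values are illustrative only (in Cortex the plugins and pathway resolver normally populate these).

```js
import CortexRequest from './lib/cortexRequest.js';

const cortexRequest = new CortexRequest({
    url: 'https://api.openai.com/v1/chat/completions',        // illustrative endpoint
    data: { messages: [{ role: 'user', content: 'Hello' }] }, // illustrative payload
    headers: { 'Content-Type': 'application/json' },
    stream: true, // new in this release: carried by the request object itself
});

console.log(cortexRequest.stream); // true, via the new getter; requestExecutor reads it as `stream`
```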
package/lib/encodeCache.js
ADDED
@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
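A quick sketch of the new memoized encoder: repeated calls for the same string hit the LRU cache (misses are signalled internally by the `-1` sentinel), and `decode` also primes the encode cache with the reverse mapping.

```js
import { encode, decode } from './lib/encodeCache.js';

const tokens = encode('Hello, world!'); // first call runs gpt-3-encoder and caches the result
const cached = encode('Hello, world!'); // second call is served from the 1000-entry LRU cache

console.log(tokens.length);             // token count, e.g. for context-window budgeting
console.log(decode(tokens));            // 'Hello, world!' - and primes the encode cache too
console.log(cached === tokens);         // true - the cached token array is returned as-is
```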
package/lib/fastLruCache.js
ADDED
@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
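A small usage sketch of the cache itself: `get` returns `-1` on a miss (so it isn't suited to storing `-1` as a value), and once capacity is reached the least recently used entry is evicted.

```js
import { FastLRUCache } from './lib/fastLruCache.js';

const cache = new FastLRUCache(2); // tiny capacity for the demo

cache.put('a', 1);
cache.put('b', 2);
cache.get('a');          // 1 - touching 'a' leaves 'b' as the least recently used entry
cache.put('c', 3);       // capacity reached: 'b' is evicted

console.log(cache.get('b')); // -1 (miss)
console.log(cache.get('a')); // 1
console.log(cache.get('c')); // 3
```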
package/lib/pathwayTools.js
CHANGED
package/lib/requestExecutor.js
CHANGED
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,30 +181,51 @@
             endpointIndex++;
         });
     }
-},
+}, 30000); // Log rates every 30 seconds
 
 const postWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
-
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        response = await cortexAxios.post(url, data, axiosConfigObj);
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
 const postRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
-        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model} = cortexRequest;
+        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
         const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-
-
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
         const axiosConfigObj = { params, headers, cache };
-        const streamRequested = (params?.stream || data?.stream);
+        const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
        if (streamRequested && model.supportsStreaming) {
            axiosConfigObj.responseType = 'stream';
            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
@@ -212,14 +235,20 @@ const postRequest = async (cortexRequest) => {
                 axiosConfigObj.params.stream = false;
                 data.stream = false;
             }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
             const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
             promises = controllers.map((controller, index) =>
                 new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                     setTimeout(async () => {
                         try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                             const endpointName = selectedEndpoint.name || model;
                             if (!selectedEndpoint.limiter) {
                                 throw new Error(`No limiter for endpoint ${endpointName}!`);
@@ -227,52 +256,27 @@
                             const axiosConfigObj = { params, headers, cache };
 
                             let response = null;
+                            let duration = null;
 
                             if (!controller.signal?.aborted) {
 
                                 axiosConfigObj.signal = controller.signal;
                                 axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index
-
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                     const header = '>'.repeat(logMessage.length);
                                     logger.info(`\n${header}\n${logMessage}`);
                                 }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj));
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                 if (!controller.signal?.aborted) {
-
-                                    //logger.info(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                //logger.info(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
+                                    logger.debug(`<<< [${requestId}] received response for request ${index}`);
                                 }
                             }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                         } catch (error) {
                             if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +289,48 @@ const postRequest = async (cortexRequest) => {
                         } finally {
                             controllers.forEach(controller => controller.abort());
                         }
-                    },
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                 })
             );
         }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
         try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
             // if response status is 2xx
             if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
             } else {
                 throw new Error(`Received error response: ${response.status}`);
             }
         } catch (error) {
-
-
-            const status =
-
-            if (status === 429) {
-                selectedEndpoint.monitor.incrementError429Count();
-            }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                 if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429
-
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                     }
                 } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                     cortexRequest.selectNewEndpoint();
                 }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                 if (i < MAX_RETRY - 1) {
                     const backoffTime = 200 * Math.pow(2, i);
                     const jitter = backoffTime * 0.2 * Math.random();
                     await new Promise(r => setTimeout(r, backoffTime + jitter));
                 } else {
-                    return
+                    return { response, duration };
                 }
             } else {
                 throw error;
@@ -334,10 +341,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await postRequest(cortexRequest);
         const requestId = cortexRequest.requestId;
         const { error, data, cached } = response;
         if (cached) {
@@ -347,8 +351,7 @@ const executeRequest = async (cortexRequest) => {
         const lastError = error[error.length - 1];
         return { error: lastError.toJSON() ?? lastError ?? error };
     }
-
-    return data;
+    return { data, duration };
     } catch (error) {
         logger.error(`Error in request: ${error.message || error}`);
         return { error: error };
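The new `getDuplicateRequestDelay` helper staggers backup requests exponentially and adds up to 20% positive jitter, while the retry path keeps its separate 200ms-based exponential backoff. A condensed, standalone restatement of the delay math for illustration (Cortex passes `duplicateRequestAfter` in milliseconds):

```js
// Mirrors getDuplicateRequestDelay from the diff above, runnable on its own.
const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
    const base = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
    const jitter = base * 0.2 * Math.random();
    return Math.max(0, base + jitter);
};

// With the default duplicateRequestAfter of 10s (10000 ms), the jitter-free baselines are:
//   index 0 ->     0 ms (primary request fires immediately)
//   index 1 -> 10000 ms (first backup after ~10s, sent to a newly selected endpoint)
//   index 2 -> 30000 ms (second backup after ~30s)
[0, 1, 2].forEach(i => console.log(i, Math.round(getDuplicateRequestDelay(i, 10000))));
```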
package/lib/requestMonitor.js
CHANGED
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@
         return sum / this.callDurations.size();
     }
 
-
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
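A sketch of the reshaped monitor API as requestExecutor.js now uses it; the direct instantiation and import path here are assumptions (in Cortex each endpoint owns its own monitor), but the method signatures match the diff above.

```js
import RequestMonitor from './lib/requestMonitor.js'; // assumed default export

const monitor = new RequestMonitor(); // constructor arguments, if any, assumed optional

const callId = monitor.startCall();
// ... perform the HTTP request ...
const succeeded = true; // placeholder for the real outcome

const durationMs = succeeded
    ? monitor.endCall(callId)                   // success: returns the measured duration
    : monitor.incrementErrorCount(callId, 429); // failure: records the error (429s also feed
                                                // error429Count) and still returns the duration

console.log(`call took ${durationMs} ms`);
```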
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.4",
+  "version": "1.1.6",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -53,12 +53,14 @@
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
     "langchain": "^0.0.47",
+    "mime-types": "^2.1.35",
     "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
     "winston": "^3.11.0",
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
package/pathways/basePathway.js
CHANGED
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature:
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature:
+    temperature: 0.9,
 };
 
package/pathways/bias.js
CHANGED
@@ -6,5 +6,5 @@ export default {
     // Uncomment the following line to enable caching for this prompt, if desired.
     // enableCache: true,
 
-    prompt: `{{text}}\n\nIs the above text written objectively? Why or why not, explain with details:\n
+    prompt: `{{text}}\n\nIs the above text written objectively? Why or why not, explain with details:\n`,
 };
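Pathways such as bias.js inherit the defaults shown in basePathway.js above, so the new fields can be overridden per pathway. A hypothetical pathway module illustrating that (the prompt text and values are invented for the example):

```js
// e.g. pathways/summarize_no_backup.js (hypothetical)
export default {
    prompt: `{{text}}\n\nSummarize the above text in three sentences:\n`,

    // overrides of the new/changed basePathway defaults
    joinChunksWith: '\n',           // join chunked results with single newlines instead of '\n\n'
    enableDuplicateRequests: false, // opt this pathway out of backup requests entirely
    temperature: 0.2,               // favor more deterministic output than the 0.9 default
};
```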
package/server/chunker.js
CHANGED
package/server/graphql.js
CHANGED
package/server/modelExecutor.js
CHANGED
@@ -17,6 +17,8 @@ import OpenAiEmbeddingsPlugin from './plugins/openAiEmbeddingsPlugin.js';
 import OpenAIImagePlugin from './plugins/openAiImagePlugin.js';
 import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
 import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
+import GeminiChatPlugin from './plugins/geminiChatPlugin.js';
+import GeminiVisionPlugin from './plugins/geminiVisionPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -72,6 +74,12 @@ class ModelExecutor {
             case 'OPENAI-VISION':
                 plugin = new OpenAIVisionPlugin(pathway, model);
                 break;
+            case 'GEMINI-CHAT':
+                plugin = new GeminiChatPlugin(pathway, model);
+                break;
+            case 'GEMINI-VISION':
+                plugin = new GeminiVisionPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }
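With the two Gemini plugins wired into the switch above, any model whose `type` is `GEMINI-CHAT` or `GEMINI-VISION` is routed to them. A hypothetical model entry in the shape of the config.js models shown earlier; the URL, header name, and environment variable are assumptions, not values shipped by the package:

```js
// Hypothetical model definition; ModelExecutor selects GeminiChatPlugin from model.type.
const geminiChatModel = {
    type: 'GEMINI-CHAT',
    url: 'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent', // assumed endpoint
    headers: {
        'x-goog-api-key': '{{GEMINI_API_KEY}}', // assumed header and env var
        'Content-Type': 'application/json',
    },
    requestsPerSecond: 10,
};

export default geminiChatModel;
```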
|