@aj-archipelago/cortex 1.1.5 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +11 -1
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/cortexRequest.js +10 -0
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +78 -71
- package/lib/requestMonitor.js +19 -9
- package/package.json +3 -2
- package/pathways/basePathway.js +5 -3
- package/pathways/bing.js +12 -0
- package/pathways/index.js +2 -0
- package/server/chunker.js +1 -1
- package/server/graphql.js +1 -1
- package/server/modelExecutor.js +4 -0
- package/server/pathwayResolver.js +3 -3
- package/server/plugins/azureBingPlugin.js +44 -0
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +4 -7
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +22 -18
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
package/config.js
CHANGED
@@ -118,7 +118,7 @@ var config = convict({
         "api-key": "{{AZURE_COGNITIVE_API_KEY}}",
         "Content-Type": "application/json"
       },
-      "requestsPerSecond":
+      "requestsPerSecond": 10
     },
     "oai-embeddings": {
      "type": "OPENAI-EMBEDDINGS",
@@ -146,6 +146,16 @@ var config = convict({
      "maxTokenLength": 128000,
      "supportsStreaming": true
    },
+    "azure-bing": {
+      "type": "AZURE-BING",
+      "url": "https://api.bing.microsoft.com/v7.0/search",
+      "headers": {
+        "Ocp-Apim-Subscription-Key": "{{AZURE_BING_KEY}}",
+        "Content-Type": "application/json"
+      },
+      "requestsPerSecond": 10,
+      "maxTokenLength": 200000
+    },
   },
   env: 'CORTEX_MODELS'
 },
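The {{AZURE_BING_KEY}} header value follows the same placeholder convention as the existing {{AZURE_COGNITIVE_API_KEY}} entry, so the subscription key is supplied through the environment rather than hard-coded in config.js. As a rough, hypothetical sketch of what resolving such a placeholder amounts to (this helper is illustrative only and is not Cortex's actual substitution code):

    // hypothetical {{VAR}} resolution from the environment
    const resolvePlaceholders = (value) =>
        value.replace(/{{\s*([A-Z0-9_]+)\s*}}/g, (_, name) => process.env[name] ?? '');

    resolvePlaceholders('{{AZURE_BING_KEY}}'); // -> value of process.env.AZURE_BING_KEY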
package/helper-apps/cortex-whisper-wrapper/app.py
CHANGED

@@ -38,9 +38,14 @@ def transcribe(params):
     if 'word_timestamps' in params: #parse as bool
         word_timestamps = False if params['word_timestamps'] == 'False' else True
 
+    decode_options = {}
+    if 'language' in params:
+        decode_options["language"] = params["language"]
+        print(f"Transcription language set as {decode_options['language']}")
+
     print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
     start_time = time.time()
-    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Transcribe execution time:", execution_time, "seconds")
package/lib/cortexRequest.js
CHANGED
@@ -11,6 +11,7 @@ class CortexRequest {
         this._pathwayResolver = pathwayResolver || {};
         this._selectedEndpoint = selectedEndpoint || {};
         this._stream = stream || false;
+        this._method = 'POST';
 
         if (this._pathwayResolver) {
             this._model = this._pathwayResolver.model;
@@ -41,6 +42,15 @@ class CortexRequest {
         this._url = value;
     }
 
+    // method getter and setter
+    get method() {
+        return this._method;
+    }
+
+    set method(value) {
+        this._method = value;
+    }
+
     // data getter and setter
     get data() {
         return this._data;
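The new method property defaults to 'POST' and lets a caller switch a request to GET before it is executed. A minimal sketch of the intended use (instance construction elided; the GET-based plugin here is only an example, not package code):

    // e.g. inside a plugin whose upstream API is GET-based
    cortexRequest.method = 'GET';
    // requestExecutor's requestWithMonitor then issues cortexAxios.get(url, axiosConfigObj)
    // instead of cortexAxios.post(url, data, axiosConfigObj)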
package/lib/encodeCache.js
ADDED

@@ -0,0 +1,38 @@
+import { encode as gpt3Encode, decode as gpt3Decode } from 'gpt-3-encoder';
+import { FastLRUCache } from './fastLruCache.js';
+
+class EncodeCache {
+    constructor() {
+        this.encodeCache = new FastLRUCache(1000);
+        this.decodeCache = new FastLRUCache(100); // we don't use decode nearly as much
+    }
+
+    encode(value) {
+        if (this.encodeCache.get(value) !== -1) {
+            return this.encodeCache.get(value);
+        }
+        const encoded = gpt3Encode(value);
+        this.encodeCache.put(value, encoded);
+        return encoded;
+    }
+
+    decode(value) {
+        if (this.decodeCache.get(value) !== -1) {
+            return this.decodeCache.get(value);
+        }
+        const decoded = gpt3Decode(value);
+        this.decodeCache.put(value, decoded);
+        if (this.encodeCache.get(decoded) === -1) {
+            this.encodeCache.put(decoded, value);
+        }
+        return decoded;
+    }
+}
+
+// Create one instance of the cache
+const cache = new EncodeCache();
+
+// Make sure the instance is bound to the methods, so
+// references to 'this' are correct
+export const encode = cache.encode.bind(cache);
+export const decode = cache.decode.bind(cache);
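These exports behave as drop-in, memoized replacements for gpt-3-encoder's encode and decode. A minimal usage sketch:

    import { encode, decode } from './encodeCache.js';

    const tokens = encode('Hello, Cortex!'); // first call tokenizes via gpt-3-encoder and caches the result
    encode('Hello, Cortex!');                // repeat calls for the same string are served from the 1000-entry LRU cache
    const text = decode(tokens);             // decode results are cached too, and the round trip primes the encode cache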
package/lib/fastLruCache.js
ADDED

@@ -0,0 +1,82 @@
+// This class implements a fast O(1) LRU cache using a Map and a doubly linked list.
+
+class Node {
+    constructor(key, value) {
+        this.key = key;
+        this.value = value;
+        this.next = null;
+        this.prev = null;
+    }
+}
+
+class FastLRUCache {
+    constructor(capacity) {
+        this.capacity = capacity;
+        this.cache = new Map();
+        this.head = null;
+        this.tail = null;
+    }
+
+    get(key) {
+        if (!this.cache.has(key)) {
+            return -1;
+        }
+        const node = this.cache.get(key);
+        this.moveToEnd(node);
+        return node.value;
+    }
+
+    put(key, value) {
+        if (this.cache.has(key)) {
+            const node = this.cache.get(key);
+            node.value = value;
+            this.moveToEnd(node);
+        } else {
+            const node = new Node(key, value);
+            if (this.cache.size >= this.capacity) {
+                this.cache.delete(this.head.key);
+                this.shiftHeadToNext();
+            }
+            this.cache.set(key, node);
+            this.addNodeToTail(node);
+        }
+    }
+
+    addNodeToTail(node) {
+        if (!this.tail) {
+            this.head = node;
+            this.tail = node;
+        } else {
+            node.prev = this.tail;
+            this.tail.next = node;
+            this.tail = node;
+        }
+    }
+
+    moveToEnd(node) {
+        if (node === this.tail) {
+            return;
+        }
+        if (node === this.head) {
+            this.shiftHeadToNext();
+        } else {
+            node.prev.next = node.next;
+            node.next.prev = node.prev;
+        }
+        node.prev = this.tail;
+        node.next = null;
+        this.tail.next = node;
+        this.tail = node;
+    }
+
+    shiftHeadToNext() {
+        this.head = this.head.next;
+        if (this.head) {
+            this.head.prev = null;
+        } else {
+            this.tail = null;
+        }
+    }
+}
+
+export { FastLRUCache };
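A quick sketch of the cache's behavior - get returns -1 on a miss, and put evicts the least recently used entry once capacity is reached:

    import { FastLRUCache } from './fastLruCache.js';

    const cache = new FastLRUCache(2);
    cache.put('a', 1);
    cache.put('b', 2);
    cache.get('a');    // 1 - touching 'a' makes it the most recently used entry
    cache.put('c', 3); // over capacity, so the least recently used key 'b' is evicted
    cache.get('b');    // -1 (miss)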
package/lib/pathwayTools.js
CHANGED
package/lib/requestExecutor.js
CHANGED
@@ -57,9 +57,10 @@ const createLimiter = (endpoint, name, index) => {
 
     endpoint.limiter.on('failed', (error, info) => {
         if (error.name === 'CanceledError') {
-            logger.debug(`
+            logger.debug(`Limiter request cancelled for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}`);
+            endpoint.monitor.incrementErrorCount();
         } else {
-            logger.error(`
+            logger.error(`Limiter request failed for ${cortexId}-${name}-${index}: Id: ${info.options.id || 'none'}: ${error?.message || error}`);
         }
     });
 
@@ -154,6 +155,7 @@ if (config.get('enableCache')) {
     });
 }
 
+//log statistics about active endpoints
 setInterval(() => {
     // Iterate over each model
     for (const [name, model] of Object.entries(modelEndpoints)) {
@@ -179,100 +181,106 @@ setInterval(() => {
             endpointIndex++;
         });
     }
-},
+}, 30000); // Log rates every 30 seconds
 
-const
-
+const requestWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
+    const callId = endpoint?.monitor?.startCall();
+    let response;
+    try {
+        if (axiosConfigObj?.method == 'GET'){
+            response = await cortexAxios.get(url, axiosConfigObj);
+        } else {
+            response = await cortexAxios.post(url, data, axiosConfigObj);
+        }
+    } catch (error) {
+        // throw new error with duration as part of the error data
+        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+    }
+    let duration;
+    if (response.status >= 200 && response.status < 300) {
+        duration = endpoint?.monitor?.endCall(callId);
+    } else {
+        duration = endpoint?.monitor?.incrementErrorCount(callId, response.status);
+    }
+
+    return { response, duration };
 }
 
 const MAX_RETRY = 10; // retries for error handling
 const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
 const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 
-const
+const getDuplicateRequestDelay = (index, duplicateRequestAfter) => {
+    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
+    const jitter = duplicateRequestTime * 0.2 * Math.random();
+    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
+    return duplicateRequestTimeout;
+}
+
+const makeRequest = async (cortexRequest) => {
     let promises = [];
+    // retry certain errors up to MAX_RETRY times
     for (let i = 0; i < MAX_RETRY; i++) {
-        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
+        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream, method} = cortexRequest;
         const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-
-
-
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
+        const maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
+        const duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
 
-        const axiosConfigObj = { params, headers, cache };
+        const axiosConfigObj = { params, headers, cache, method };
         const streamRequested = (stream || params?.stream || data?.stream);
+        // if we're using streaming, duplicate requests are
+        // not supported, so we just push one promise into the array
         if (streamRequested && model.supportsStreaming) {
             axiosConfigObj.responseType = 'stream';
-            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() =>
+            promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => requestWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
         } else {
             if (streamRequested) {
                 logger.info(`>>> [${requestId}] ${model} does not support streaming - sending non-streaming request`);
                 axiosConfigObj.params.stream = false;
                 data.stream = false;
             }
+            // if we're not streaming, we push at least one promise
+            // into the array, but if we're supporting duplicate
+            // requests we push one for each potential duplicate,
+            // heading to a new endpoint (if available) and
+            // staggered by a jittered amount of time
             const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
             promises = controllers.map((controller, index) =>
                 new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
                     setTimeout(async () => {
                         try {
+                            if (index > 0) {
+                                cortexRequest.selectNewEndpoint();
+                            }
+                            const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model } = cortexRequest;
                             const endpointName = selectedEndpoint.name || model;
                             if (!selectedEndpoint.limiter) {
                                 throw new Error(`No limiter for endpoint ${endpointName}!`);
                             }
-                            const axiosConfigObj = { params, headers, cache };
+                            const axiosConfigObj = { params, headers, cache, method };
 
                             let response = null;
+                            let duration = null;
 
                             if (!controller.signal?.aborted) {
 
                                 axiosConfigObj.signal = controller.signal;
                                 axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
 
-                                if (index
-
-                                } else {
-                                    if (model.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
+                                if (index > 0) {
+                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${endpointName} API`;
                                     const header = '>'.repeat(logMessage.length);
                                     logger.info(`\n${header}\n${logMessage}`);
                                 }
 
-                                response = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () =>
+                                ({ response, duration } = await selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`}, () => requestWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
 
                                 if (!controller.signal?.aborted) {
-
                                     logger.debug(`<<< [${requestId}] received response for request ${index}`);
-
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
                                 }
                             }
 
-                            resolve(response);
+                            resolve({ response, duration });
 
                         } catch (error) {
                             if (error.name === 'AbortError' || error.name === 'CanceledError') {
@@ -285,45 +293,48 @@ const postRequest = async (cortexRequest) => {
                         } finally {
                             controllers.forEach(controller => controller.abort());
                         }
-                    },
+                    }, getDuplicateRequestDelay(index, duplicateRequestAfter));
                 })
             );
         }
 
+        // no requests have been made yet, but the promises array
+        // is full, so now we execute them in parallel
         try {
-            const response = await Promise.race(promises);
+            const { response, duration } = await Promise.race(promises);
 
             // if response status is 2xx
             if (response.status >= 200 && response.status < 300) {
-                return response;
+                return { response, duration };
             } else {
                 throw new Error(`Received error response: ${response.status}`);
            }
        } catch (error) {
-
-
-            const status =
-
-            if (status === 429) {
-                selectedEndpoint.monitor.incrementError429Count();
-            }
-
+            const { response, duration } = error;
+            if (response) {
+                const status = response.status;
+                // if there is only one endpoint, only retry select error codes
                if (cortexRequest.model.endpoints.length === 1) {
-                    if (status !== 429
-
+                    if (status !== 429 &&
+                        status !== 408 &&
+                        status !== 502 &&
+                        status !== 503 &&
+                        status !== 504) {
+                        return { response, duration };
                    }
                } else {
-                    // if there are multiple endpoints, retry everything
+                    // if there are multiple endpoints, retry everything as it
+                    // could be going to a different host
                    cortexRequest.selectNewEndpoint();
                }
 
-                logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
+                logger.info(`>>> [${requestId}] retrying request (${duration}ms) due to ${status} response. Retry count: ${i + 1}`);
                if (i < MAX_RETRY - 1) {
                    const backoffTime = 200 * Math.pow(2, i);
                    const jitter = backoffTime * 0.2 * Math.random();
                    await new Promise(r => setTimeout(r, backoffTime + jitter));
                } else {
-                    return
+                    return { response, duration };
                }
            } else {
                throw error;
@@ -334,10 +345,7 @@ const postRequest = async (cortexRequest) => {
 
 const executeRequest = async (cortexRequest) => {
     try {
-        const
-        const callId = endpoint?.monitor?.startCall();
-        const response = await postRequest(cortexRequest);
-        endpoint?.monitor?.endCall(callId);
+        const { response, duration } = await makeRequest(cortexRequest);
         const requestId = cortexRequest.requestId;
         const { error, data, cached } = response;
         if (cached) {
@@ -347,8 +355,7 @@ const executeRequest = async (cortexRequest) => {
             const lastError = error[error.length - 1];
             return { error: lastError.toJSON() ?? lastError ?? error };
         }
-
-        return data;
+        return { data, duration };
     } catch (error) {
         logger.error(`Error in request: ${error.message || error}`);
         return { error: error };
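For reference, with the default DUPLICATE_REQUEST_AFTER of 10 seconds (the helper receives it in milliseconds), the staggered delays produced by getDuplicateRequestDelay work out to roughly:

    getDuplicateRequestDelay(0, 10000); // 0 ms - the primary request fires immediately
    getDuplicateRequestDelay(1, 10000); // ~10000-12000 ms - first duplicate after about 10s, plus up to 20% jitter
    getDuplicateRequestDelay(2, 10000); // ~30000-36000 ms - second duplicate after about 30s, plus up to 20% jitter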
package/lib/requestMonitor.js
CHANGED
@@ -1,5 +1,4 @@
 import { v4 as uuidv4 } from 'uuid';
-// eslint-disable-next-line import/no-extraneous-dependencies
 import { Deque } from '@datastructures-js/deque';
 
 class RequestMonitor {
@@ -20,6 +19,15 @@ class RequestMonitor {
         return this.healthy;
     }
 
+    removeOldCallStarts() {
+        const currentTime = new Date();
+        for (const [callId, startTime] of this.callStartTimes) {
+            if (currentTime - startTime > this.ageOutTime) {
+                this.callStartTimes.delete(callId);
+            }
+        }
+    }
+
     removeOldCallStats(dq, timeProperty) {
         const currentTime = new Date();
         while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
@@ -28,6 +36,7 @@ class RequestMonitor {
     }
 
     maintain() {
+        this.removeOldCallStarts();
         this.removeOldCallStats(this.callCount);
         if (this.callCount.size() === 0) {
             this.peakCallRate = 0;
@@ -36,7 +45,7 @@ class RequestMonitor {
         this.removeOldCallStats(this.error429Count);
         this.removeOldCallStats(this.errorCount);
 
-        if (this.getErrorRate() > 0.
+        if (this.getErrorRate() > 0.1) {
             this.healthy = false;
         } else {
             this.healthy = true;
@@ -55,10 +64,11 @@ class RequestMonitor {
     endCall(callId) {
         const endTime = new Date();
         const startTime = this.callStartTimes.get(callId);
+        let callDuration = null;
 
         if (startTime) {
+            callDuration = (endTime - startTime);
             this.callStartTimes.delete(callId);
-            const callDuration = endTime - startTime;
             this.callDurations.pushBack({endTime, callDuration});
 
             // Keep the callDurations length to 5
@@ -73,6 +83,7 @@ class RequestMonitor {
         }
 
         this.maintain();
+        return callDuration;
     }
 
     getAverageCallDuration() {
@@ -84,14 +95,13 @@ class RequestMonitor {
         return sum / this.callDurations.size();
     }
 
-
-        this.error429Count.pushBack(new Date());
-        this.maintain();
-    }
-
-    incrementErrorCount() {
+    incrementErrorCount(callId, status) {
         this.errorCount.pushBack(new Date());
+        if (status === 429) {
+            this.error429Count.pushBack(new Date());
+        }
         this.maintain();
+        return callId ? this.endCall(callId) : null;
     }
 
     getCallRate() {
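The executor's requestWithMonitor (above) relies on these return values to attach a duration to every request. Roughly, the call-tracking flow looks like this (monitor construction is omitted here; startCall, endCall, and incrementErrorCount are the methods shown in the diff):

    const monitor = endpoint.monitor;          // RequestMonitor instance attached to the endpoint
    const callId = monitor.startCall();
    // on success:
    const duration = monitor.endCall(callId);  // returns the call duration in milliseconds
    // or, on a failure such as an HTTP 429:
    const errorDuration = monitor.incrementErrorCount(callId, 429); // records the error (and the 429) and returns the duration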
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.5",
+  "version": "1.1.7",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -52,7 +52,7 @@
     "handlebars": "^4.7.7",
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
-    "langchain": "^0.
+    "langchain": "^0.1.28",
     "mime-types": "^2.1.35",
     "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
@@ -60,6 +60,7 @@
     "ws": "^8.12.0"
   },
   "devDependencies": {
+    "@faker-js/faker": "^8.4.1",
     "ava": "^5.2.0",
     "dotenv": "^16.0.3",
     "eslint": "^8.38.0",
package/pathways/basePathway.js
CHANGED
@@ -14,19 +14,21 @@ export default {
     typeDef,
     rootResolver,
     resolver,
-    inputFormat: 'text', // text or html - changes the behavior of the input chunking
+    inputFormat: 'text', // string - 'text' or 'html' - changes the behavior of the input chunking
     useInputChunking: true, // true or false - enables input to be split into multiple chunks to meet context window size
     useParallelChunkProcessing: false, // true or false - enables parallel processing of chunks
+    joinChunksWith: '\n\n', // string - the string to join result chunks with when useInputChunking is 'true'
     useInputSummarization: false, // true or false - instead of chunking, summarize the input and act on the summary
     truncateFromFront: false, // true or false - if true, truncate from the front of the input instead of the back
     timeout: 120, // seconds, cancels the pathway after this many seconds
+    enableDuplicateRequests: true, // true or false - if true, duplicate requests are sent if the request is not completed after duplicateRequestAfter seconds
     duplicateRequestAfter: 10, // seconds, if the request is not completed after this many seconds, a backup request is sent
     // override the default execution of the pathway
-    // callback signature:
+    // callback signature: executeOverride({args: object, runAllPrompts: function})
     // args: the input arguments to the pathway
     // runAllPrompts: a function that runs all prompts in the pathway and returns the result
     executePathway: undefined,
     // Set the temperature to 0 to favor more deterministic output when generating entity extraction.
-    temperature:
+    temperature: 0.9,
 };
 
package/pathways/bing.js
ADDED
package/pathways/index.js
CHANGED
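The basePathway values above are only defaults; an individual pathway module (such as the new bing pathway registered through pathways/index.js) can override them. A hypothetical illustration of overriding the new options (the file name and prompt below are illustrative, not part of this release):

    // pathways/briefSummary.js (hypothetical)
    export default {
        prompt: `Briefly summarize the following text:\n\n{{text}}`,
        joinChunksWith: '\n',           // join chunked results with single newlines instead of blank lines
        enableDuplicateRequests: false, // opt this pathway out of backup requests
    };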
package/server/chunker.js
CHANGED
package/server/graphql.js
CHANGED
package/server/modelExecutor.js
CHANGED
@@ -19,6 +19,7 @@ import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
 import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
 import GeminiChatPlugin from './plugins/geminiChatPlugin.js';
 import GeminiVisionPlugin from './plugins/geminiVisionPlugin.js';
+import AzureBingPlugin from './plugins/azureBingPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -80,6 +81,9 @@ class ModelExecutor {
             case 'GEMINI-VISION':
                 plugin = new GeminiVisionPlugin(pathway, model);
                 break;
+            case 'AZURE-BING':
+                plugin = new AzureBingPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }