npm - @aj-archipelago/cortex - Versions diffs - 1.1.3 → 1.1.4 - Mend

@aj-archipelago/cortex 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

package/.eslintignore +3 -3
package/README.md +17 -4
package/config.js +45 -9
package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/Dockerfile +1 -1
package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/fileChunker.js +4 -1
package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/package-lock.json +25 -216
package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/package.json +2 -2
package/helper-apps/cortex-whisper-wrapper/.dockerignore +27 -0
package/helper-apps/cortex-whisper-wrapper/Dockerfile +32 -0
package/helper-apps/cortex-whisper-wrapper/app.py +104 -0
package/helper-apps/cortex-whisper-wrapper/docker-compose.debug.yml +12 -0
package/helper-apps/cortex-whisper-wrapper/docker-compose.yml +10 -0
package/helper-apps/cortex-whisper-wrapper/models/.gitkeep +0 -0
package/helper-apps/cortex-whisper-wrapper/requirements.txt +5 -0
package/lib/cortexRequest.js +117 -0
package/lib/pathwayTools.js +2 -1
package/lib/redisSubscription.js +2 -2
package/lib/requestExecutor.js +360 -0
package/lib/requestMonitor.js +131 -28
package/package.json +2 -1
package/pathways/summary.js +3 -3
package/server/graphql.js +6 -6
package/server/{pathwayPrompter.js → modelExecutor.js} +24 -21
package/server/pathwayResolver.js +22 -17
package/server/plugins/azureCognitivePlugin.js +25 -20
package/server/plugins/azureTranslatePlugin.js +6 -10
package/server/plugins/cohereGeneratePlugin.js +5 -12
package/server/plugins/cohereSummarizePlugin.js +5 -12
package/server/plugins/localModelPlugin.js +3 -3
package/server/plugins/modelPlugin.js +18 -12
package/server/plugins/openAiChatExtensionPlugin.js +5 -5
package/server/plugins/openAiChatPlugin.js +8 -10
package/server/plugins/openAiCompletionPlugin.js +9 -12
package/server/plugins/openAiDallE3Plugin.js +14 -31
package/server/plugins/openAiEmbeddingsPlugin.js +6 -9
package/server/plugins/openAiImagePlugin.js +19 -15
package/server/plugins/openAiWhisperPlugin.js +168 -100
package/server/plugins/palmChatPlugin.js +9 -10
package/server/plugins/palmCodeCompletionPlugin.js +2 -2
package/server/plugins/palmCompletionPlugin.js +11 -12
package/server/resolver.js +2 -2
package/server/rest.js +1 -1
package/tests/config.test.js +1 -1
package/tests/mocks.js +5 -0
package/tests/modelPlugin.test.js +3 -10
package/tests/openAiChatPlugin.test.js +9 -8
package/tests/openai_api.test.js +3 -3
package/tests/palmChatPlugin.test.js +1 -1
package/tests/palmCompletionPlugin.test.js +1 -1
package/tests/pathwayResolver.test.js +2 -1
package/tests/requestMonitor.test.js +94 -0
package/tests/{requestDurationEstimator.test.js → requestMonitorDurationEstimator.test.js} +21 -17
package/tests/truncateMessages.test.js +1 -1
package/lib/request.js +0 -259
package/lib/requestDurationEstimator.js +0 -90
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/blobHandler.js +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/docHelper.js +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/function.json +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/helper.js +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/index.js +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/localFileHandler.js +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/redis.js +0 -0
/package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/start.js +0 -0

package/tests/pathwayResolver.test.js CHANGED

@@ -1,7 +1,7 @@
 import test from 'ava';
 import { PathwayResolver } from '../server/pathwayResolver.js';
 import sinon from 'sinon';
-import { mockConfig, mockPathwayString } from './mocks.js';
+import { mockConfig, mockPathwayString, mockModelEndpoints } from './mocks.js';
 const mockPathway = mockPathwayString;
 mockPathway.useInputChunking = false;
@@ -16,6 +16,7 @@ test.beforeEach((t) => {
     config: mockConfig,
     pathway: mockPathway,
     args: mockArgs,
+    endpoints: mockModelEndpoints,
   });
 });

package/tests/requestMonitor.test.js ADDED

@@ -0,0 +1,94 @@
+import test from 'ava';
+import RequestMonitor from '../lib/requestMonitor.js'; // replace with actual path
+test('RequestMonitor: startCall', t => {
+  const rm = new RequestMonitor();
+  const callId = rm.startCall();
+  t.is(rm.callStartTimes.has(callId), true);
+});
+test('RequestMonitor: endCall', t => {
+  const rm = new RequestMonitor();
+  const callId = rm.startCall();
+  rm.endCall(callId);
+  t.is(rm.callStartTimes.has(callId), false);
+  t.is(rm.callCount.size(), 1);
+});
+test('RequestMonitor: getAverageCallDuration', async t => {
+  const rm = new RequestMonitor();
+  const callId1 = rm.startCall();
+  await new Promise(resolve => setTimeout(resolve, 1000));
+  rm.endCall(callId1);
+  const callId2 = rm.startCall();
+  await new Promise(resolve => setTimeout(resolve, 2000));
+  rm.endCall(callId2);
+  const average = rm.getAverageCallDuration();
+  t.truthy(average > 1400 && average < 1600);
+});
+test('RequestMonitor: incrementError429Count', t => {
+  const rm = new RequestMonitor();
+  rm.incrementError429Count();
+  t.is(rm.error429Count.size(), 1);
+});
+test('RequestMonitor: getCallRate', async t => {
+  const rm = new RequestMonitor();
+  rm.startCall();
+  rm.endCall();
+  await new Promise(resolve => setTimeout(resolve, 1000));
+  const callRate = rm.getCallRate();
+  t.truthy(callRate > 0.9 && callRate < 1.1);
+});
+test('RequestMonitor: getPeakCallRate', async t => {
+  const rm = new RequestMonitor();
+  rm.startCall();
+  rm.endCall();
+  await new Promise(resolve => setTimeout(resolve, 1000));
+  rm.startCall();
+  rm.endCall();
+  const peakCallRate = rm.getPeakCallRate();
+  t.truthy(peakCallRate > 1.9 && peakCallRate < 2.1);
+});
+test('RequestMonitor: getError429Rate', t => {
+  const rm = new RequestMonitor();
+  rm.startCall();
+  rm.endCall();
+  rm.incrementError429Count();
+  t.is(rm.getError429Rate(), 1);
+});
+test('RequestMonitor: reset', t => {
+  const rm = new RequestMonitor();
+  rm.startCall();
+  rm.endCall();
+  rm.incrementError429Count();
+  rm.reset();
+  t.is(rm.callCount.size(), 0);
+  t.is(rm.error429Count.size(), 0);
+  t.is(rm.peakCallRate, 0);
+});

package/tests/{requestDurationEstimator.test.js → requestMonitorDurationEstimator.test.js} RENAMED

@@ -1,14 +1,14 @@
 import test from 'ava';
-import RequestDurationEstimator from '../lib/requestDurationEstimator.js';
+import RequestMonitor from '../lib/requestMonitor.js';
 test('add and get average request duration', async (t) => {
-    const estimator = new RequestDurationEstimator(5);
+    const estimator = new RequestMonitor(5);
-    estimator.startRequest('req1');
+    const callid = estimator.startCall();
     await new Promise(resolve => setTimeout(() => {
-        estimator.endRequest();
+        estimator.endCall(callid);
-        const average = estimator.calculatePercentComplete();
+        const average = estimator.calculatePercentComplete(callid);
         // An average should be calculated after the first completed request
         t.not(average, 0);
@@ -17,31 +17,31 @@ test('add and get average request duration', async (t) => {
 });
 test('add more requests than size of durations array', (t) => {
-    const estimator = new RequestDurationEstimator(5);
+    const estimator = new RequestMonitor(5);
     for (let i = 0; i < 10; i++) {
-        estimator.startRequest(`req${i}`);
-        estimator.endRequest();
+        const callid = estimator.startCall();
+        estimator.endCall(callid);
     }
     // Array size should not exceed maximum length (5 in this case)
-    t.is(estimator.durations.length, 5);
+    t.is(estimator.callDurations.size(), 5);
 });
 test('calculate percent complete of current request based on average of past durations', async (t) => {
-    const estimator = new RequestDurationEstimator(5);
+    const estimator = new RequestMonitor(5);
     for (let i = 0; i < 4; i++) {
-        estimator.startRequest(`req${i}`);
+        const callid = estimator.startCall();
         // wait 1 second
         await new Promise(resolve => setTimeout(resolve, 1000));
-        estimator.endRequest();
+        estimator.endCall(callid);
     }
-    estimator.startRequest('req5');
+    const callid = estimator.startCall();
     await new Promise(resolve => setTimeout(() => {
-        const percentComplete = estimator.calculatePercentComplete();
+        const percentComplete = estimator.calculatePercentComplete(callid);
         // Depending on how fast the operations are,
         // the percentage may not be exactly 50%, but
@@ -52,8 +52,12 @@ test('calculate percent complete of current request based on average of past dur
 });
 test('calculate percent complete based on average of past durations', async (t) => {
-    const estimator = new RequestDurationEstimator(5);
-    estimator.durations = [1000, 2000, 3000];
-    const average = estimator.getAverage();
+    const estimator = new RequestMonitor(5);
+    estimator.callDurations.clear;
+    estimator.callDurations.pushBack({endTime: new Date(), callDuration: 1000});
+    estimator.callDurations.pushBack({endTime: new Date(), callDuration: 2000});
+    estimator.callDurations.pushBack({endTime: new Date(), callDuration: 3000});
+    const average = estimator.getAverageCallDuration();
     t.is(average, 2000);
 });

package/tests/truncateMessages.test.js CHANGED

@@ -6,7 +6,7 @@ import { mockPathwayResolverString } from './mocks.js';
 const { config, pathway, modelName, model } = mockPathwayResolverString;
-const modelPlugin = new ModelPlugin(config, pathway, modelName, model);
+const modelPlugin = new ModelPlugin(pathway, model);
 const generateMessage = (role, content) => ({ role, content });

package/lib/request.js DELETED

@@ -1,259 +0,0 @@
-import Bottleneck from 'bottleneck/es5.js';
-import RequestMonitor from './requestMonitor.js';
-import { config } from '../config.js';
-import axios from 'axios';
-import { setupCache } from 'axios-cache-interceptor';
-import Redis from 'ioredis';
-import logger from './logger.js';
-const connectionString = config.get('storageConnectionString');
-if (!connectionString) {
-    logger.info('No STORAGE_CONNECTION_STRING found in environment. Redis features (caching, pubsub, clustered limiters) disabled.')
-} else {
-    logger.info('Using Redis connection specified in STORAGE_CONNECTION_STRING.');
-}
-let client;
-if (connectionString) {
-    try {
-        client = new Redis(connectionString);
-    } catch (error) {
-        logger.error(`Redis connection error: ${error}`);
-    }
-}
-const cortexId = config.get('cortexId');
-const connection = client && new Bottleneck.IORedisConnection({ client: client });
-const limiters = {};
-const monitors = {};
-const buildLimiters = (config) => {
-    logger.info(`Building ${connection ? 'Redis clustered' : 'local'} model rate limiters for ${cortexId}...`);
-    for (const [name, model] of Object.entries(config.get('models'))) {
-        const rps = model.requestsPerSecond ?? 100;
-        let limiterOptions = {
-            minTime: 1000 / rps,
-            maxConcurrent: rps,
-            reservoir: rps,      // Number of tokens available initially
-            reservoirRefreshAmount: rps,     // Number of tokens added per interval
-            reservoirRefreshInterval: 1000, // Interval in milliseconds
-        };
-        // If Redis connection exists, add id and connection to enable clustering
-        if (connection) {
-            limiterOptions.id = `${cortexId}-${name}-limiter`; // Unique id for each limiter
-            limiterOptions.connection = connection;  // Shared Redis connection
-        }
-        limiters[name] = new Bottleneck(limiterOptions);
-        limiters[name].on('error', (err) => {
-            logger.error(`Limiter error for ${cortexId}-${name}: ${err}`);
-        });
-        monitors[name] = new RequestMonitor();
-    }
-}
-let cortexAxios = axios;
-if (config.get('enableCache')) {
-    // Setup cache
-    cortexAxios = setupCache(axios, {
-        // enable cache for all requests by default
-        methods: ['get', 'post', 'put', 'delete', 'patch'],
-        interpretHeader: false,
-        ttl: 1000 * 60 * 60 * 24 * 7, // 7 days
-    });
-}
-setInterval(() => {
-    const monitorKeys = Object.keys(monitors);
-    // Skip logging if the monitors object does not exist or is empty
-    if (!monitorKeys || monitorKeys.length === 0) {
-      return;
-    }
-    monitorKeys.forEach((monitorName) => {
-        const monitor = monitors[monitorName];
-        const callRate = monitor.getPeakCallRate();
-        const error429Rate = monitor.getError429Rate();
-        if (callRate > 0) {
-            logger.info('------------------------');
-            logger.info(`${monitorName} Call rate: ${callRate} calls/sec, 429 errors: ${error429Rate * 100}%`);
-            logger.info('------------------------');
-            // Reset the rate monitor to start a new monitoring interval.
-            monitor.reset();
-        }
-    });
-  }, 10000); // Log rates every 10 seconds (10000 ms).
-const postWithMonitor = async (model, url, data, axiosConfigObj) => {
-    const monitor = monitors[model];
-    monitor.incrementCallCount();
-    return cortexAxios.post(url, data, axiosConfigObj);
-}
-const MAX_RETRY = 10; // retries for error handling
-const MAX_DUPLICATE_REQUESTS = 3; // duplicate requests to manage latency spikes
-const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
-const postRequest = async ({ url, data, params, headers, cache }, model, requestId, pathway) => {
-    let promises = [];
-    for (let i = 0; i < MAX_RETRY; i++) {
-        const modelProperties = config.get('models')[model];
-        const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
-        let maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
-        let duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
-        if (enableDuplicateRequests) {
-            //logger.info(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
-        }
-        const axiosConfigObj = { params, headers, cache };
-        const streamRequested = (params?.stream || data?.stream);
-        if (streamRequested && modelProperties.supportsStreaming) {
-            axiosConfigObj.responseType = 'stream';
-            promises.push(limiters[model].schedule(() => postWithMonitor(model, url, data, axiosConfigObj)));
-        } else {
-            if (streamRequested) {
-                logger.info(`>>> [${requestId}] ${model} does not support streaming - sending non-streaming request`);
-                axiosConfigObj.params.stream = false;
-                data.stream = false;
-            }
-            const controllers = Array.from({ length: maxDuplicateRequests }, () => new AbortController());
-            promises = controllers.map((controller, index) =>
-                new Promise((resolve, reject) => {
-                    const duplicateRequestTime = duplicateRequestAfter * Math.pow(2, index) - duplicateRequestAfter;
-                    const jitter = duplicateRequestTime * 0.2 * Math.random();
-                    const duplicateRequestTimeout = Math.max(0, duplicateRequestTime + jitter);
-                    setTimeout(async () => {
-                        try {
-                            if (!limiters[model]) {
-                                throw new Error(`No limiter for model ${model}!`);
-                            }
-                            const axiosConfigObj = { params, headers, cache };
-                            let response = null;
-                            if (!controller.signal?.aborted) {
-                                axiosConfigObj.signal = controller.signal;
-                                axiosConfigObj.headers['X-Cortex-Request-Index'] = index;
-                                if (index === 0) {
-                                    //logger.info(`>>> [${requestId}] sending request to ${model} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`);
-                                } else {
-                                    if (modelProperties.supportsStreaming) {
-                                        axiosConfigObj.responseType = 'stream';
-                                        axiosConfigObj.cache = false;
-                                    }
-                                    const logMessage = `>>> [${requestId}] taking too long - sending duplicate request ${index} to ${model} API ${axiosConfigObj.responseType === 'stream' ? 'with streaming' : ''}`;
-                                    const header = '>'.repeat(logMessage.length);
-                                    logger.info(`\n${header}\n${logMessage}`);
-                                }
-                                response = await limiters[model].schedule(() => postWithMonitor(model, url, data, axiosConfigObj));
-                                if (!controller.signal?.aborted) {
-                                    //logger.info(`<<< [${requestId}] received response for request ${index}`);
-                                    if (axiosConfigObj.responseType === 'stream') {
-                                        // Buffering and collecting the stream data
-                                        logger.info(`<<< [${requestId}] buffering streaming response for request ${index}`);
-                                        response = await new Promise((resolve, reject) => {
-                                            let responseData = '';
-                                            response.data.on('data', (chunk) => {
-                                                responseData += chunk;
-                                                //logger.info(`<<< [${requestId}] received chunk for request ${index}`);
-                                            });
-                                            response.data.on('end', () => {
-                                                response.data = JSON.parse(responseData);
-                                                resolve(response);
-                                            });
-                                            response.data.on('error', (error) => {
-                                                reject(error);
-                                            });
-                                        });
-                                    }
-                                }
-                            }
-                            resolve(response);
-                        } catch (error) {
-                            if (error.name === 'AbortError' || error.name === 'CanceledError') {
-                                //logger.info(`XXX [${requestId}] request ${index} was cancelled`);
-                                reject(error);
-                            } else {
-                                logger.info(`!!! [${requestId}] request ${index} failed with error: ${error?.response?.data?.error?.message || error}`);
-                                reject(error);
-                            }
-                        } finally {
-                            controllers.forEach(controller => controller.abort());
-                        }
-                    }, duplicateRequestTimeout);
-                })
-            );
-        }
-        try {
-            const response = await Promise.race(promises);
-            // if response status is 2xx
-            if (response.status >= 200 && response.status < 300) {
-                return response;
-            } else {
-                throw new Error(`Received error response: ${response.status}`);
-            }
-        } catch (error) {
-            //logger.error(`!!! [${requestId}] failed request with data ${JSON.stringify(data)}: ${error}`);
-            if (error.response) {
-                const status = error.response.status;
-                if ((status === 429) || (status >= 500 && status < 600)) {
-                    if (status === 429) {
-                        monitors[model].incrementError429Count();
-                    }
-                    logger.info(`>>> [${requestId}] retrying request due to ${status} response. Retry count: ${i + 1}`);
-                    if (i < MAX_RETRY - 1) {
-                        const backoffTime = 200 * Math.pow(2, i);
-                        const jitter = backoffTime * 0.2 * Math.random();
-                        await new Promise(r => setTimeout(r, backoffTime + jitter));
-                    } else {
-                        throw error;
-                    }
-                } else {
-                    throw error;
-                }
-            } else {
-                throw error;
-            }
-        }
-    }
-};
-const request = async (params, model, requestId, pathway) => {
-    try {
-        const response = await postRequest(params, model, requestId, pathway);
-        const { error, data, cached } = response;
-        if (cached) {
-            logger.info(`<<< [${requestId}] served with cached response.`);
-        }
-        if (error && error.length > 0) {
-            const lastError = error[error.length - 1];
-            return { error: lastError.toJSON() ?? lastError ?? error };
-        }
-        //logger.info(`<<< [${requestId}] response: ${data.choices[0].delta || data.choices[0]}`)
-        return data;
-    } catch (error) {
-        logger.error(`Error in request: ${error.message || error}`);
-        return { error: error };
-    }
-}
-export {
-    axios, request, postRequest, buildLimiters
-};

package/lib/requestDurationEstimator.js DELETED

@@ -1,90 +0,0 @@
-/**
- * A class to get request durations and estimate their average.
- */
-export default class RequestDurationEstimator {
-    // Initializing the class with given number of durations to track.
-    constructor(n = 10) {
-        this.n = n;  // Number of last durations to consider
-        this.durations = [];  // List to keep track of last n durations
-    }
-    /**
-     * Private method to add a request duration to the durations list.
-     * If the list is full (n durations already), the oldest duration is removed.
-     * @param {number} duration - The duration of the request
-     */
-    #add(duration) {
-        this.durations.push(duration);
-        // Remove the oldest duration if we have stored n durations
-        if (this.durations.length > this.n) {
-            this.durations.shift();
-        }
-    }
-    /**
-     * To be invoked when a request starts.
-     * If there is an ongoing request, it ends that request.
-     * @param {string} requestId - The ID of the request
-     */
-    startRequest(requestId) {
-        // If there is an ongoing request, end it
-        if (this.requestId) {
-            this.endRequest();
-        }
-        // Store the starting details of the new request
-        this.requestId = requestId;
-        this.startTime = Date.now();
-    }
-    /**
-     * To be invoked when a request ends.
-     * Calculates the duration of the request and adds it to the durations list.
-     */
-    endRequest() {
-        // If there is an ongoing request, add its duration to the durations list
-        if (this.requestId) {
-            this.#add(Date.now() - this.startTime);
-            this.requestId = null;
-        }
-    }
-    /**
-     * Calculate and return the average of the request durations.
-     * @return {number} The average request duration
-     */
-    getAverage() {
-        // If no duration is stored, return 0
-        if (!this.durations.length) {
-            return 0;
-        }
-        // Calculate the sum of the durations and divide by the number of durations to get the average
-        return this.durations.reduce((a, b) => a + b) / this.durations.length;
-    }
-    /**
-     * Calculate the percentage completion of the current request based on the average of past durations.
-     * @return {number} The estimated percent completion of the ongoing request
-     */
-    calculatePercentComplete() {
-        // If no duration is stored, return 0
-        if (!this.durations.length) {
-            return 0;
-        }
-        // Calculate the duration of the current request
-        const duration = Date.now() - this.startTime;
-        // Get the average of the durations
-        const average = this.getAverage();
-        // Calculate the percentage completion
-        let percentComplete = duration / average;
-        if (percentComplete > .8) {
-            percentComplete = 0.8;
-        }
-        return percentComplete;
-    }
-}