@aj-archipelago/cortex 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +2 -2
- package/lib/cortexRequest.js +11 -1
- package/lib/requestExecutor.js +4 -4
- package/package.json +2 -1
- package/pathways/bias.js +1 -1
- package/pathways/cognitive_insert.js +1 -1
- package/server/graphql.js +2 -0
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +23 -5
- package/server/plugins/geminiChatPlugin.js +195 -0
- package/server/plugins/geminiVisionPlugin.js +102 -0
- package/server/plugins/modelPlugin.js +4 -3
- package/server/plugins/openAiEmbeddingsPlugin.js +3 -1
- package/server/rest.js +11 -5
package/config.js
CHANGED
@@ -122,9 +122,9 @@ var config = convict({
         },
         "oai-embeddings": {
             "type": "OPENAI-EMBEDDINGS",
-            "url": "https://
+            "url": "https://api.openai.com/v1/embeddings",
             "headers": {
-                "
+                "Authorization": "Bearer {{OPENAI_API_KEY}}",
                 "Content-Type": "application/json"
             },
             "params": {
package/lib/cortexRequest.js
CHANGED
@@ -1,7 +1,7 @@
 import { selectEndpoint } from './requestExecutor.js';
 
 class CortexRequest {
-    constructor( { url, data, params, headers, cache, model, pathwayResolver, selectedEndpoint } = {}) {
+    constructor( { url, data, params, headers, cache, model, pathwayResolver, selectedEndpoint, stream } = {}) {
         this._url = url || '';
         this._data = data || {};
         this._params = params || {};
@@ -10,6 +10,7 @@ class CortexRequest {
         this._model = model || '';
         this._pathwayResolver = pathwayResolver || {};
         this._selectedEndpoint = selectedEndpoint || {};
+        this._stream = stream || false;
 
         if (this._pathwayResolver) {
             this._model = this._pathwayResolver.model;
@@ -112,6 +113,15 @@ class CortexRequest {
     set pathwayResolver(value) {
         this._pathwayResolver = value;
     }
+
+    // stream getter and setter
+    get stream() {
+        return this._stream;
+    }
+
+    set stream(value) {
+        this._stream = value;
+    }
 }
 
 export default CortexRequest;
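Illustrative usage sketch (not part of the published diff): how the new stream option might be set on a CortexRequest. The import path, URL, and payload below are placeholders, and a real request is normally wired up with a pathwayResolver that this sketch omits.

// Hypothetical example; the field names come from the diff above, the values are placeholders.
import CortexRequest from './lib/cortexRequest.js';

const request = new CortexRequest({
    url: 'https://example.com/v1/chat/completions', // placeholder endpoint
    data: { messages: [] },
    stream: true                                    // new in 1.1.5
});

console.log(request.stream); // true, via the new getter
request.stream = false;      // can also be toggled via the new setter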
package/lib/requestExecutor.js
CHANGED
@@ -192,7 +192,7 @@ const DUPLICATE_REQUEST_AFTER = 10; // 10 seconds
 const postRequest = async (cortexRequest) => {
     let promises = [];
     for (let i = 0; i < MAX_RETRY; i++) {
-        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model} = cortexRequest;
+        const { url, data, params, headers, cache, selectedEndpoint, requestId, pathway, model, stream} = cortexRequest;
         const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
         let maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
         let duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
@@ -202,7 +202,7 @@ const postRequest = async (cortexRequest) => {
         }
 
         const axiosConfigObj = { params, headers, cache };
-        const streamRequested = (params?.stream || data?.stream);
+        const streamRequested = (stream || params?.stream || data?.stream);
         if (streamRequested && model.supportsStreaming) {
             axiosConfigObj.responseType = 'stream';
             promises.push(selectedEndpoint.limiter.schedule({expiration: pathway.timeout * 1000 + 1000, id: `${requestId}_${uuidv4()}`},() => postWithMonitor(selectedEndpoint, url, data, axiosConfigObj)));
@@ -249,7 +249,7 @@ const postRequest = async (cortexRequest) => {
 
         if (!controller.signal?.aborted) {
 
-
+            logger.debug(`<<< [${requestId}] received response for request ${index}`);
 
             if (axiosConfigObj.responseType === 'stream') {
                 // Buffering and collecting the stream data
@@ -258,7 +258,7 @@ const postRequest = async (cortexRequest) => {
                 let responseData = '';
                 response.data.on('data', (chunk) => {
                     responseData += chunk;
-
+                    logger.debug(`<<< [${requestId}] received chunk for request ${index}`);
                 });
                 response.data.on('end', () => {
                     response.data = JSON.parse(responseData);
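A minimal sketch (assumed values, not package code) of the new stream precedence in postRequest: an explicit cortexRequest.stream now requests streaming even when neither params nor data carry a stream flag.

// Assumed example values; the expression mirrors the changed line above.
const stream = true;   // cortexRequest.stream (new in 1.1.5)
const params = {};     // no params.stream
const data = {};       // no data.stream
const streamRequested = (stream || params?.stream || data?.stream);
console.log(streamRequested); // true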
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.1.4",
+  "version": "1.1.5",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
@@ -53,6 +53,7 @@
     "ioredis": "^5.3.1",
     "keyv": "^4.5.2",
     "langchain": "^0.0.47",
+    "mime-types": "^2.1.35",
     "subsrt": "^1.1.1",
     "uuid": "^9.0.0",
     "winston": "^3.11.0",
package/pathways/bias.js
CHANGED
@@ -6,5 +6,5 @@ export default {
     // Uncomment the following line to enable caching for this prompt, if desired.
     // enableCache: true,
 
-    prompt: `{{text}}\n\nIs the above text written objectively? Why or why not, explain with details:\n
+    prompt: `{{text}}\n\nIs the above text written objectively? Why or why not, explain with details:\n`,
 };
package/server/graphql.js
CHANGED
package/server/modelExecutor.js
CHANGED
@@ -17,6 +17,8 @@ import OpenAiEmbeddingsPlugin from './plugins/openAiEmbeddingsPlugin.js';
 import OpenAIImagePlugin from './plugins/openAiImagePlugin.js';
 import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
 import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
+import GeminiChatPlugin from './plugins/geminiChatPlugin.js';
+import GeminiVisionPlugin from './plugins/geminiVisionPlugin.js';
 
 class ModelExecutor {
     constructor(pathway, model) {
@@ -72,6 +74,12 @@ class ModelExecutor {
             case 'OPENAI-VISION':
                 plugin = new OpenAIVisionPlugin(pathway, model);
                 break;
+            case 'GEMINI-CHAT':
+                plugin = new GeminiChatPlugin(pathway, model);
+                break;
+            case 'GEMINI-VISION':
+                plugin = new GeminiVisionPlugin(pathway, model);
+                break;
             default:
                 throw new Error(`Unsupported model type: ${model.type}`);
         }
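Hypothetical configuration sketch (not from the package): model entries whose type is GEMINI-CHAT or GEMINI-VISION are routed to the new plugins by the switch above. The shape follows the config.js model entries shown earlier; the entry names, URLs, and extra fields here are placeholders, only the "type" strings come from the diff.

// Hypothetical model entries; only the "type" strings are taken from the diff.
const models = {
    "gemini-chat": {
        "type": "GEMINI-CHAT",
        "url": "https://REGION-aiplatform.googleapis.com/PLACEHOLDER", // placeholder
        "headers": { "Content-Type": "application/json" },
        "supportsStreaming": true // checked by requestExecutor.js when streaming is requested
    },
    "gemini-vision": {
        "type": "GEMINI-VISION",
        "url": "https://REGION-aiplatform.googleapis.com/PLACEHOLDER", // placeholder
        "headers": { "Content-Type": "application/json" }
    }
};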
package/server/pathwayResolver.js
CHANGED
@@ -98,8 +98,9 @@ class PathwayResolver {
         const incomingMessage = responseData;
 
         let messageBuffer = '';
+        let streamEnded = false;
 
-        const
+        const processStreamSSE = (data) => {
             try {
                 //logger.info(`\n\nReceived stream data for requestId ${this.requestId}: ${data.toString()}`);
                 let events = data.toString().split('\n');
@@ -132,18 +133,35 @@ class PathwayResolver {
                     return;
                 }
 
+                // error can be in different places in the message
                 const streamError = parsedMessage?.error || parsedMessage?.choices?.[0]?.delta?.content?.error || parsedMessage?.choices?.[0]?.text?.error;
                 if (streamError) {
                     streamErrorOccurred = true;
                     logger.error(`Stream error: ${streamError.message}`);
-                    incomingMessage.off('data',
+                    incomingMessage.off('data', processStreamSSE);
                     return;
                 }
+
+                // finish reason can be in different places in the message
+                const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
+                if (finishReason?.toLowerCase() === 'stop') {
+                    requestProgress.progress = 1;
+                } else {
+                    if (finishReason?.toLowerCase() === 'safety') {
+                        const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
+                        logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
+                        requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
+                        requestProgress.progress = 1;
+                    }
+                }
             }
 
             try {
-
-
+                if (!streamEnded) {
+                    //logger.info(`Publishing stream message to requestId ${this.requestId}: ${message}`);
+                    publishRequestProgress(requestProgress);
+                    streamEnded = requestProgress.progress === 1;
+                }
             } catch (error) {
                 logger.error(`Could not publish the stream message: "${messageBuffer}", ${error}`);
             }
@@ -156,7 +174,7 @@ class PathwayResolver {
 
         if (incomingMessage) {
             await new Promise((resolve, reject) => {
-                incomingMessage.on('data',
+                incomingMessage.on('data', processStreamSSE);
                 incomingMessage.on('end', resolve);
                 incomingMessage.on('error', reject);
             });
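A minimal illustration (assumed payloads, not package code) of where the new finish-reason lookup finds its value for OpenAI-style and Gemini-style stream messages.

// Assumed example stream messages; the lookup mirrors the changed line above.
const openAiStyle = { choices: [{ delta: { content: 'Hello' }, finish_reason: 'stop' }] };
const geminiStyle = { candidates: [{ content: { parts: [{ text: 'Hello' }] }, finishReason: 'STOP' }] };

const finishReason = (m) => m?.choices?.[0]?.finish_reason || m?.candidates?.[0]?.finishReason;
console.log(finishReason(openAiStyle)); // 'stop'
console.log(finishReason(geminiStyle)); // 'STOP', lowercased to 'stop', so progress is set to 1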
package/server/plugins/geminiChatPlugin.js
ADDED
@@ -0,0 +1,195 @@
+// geminiChatPlugin.js
+import ModelPlugin from './modelPlugin.js';
+import { encode } from 'gpt-3-encoder';
+import logger from '../../lib/logger.js';
+
+const mergeResults = (data) => {
+    let output = '';
+    let safetyRatings = [];
+
+    for (let chunk of data) {
+        const { candidates } = chunk;
+        if (!candidates || !candidates.length) {
+            continue;
+        }
+
+        // If it was blocked, return the blocked message
+        if (candidates[0].safetyRatings.some(rating => rating.blocked)) {
+            safetyRatings = candidates[0].safetyRatings;
+            return {mergedResult: 'The response was blocked because the input or response potentially violates policies. Try rephrasing the prompt or adjusting the parameter settings.', safetyRatings: safetyRatings};
+        }
+
+        // Append the content of the first part of the first candidate to the output
+        const message = candidates[0].content.parts[0].text;
+        output += message;
+    }
+
+    return {mergedResult: output || null, safetyRatings: safetyRatings};
+};
+
+class GeminiChatPlugin extends ModelPlugin {
+    constructor(pathway, model) {
+        super(pathway, model);
+    }
+
+    // This code converts either OpenAI or PaLM messages to the Gemini messages format
+    convertMessagesToGemini(messages) {
+        let modifiedMessages = [];
+        let lastAuthor = '';
+
+        // Check if the messages are already in the Gemini format
+        if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
+            modifiedMessages = messages;
+        } else {
+            messages.forEach(message => {
+                const { role, author, content } = message;
+
+                // Right now Gemini API has no direct translation for system messages,
+                // but they work fine as parts of user messages
+                if (role === 'system') {
+                    modifiedMessages.push({
+                        role: 'user',
+                        parts: [{ text: content }],
+                    });
+                    lastAuthor = 'user';
+                    return;
+                }
+
+                // Aggregate consecutive author messages, appending the content
+                if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
+                    modifiedMessages[modifiedMessages.length - 1].parts.push({ text: content });
+                }
+
+                // Push messages that are role: 'user' or 'assistant', changing 'assistant' to 'model'
+                else if (role === 'user' || role === 'assistant' || author) {
+                    modifiedMessages.push({
+                        role: author || role,
+                        parts: [{ text: content }],
+                    });
+                    lastAuthor = author || role;
+                }
+            });
+        }
+
+        // Gemini requires an even number of messages
+        if (modifiedMessages.length % 2 === 0) {
+            modifiedMessages = modifiedMessages.slice(1);
+        }
+
+        return {
+            modifiedMessages,
+        };
+    }
+
+    // Set up parameters specific to the Gemini API
+    getRequestParameters(text, parameters, prompt, cortexRequest) {
+        const { modelPromptText, modelPromptMessages, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+        const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest ? cortexRequest.pathway : {};
+
+        // Define the model's max token length
+        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+
+        const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);
+
+        let requestMessages = geminiMessages.modifiedMessages;
+
+        // Check if the token length exceeds the model's max token length
+        if (tokenLength > modelTargetTokenLength) {
+            // Remove older messages until the token length is within the model's limit
+            requestMessages = this.truncateMessagesToTargetLength(requestMessages, modelTargetTokenLength);
+        }
+
+        if (max_tokens < 0) {
+            throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
+        }
+
+        const requestParameters = {
+            contents: requestMessages,
+            generationConfig: {
+                temperature: this.temperature || 0.7,
+                maxOutputTokens: max_tokens || this.getModelMaxReturnTokens(),
+                topP: parameters.topP || 0.95,
+                topK: parameters.topK || 40,
+            },
+            safety_settings: geminiSafetySettings || undefined,
+            tools: geminiTools || undefined
+        };
+
+        return requestParameters;
+    }
+
+    // Parse the response from the new Chat API
+    parseResponse(data) {
+        // If data is not an array, return it directly
+        if (!Array.isArray(data)) {
+            return data;
+        }
+
+        return mergeResults(data).mergedResult || null;
+
+    }
+
+    // Execute the request to the new Chat API
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = this.getRequestParameters(text, parameters, prompt, cortexRequest);
+        const { stream } = parameters;
+
+        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters };
+        cortexRequest.params = {}; // query params
+        cortexRequest.stream = stream;
+        cortexRequest.url = cortexRequest.stream ? `${cortexRequest.url}?alt=sse` : cortexRequest.url;
+
+        const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
+        const authToken = await gcpAuthTokenHelper.getAccessToken();
+        cortexRequest.headers.Authorization = `Bearer ${authToken}`;
+
+        return this.executeRequest(cortexRequest);
+    }
+
+    // Override the logging function to display the messages and responses
+    logRequestData(data, responseData, prompt) {
+        this.logAIRequestFinished();
+
+        const messages = data && data.contents;
+
+        if (messages && messages.length > 1) {
+            logger.info(`[chat request contains ${messages.length} messages]`);
+            messages.forEach((message, index) => {
+                const messageContent = message.parts.reduce((acc, part) => {
+                    if (part.text) {
+                        return acc + part.text;
+                    }
+                    return acc;
+                } , '');
+                const words = messageContent.split(" ");
+                const tokenCount = encode(messageContent).length;
+                const preview = words.length < 41 ? messageContent : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
+
+                logger.debug(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
+            });
+        } else if (messages && messages.length === 1) {
+            logger.debug(`${messages[0].parts[0].text}`);
+        }
+
+        // check if responseData is an array
+        if (!Array.isArray(responseData)) {
+            logger.info(`[response received as an SSE stream]`);
+        } else {
+            const { mergedResult, safetyRatings } = mergeResults(responseData);
+            if (safetyRatings?.length) {
+                logger.warn(`!!! response was blocked because the input or response potentially violates policies`);
+                logger.debug(`Safety Ratings: ${JSON.stringify(safetyRatings, null, 2)}`);
+            }
+            const responseTokens = encode(mergedResult).length;
+            logger.info(`[response received containing ${responseTokens} tokens]`);
+            logger.debug(`${mergedResult}`);
+        }
+
+        if (prompt && prompt.debugInfo) {
+            prompt.debugInfo += `\n${JSON.stringify(data)}`;
+        }
+    }
+
+}
+
+export default GeminiChatPlugin;
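Illustrative sketch (assumed input and output, not from the package) of what convertMessagesToGemini does to OpenAI-style chat messages, following the logic in the plugin above.

// Assumed example: OpenAI-style chat messages...
const messages = [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'Summarize this article.' },
    { role: 'assistant', content: 'Sure, here is a summary.' },
    { role: 'user', content: 'Shorter, please.' }
];

// ...become Gemini "contents" roughly like this: the system message is emitted as a user
// turn, consecutive same-role messages are merged into one turn with multiple parts, and
// the role/author handling follows the code shown above.
// [
//   { role: 'user', parts: [{ text: 'You are a helpful assistant.' }, { text: 'Summarize this article.' }] },
//   { role: 'assistant', parts: [{ text: 'Sure, here is a summary.' }] },
//   { role: 'user', parts: [{ text: 'Shorter, please.' }] }
// ]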
package/server/plugins/geminiVisionPlugin.js
ADDED
@@ -0,0 +1,102 @@
+import GeminiChatPlugin from './geminiChatPlugin.js';
+import mime from 'mime-types';
+import logger from '../../lib/logger.js';
+
+class GeminiVisionPlugin extends GeminiChatPlugin {
+
+    // Override the convertMessagesToGemini method to handle multimodal vision messages
+    // This function can operate on messages in Gemini native format or in OpenAI's format
+    // It will convert the messages to the Gemini format
+    convertMessagesToGemini(messages) {
+        let modifiedMessages = [];
+        let lastAuthor = '';
+
+        // Check if the messages are already in the Gemini format
+        if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
+            modifiedMessages = messages;
+        } else {
+            messages.forEach(message => {
+                const { role, author, content } = message;
+
+                // Right now Gemini API has no direct translation for system messages,
+                // so we insert them as parts of the first user: role message
+                if (role === 'system') {
+                    modifiedMessages.push({
+                        role: 'user',
+                        parts: [{ text: content }],
+                    });
+                    lastAuthor = 'user';
+                    return;
+                }
+
+                // Convert content to Gemini format, trying to maintain compatibility
+                const convertPartToGemini = (partString) => {
+                    try {
+                        const part = JSON.parse(partString);
+                        if (typeof part === 'string') {
+                            return { text: part };
+                        } else if (part.type === 'text') {
+                            return { text: part.text };
+                        } else if (part.type === 'image_url') {
+                            if (part.image_url.url.startsWith('gs://')) {
+                                return {
+                                    fileData: {
+                                        mimeType: mime.lookup(part.image_url.url),
+                                        fileUri: part.image_url.url
+                                    }
+                                };
+                            } else {
+                                return {
+                                    inlineData: {
+                                        mimeType: 'image/jpeg', // fixed for now as there's no MIME type in the request
+                                        data: part.image_url.url.split('base64,')[1]
+                                    }
+                                };
+                            }
+                        }
+                    } catch (e) {
+                        logger.warn(`Unable to parse part - including as string: ${partString}`);
+                    }
+                    return { text: partString };
+                };
+
+                const addPartToMessages = (geminiPart) => {
+                    // Gemini requires alternating user: and model: messages
+                    if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
+                        modifiedMessages[modifiedMessages.length - 1].parts.push(geminiPart);
+                    }
+                    // Gemini only supports user: and model: roles
+                    else if (role === 'user' || role === 'assistant' || author) {
+                        modifiedMessages.push({
+                            role: author || role,
+                            parts: [geminiPart],
+                        });
+                        lastAuthor = author || role;
+                    }
+                };
+
+                // Content can either be in the "vision" format (array) or in the "chat" format (string)
+                if (Array.isArray(content)) {
+                    content.forEach(part => {
+                        addPartToMessages(convertPartToGemini(part));
+                    });
+                }
+                else {
+                    addPartToMessages(convertPartToGemini(content));
+                }
+            });
+        }
+
+        // Gemini requires an even number of messages
+        if (modifiedMessages.length % 2 === 0) {
+            modifiedMessages = modifiedMessages.slice(1);
+        }
+
+        return {
+            modifiedMessages,
+        };
+    }
+
+}
+
+export default GeminiVisionPlugin;
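Illustrative sketch (assumed values, not from the package) of how multimodal message parts map to Gemini parts in the vision plugin above.

// Assumed example parts (stringified, as convertPartToGemini expects JSON strings):
const textPart   = JSON.stringify({ type: 'text', text: 'What is in this image?' });
const gcsPart    = JSON.stringify({ type: 'image_url', image_url: { url: 'gs://my-bucket/photo.png' } });
const inlinePart = JSON.stringify({ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,/9j/4AAQ...' } });

// Per the plugin above, these become (roughly):
//   { text: 'What is in this image?' }
//   { fileData: { mimeType: 'image/png', fileUri: 'gs://my-bucket/photo.png' } }   // mime.lookup on the gs:// URL
//   { inlineData: { mimeType: 'image/jpeg', data: '/9j/4AAQ...' } }                // base64 payload after 'base64,'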
package/server/plugins/modelPlugin.js
CHANGED
@@ -269,9 +269,10 @@ class ModelPlugin {
 
         const responseData = await executeRequest(cortexRequest);
 
-
-
-
+        let errorData = Array.isArray(responseData) ? responseData[0] : responseData;
+
+        if (errorData && errorData.error) {
+            throw new Error(`Server error: ${JSON.stringify(errorData.error)}`);
         }
 
         this.logRequestData(data, responseData, prompt);
package/server/plugins/openAiEmbeddingsPlugin.js
CHANGED
@@ -7,11 +7,13 @@ class OpenAiEmbeddingsPlugin extends ModelPlugin {
     }
 
     getRequestParameters(text, parameters, prompt) {
-        const combinedParameters = { ...this.promptParameters, ...parameters };
+        const combinedParameters = { ...this.promptParameters, ...this.model.params, ...parameters };
         const { modelPromptText } = this.getCompiledPrompt(text, combinedParameters, prompt);
+        const { model } = combinedParameters;
         const requestParameters = {
             data: {
                 input: combinedParameters?.input?.length ? combinedParameters.input : modelPromptText || text,
+                model
             }
         };
         return requestParameters;
package/server/rest.js
CHANGED
@@ -85,7 +85,7 @@ const processIncomingStream = (requestId, res, jsonResponse) => {
     }
 
     const sendStreamData = (data) => {
-
+        logger.debug(`REST SEND: data: ${JSON.stringify(data)}`);
         const dataString = (data==='[DONE]') ? data : JSON.stringify(data);
 
         if (!res.writableEnded) {
@@ -93,9 +93,9 @@ const processIncomingStream = (requestId, res, jsonResponse) => {
         }
     }
 
-    const fillJsonResponse = (jsonResponse, inputText,
+    const fillJsonResponse = (jsonResponse, inputText, _finishReason) => {
 
-        jsonResponse.choices[0].finish_reason =
+        jsonResponse.choices[0].finish_reason = null;
         if (jsonResponse.object === 'text_completion') {
             jsonResponse.choices[0].text = inputText;
         } else {
@@ -114,7 +114,10 @@ const processIncomingStream = (requestId, res, jsonResponse) => {
     const safeUnsubscribe = async () => {
         if (subscription) {
             try {
-
+                const subPromiseResult = await subscription;
+                if (subPromiseResult) {
+                    pubsub.unsubscribe(subPromiseResult);
+                }
             } catch (error) {
                 logger.error(`Error unsubscribing from pubsub: ${error}`);
             }
@@ -122,7 +125,7 @@ const processIncomingStream = (requestId, res, jsonResponse) => {
         }
 
         if (data.requestProgress.requestId === requestId) {
-
+            logger.debug(`REQUEST_PROGRESS received progress: ${data.requestProgress.progress}, data: ${data.requestProgress.data}`);
 
             const progress = data.requestProgress.progress;
             const progressData = data.requestProgress.data;
@@ -142,6 +145,9 @@ const processIncomingStream = (requestId, res, jsonResponse) => {
                 } else {
                     fillJsonResponse(jsonResponse, delta.content, finish_reason);
                 }
+            } else if (messageJson.candidates) {
+                const { content, finishReason } = messageJson.candidates[0];
+                fillJsonResponse(jsonResponse, content.parts[0].text, finishReason);
             } else {
                 fillJsonResponse(jsonResponse, messageJson, null);
             }