npm - @aj-archipelago/cortex - Versions diffs - 1.3.41 → 1.3.42 - Mend

@aj-archipelago/cortex 1.3.41 → 1.3.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/helper-apps/cortex-browser/.funcignore +8 -0
package/helper-apps/cortex-browser/Dockerfile +52 -0
package/helper-apps/cortex-browser/function_app.py +181 -0
package/helper-apps/cortex-browser/host.json +15 -0
package/helper-apps/cortex-browser/requirements.txt +24 -0
package/lib/requestExecutor.js +6 -1
package/package.json +1 -1
package/pathways/system/entity/sys_entity_agent.js +7 -8
package/pathways/system/entity/tools/sys_tool_bing_search.js +8 -1
package/server/modelExecutor.js +9 -1
package/server/plugins/modelPlugin.js +4 -1
package/server/plugins/openAiVisionPlugin.js +8 -5
package/server/plugins/openAiWhisperPlugin.js +7 -0

package/helper-apps/cortex-browser/.funcignore ADDED Viewed

@@ -0,0 +1,8 @@
+.git*
+.vscode
+__azurite_db*__.json
+__blobstorage__
+__queuestorage__
+local.settings.json
+test
+.venv

package/helper-apps/cortex-browser/Dockerfile ADDED Viewed

@@ -0,0 +1,52 @@
+# Base image: official Azure Functions Python image (tag 4-python3.9)
+FROM mcr.microsoft.com/azure-functions/python:4-python3.9
+# Set environment variables for Azure Functions runtime:
+# the script root points at the directory the app code is copied into below,
+# and console logging is switched on
+ENV AzureWebJobsScriptRoot=/home/site/wwwroot
+ENV AzureFunctionsJobHost__Logging__Console__IsEnabled=true
+# Install OS dependencies needed by Playwright browsers (Debian-based)
+# This list is based on Playwright documentation and common needs
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libnss3 \
+    libnspr4 \
+    libdbus-glib-1-2 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdrm2 \
+    libexpat1 \
+    libgbm1 \
+    libpango-1.0-0 \
+    libx11-6 \
+    libxcb1 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxext6 \
+    libxfixes3 \
+    libxrandr2 \
+    libxrender1 \
+    libxtst6 \
+    lsb-release \
+    wget \
+    xvfb \
+    # Remove the apt package lists in the same layer so they are not kept in the image
+    && rm -rf /var/lib/apt/lists/*
+# Copy only the requirements file first so the pip layer below can be reused
+# from the Docker cache when just the application code changes
+COPY requirements.txt /tmp/
+WORKDIR /tmp
+# Install Python dependencies (no pip cache is kept in the layer)
+RUN pip install --no-cache-dir -r requirements.txt
+# Install Playwright browsers and their dependencies within the container
+# Using --with-deps helps install system dependencies needed by the browsers
+# Only Chromium is installed because function_app.py launches p.chromium
+# NOTE(review): --with-deps presumably overlaps with the manual apt list above - confirm both are needed
+RUN playwright install --with-deps chromium
+# Copy the function app code to the final location (the script root set above)
+COPY . /home/site/wwwroot
+# Set the working directory for the function app
+WORKDIR /home/site/wwwroot

package/helper-apps/cortex-browser/function_app.py ADDED Viewed

@@ -0,0 +1,181 @@
+import azure.functions as func
+import logging
+import json
+from playwright.sync_api import sync_playwright
+import trafilatura
+import base64
+# Module-level FunctionApp; HTTP routes in this file are registered on it via @app.route.
+# NOTE(review): the default HTTP auth level is ANONYMOUS, i.e. presumably no function key
+# is required by callers - confirm this is intended for an endpoint that fetches arbitrary URLs.
+app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)
+def scrape_and_screenshot(url: str, should_screenshot: bool = True) -> dict:
+    """Scrapes text and takes a screenshot of a given URL, attempting to reject cookies."""
+    screenshot_bytes = None
+    html_content = None
+    extracted_text = None
+    try:
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            try:
+                context = browser.new_context()
+                page = context.new_page()
+                page.goto(url, wait_until='load', timeout=60000) # Increased timeout
+                # --- Attempt to reject cookies ---
+                # Add more selectors here if needed for different sites
+                reject_selectors = [
+                    "button:has-text('Reject All')",
+                    "button:has-text('Decline')",
+                    "button:has-text('Only necessary')",
+                    "button:has-text('Tümünü Reddet')", # From your example
+                    "button:has-text('Reject')",
+                    "[aria-label*='Reject']", # Common aria labels
+                    "[id*='reject']",
+                    "[class*='reject']",
+                    # Add more specific selectors based on common banner frameworks if known
+                ]
+                cookie_banner_found = False
+                for selector in reject_selectors:
+                    try:
+                        # Wait briefly for the banner element to appear
+                        reject_button = page.locator(selector).first
+                        if reject_button.is_visible(timeout=2000): # Wait up to 2 seconds
+                            logging.info(f"Found potential cookie reject button with selector: {selector}")
+                            reject_button.click(timeout=5000) # Click with a timeout
+                            logging.info("Clicked cookie reject button.")
+                            # Wait a tiny bit for the banner to disappear/page to settle
+                            page.wait_for_timeout(500)
+                            cookie_banner_found = True
+                            break # Stop searching once one is clicked
+                    except Exception as e:
+                        # Ignore timeout errors if the element doesn't appear or other exceptions
+                        # logging.debug(f"Cookie reject selector '{selector}' not found or failed: {e}")
+                        pass # Try the next selector
+                if not cookie_banner_found:
+                     logging.info("No common cookie reject button found or clicked.")
+                # ---------------------------------
+                html_content = page.content()
+                # Take FULL page screenshot before closing
+                if should_screenshot:
+                    screenshot_bytes = page.screenshot(full_page=True) # Added full_page=True
+            finally:
+                browser.close()
+    except Exception as e:
+        logging.error(f"Playwright error accessing {url}: {e}")
+        return {"url": url, "error": f"Playwright error: {e}"}
+    if html_content:
+        try:
+            extracted_text = trafilatura.extract(html_content, include_comments=False)
+        except Exception as e:
+            logging.error(f"Trafilatura error processing {url}: {e}")
+            # Still return screenshot if Playwright succeeded
+            extracted_text = f"Trafilatura extraction failed: {e}"
+    screenshot_base64 = base64.b64encode(screenshot_bytes).decode('utf-8') if screenshot_bytes else None
+    response_data = {
+        "url": url,
+        "text": extracted_text or "",
+    }
+    if screenshot_base64:
+        response_data["screenshot_base64"] = screenshot_base64
+    return response_data
+@app.route(route="scrape") # Changed route name
+def http_scrape_trigger(req: func.HttpRequest) -> func.HttpResponse:
+    logging.info('Python HTTP scrape trigger function processed a request.')
+    url = None
+    take_screenshot = True # Default value
+    # 1. Try getting parameters from query string first
+    try:
+        url = req.params.get('url')
+        if url:
+            logging.info(f"Found URL in query parameters: {url}")
+            # Handle take_screenshot from query params
+            ss_param = req.params.get('take_screenshot', 'true') # Query params are strings
+            take_screenshot = ss_param.lower() != 'false'
+        else:
+             logging.info("URL not found in query parameters.")
+    except Exception as e:
+        # This shouldn't generally happen with req.params, but good practice
+        logging.warning(f"Error reading query parameters: {e}")
+        url = None # Ensure url is None if error occurs here
+    # 2. If URL not found in query, try getting from JSON body
+    if not url:
+        logging.info("Attempting to read URL from JSON body.")
+        try:
+            req_body = req.get_json()
+            if req_body:
+                url = req_body.get('url')
+                if url:
+                    logging.info(f"Found URL in JSON body: {url}")
+                    # Handle take_screenshot from JSON body
+                    ss_param = req_body.get('take_screenshot', True)
+                    if isinstance(ss_param, str):
+                        take_screenshot = ss_param.lower() != 'false'
+                    else:
+                        take_screenshot = bool(ss_param) # Convert other types
+                    logging.info(f"Screenshot parameter from JSON: {take_screenshot}")
+                else:
+                    logging.info("URL key not found in JSON body.")
+            else:
+                logging.info("JSON body is empty.")
+        except ValueError:
+            logging.info("Request body is not valid JSON or missing.")
+            # url remains None
+        except Exception as e:
+            logging.warning(f"Error reading JSON body: {e}")
+            url = None # Ensure url is None if error occurs here
+    # 3. Process the request if URL was found
+    if url:
+        try:
+            # Validate URL basic structure (optional but recommended)
+            if not url.startswith(('http://', 'https://')):
+                 raise ValueError("Invalid URL format. Must start with http:// or https://")
+            result_data = scrape_and_screenshot(url, should_screenshot=take_screenshot) # Pass the flag
+            return func.HttpResponse(
+                json.dumps(result_data),
+                mimetype="application/json",
+                status_code=200
+            )
+        except ValueError as ve:
+             logging.error(f"Invalid URL provided: {ve}")
+             return func.HttpResponse(
+                  json.dumps({"error": str(ve)}),
+                  mimetype="application/json",
+                  status_code=400
+             )
+        except Exception as e:
+            logging.error(f"Error processing scrape request for {url}: {e}")
+            return func.HttpResponse(
+                 json.dumps({"error": f"An internal error occurred: {e}"}),
+                 mimetype="application/json",
+                 status_code=500
+            )
+    else:
+        logging.warning("URL not provided in request body or query string.")
+        return func.HttpResponse(
+             json.dumps({"error": "Please pass a 'url' in the JSON request body or query string"}),
+             mimetype="application/json",
+             status_code=400
+        )
+# The commented-out timer trigger below is only an example of registering
+# another trigger on the same app. Keep it if you might add other triggers;
+# otherwise it can be removed when the scrape trigger is the only one.
+# @app.timer_trigger(schedule="0 */5 * * * *", arg_name="myTimer", run_on_startup=True,
+#                    use_monitor=False)
+# def timer_trigger_handler(myTimer: func.TimerRequest) -> None:
+#     if myTimer.past_due:
+#         logging.info('The timer is past due!')
+#     logging.info('Python timer trigger function executed.')

package/helper-apps/cortex-browser/host.json ADDED Viewed

@@ -0,0 +1,15 @@
+{
+  "version": "2.0",
+  "logging": {
+    "applicationInsights": {
+      "samplingSettings": {
+        "isEnabled": true,
+        "excludedTypes": "Request"
+      }
+    }
+  },
+  "extensionBundle": {
+    "id": "Microsoft.Azure.Functions.ExtensionBundle",
+    "version": "[4.*, 5.0.0)"
+  }
+}

package/helper-apps/cortex-browser/requirements.txt ADDED Viewed

@@ -0,0 +1,24 @@
+azure-functions==1.23.0
+babel==2.17.0
+certifi==2025.4.26
+charset-normalizer==3.4.2
+courlan==1.3.2
+dateparser==1.2.1
+greenlet==3.2.1
+htmldate==1.9.3
+jusText==3.0.2
+lxml==5.4.0
+lxml_html_clean==0.4.2
+MarkupSafe==3.0.2
+playwright==1.52.0
+pyee==13.0.0
+python-dateutil==2.9.0.post0
+pytz==2025.2
+regex==2024.11.6
+six==1.17.0
+tld==0.13
+trafilatura==2.0.0
+typing_extensions==4.13.2
+tzlocal==5.3.1
+urllib3==2.4.0
+Werkzeug==3.1.3

package/lib/requestExecutor.js CHANGED Viewed

@@ -195,7 +195,12 @@ const requestWithMonitor = async (endpoint, url, data, axiosConfigObj) => {
         }
     } catch (error) {
         // throw new error with duration as part of the error data
-        throw { ...error, duration: endpoint?.monitor?.incrementErrorCount(callId, error?.response?.status || null) };
+        const { code, name } = error;
+        const finalStatus = error?.response?.status ?? error?.status
+        const statusText = error?.response?.statusText ?? error?.statusText
+        const errorMessage = error?.response?.data?.message ?? error?.response?.data?.error?.message ?? error?.message ?? String(error);
+        throw { code, message: errorMessage, status: finalStatus, statusText, name, duration: endpoint?.monitor?.incrementErrorCount(callId, finalStatus) };
     }
     let duration;
     if (response.status >= 200 && response.status < 300) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.3.41",
+  "version": "1.3.42",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {

package/pathways/system/entity/sys_entity_agent.js CHANGED Viewed

@@ -27,6 +27,7 @@ export default {
         codeRequestId: ``,
         skipCallbackMessage: false,
         entityId: ``,
+        researchMode: false,
         model: 'oai-gpt41'
     },
     timeout: 600,
@@ -179,7 +180,7 @@ export default {
         let pathwayResolver = resolver;
         // Load input parameters and information into args
-        const { entityId, voiceResponse, aiMemorySelfModify, chatId } = { ...pathwayResolver.pathway.inputParameters, ...args };
+        const { entityId, voiceResponse, aiMemorySelfModify, chatId, researchMode } = { ...pathwayResolver.pathway.inputParameters, ...args };
         const entityConfig = loadEntityConfig(entityId);
         const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(entityConfig);
@@ -199,18 +200,21 @@ export default {
             entityInstructions,
             voiceResponse,
             aiMemorySelfModify,
-            chatId
+            chatId,
+            researchMode
         };
         pathwayResolver.args = {...args};
+        const promptPrefix = researchMode ? 'Formatting re-enabled\n' : '';
         const memoryTemplates = entityUseMemory ?
             `{{renderTemplate AI_MEMORY}}\n\n{{renderTemplate AI_MEMORY_INSTRUCTIONS}}\n\n` : '';
         const instructionTemplates = entityInstructions ? (entityInstructions + '\n\n') : `{{renderTemplate AI_EXPERTISE}}\n\n{{renderTemplate AI_COMMON_INSTRUCTIONS}}\n\n`;
         const promptMessages = [
-            {"role": "system", "content": `${memoryTemplates}${instructionTemplates}{{renderTemplate AI_TOOLS}}\n\n{{renderTemplate AI_GROUNDING_INSTRUCTIONS}}\n\n{{renderTemplate AI_DATETIME}}`},
+            {"role": "system", "content": `${promptPrefix}${memoryTemplates}${instructionTemplates}{{renderTemplate AI_TOOLS}}\n\n{{renderTemplate AI_GROUNDING_INSTRUCTIONS}}\n\n{{renderTemplate AI_DATETIME}}`},
             "{{chatHistory}}",
         ];
@@ -218,11 +222,6 @@ export default {
             new Prompt({ messages: promptMessages }),
         ];
-        // if the model has been overridden, make sure to use it
-        if (pathwayResolver.modelName) {
-            pathwayResolver.args.model = pathwayResolver.modelName;
-        }
         // set the style model if applicable
         const { aiStyle, AI_STYLE_ANTHROPIC, AI_STYLE_OPENAI } = args;
         const styleModel = aiStyle === "Anthropic" ? AI_STYLE_ANTHROPIC : AI_STYLE_OPENAI;

package/pathways/system/entity/tools/sys_tool_bing_search.js CHANGED Viewed

@@ -55,7 +55,14 @@ export default {
             // Call the Bing search pathway
             const response = await callPathway('bing', {
                 ...args
-            });
+            }, resolver);
+            if (resolver.errors && resolver.errors.length > 0) {
+                const errorMessages = Array.isArray(resolver.errors)
+                    ? resolver.errors.map(err => err.message || err)
+                    : [resolver.errors.message || resolver.errors];
+                return JSON.stringify({ _type: "SearchError", value: errorMessages });
+            }
             const parsedResponse = JSON.parse(response);
             const results = [];

package/server/modelExecutor.js CHANGED Viewed

@@ -1,5 +1,6 @@
 // ModelExecutor.js
 import CortexRequest from '../lib/cortexRequest.js';
+import logger from '../lib/logger.js';
 import OpenAIChatPlugin from './plugins/openAiChatPlugin.js';
 import OpenAICompletionPlugin from './plugins/openAiCompletionPlugin.js';
@@ -125,7 +126,14 @@ class ModelExecutor {
     async execute(text, parameters, prompt, pathwayResolver) {
         const cortexRequest = new CortexRequest({ pathwayResolver });
-        return await this.plugin.execute(text, parameters, prompt, cortexRequest);
+        try {
+            return await this.plugin.execute(text, parameters, prompt, cortexRequest);
+        } catch (error) {
+            logger.error(`Error executing model plugin for pathway ${pathwayResolver?.pathway?.name}: ${error.message}`);
+            logger.debug(error.stack);
+            pathwayResolver.errors.push(error.message);
+            return null;
+        }
     }
 }

package/server/plugins/modelPlugin.js CHANGED Viewed

@@ -565,7 +565,10 @@ class ModelPlugin {
             return parsedData;
         } catch (error) {
             // Log the error and continue
-            const errorMessage = `${error?.response?.data?.message || error?.response?.data?.error?.message || error?.message || error}`;
+            const errorMessage = error?.response?.data?.message
+                                 ?? error?.response?.data?.error?.message
+                                 ?? error?.message
+                                 ?? String(error); // Fallback to string representation
             logger.error(`Error in executeRequest for ${this.pathwayName}: ${errorMessage}`);
             if (error.data) {
                 logger.error(`Additional error data: ${JSON.stringify(error.data)}`);

package/server/plugins/openAiVisionPlugin.js CHANGED Viewed

@@ -41,12 +41,15 @@ class OpenAIVisionPlugin extends OpenAIChatPlugin {
                                 return { type: 'text', text: parsedItem };
                             }
-                            if (typeof parsedItem === 'object' && parsedItem !== null && parsedItem.type === 'image_url') {
-                                const url = parsedItem.url || parsedItem.image_url?.url;
-                                if (url && await this.validateImageUrl(url)) {
-                                    return {type: parsedItem.type, image_url: {url}};
+                            if (typeof parsedItem === 'object' && parsedItem !== null) {
+                                // Handle both 'image' and 'image_url' types
+                                if (parsedItem.type === 'image' || parsedItem.type === 'image_url') {
+                                    const url = parsedItem.image_url?.url || parsedItem.url;
+                                    if (url && await this.validateImageUrl(url)) {
+                                        return { type: 'image_url', image_url: { url } };
+                                    }
+                                    return { type: 'text', text: typeof item === 'string' ? item : JSON.stringify(item) };
                                 }
-                                return { type: 'text', text: typeof item === 'string' ? item : JSON.stringify(item) };
                             }
                             return parsedItem;

package/server/plugins/openAiWhisperPlugin.js CHANGED Viewed

@@ -90,6 +90,9 @@ class OpenAIWhisperPlugin extends ModelPlugin {
                 sendProgress(true, true);
                 try {
                     res = await this.executeRequest(cortexRequest);
+                    if (!res) {
+                        throw new Error('Received null or empty response');
+                    }
                     if(res?.statusCode && res?.statusCode >= 400){
                         throw new Error(res?.message || 'An error occurred.');
                     }
@@ -107,6 +110,10 @@ class OpenAIWhisperPlugin extends ModelPlugin {
             if(!wordTimestamped && !responseFormat){
                 //if no response format, convert to text
+                if (!res) {
+                    logger.warn("Received null or empty response from timestamped API when expecting SRT/VTT format. Returning empty string.");
+                    return "";
+                }
                 return convertSrtToText(res);
             }
             return res;