@aj-archipelago/cortex 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/README.md +1 -0
  2. package/config.js +1 -1
  3. package/helper-apps/cortex-autogen2/.dockerignore +1 -0
  4. package/helper-apps/cortex-autogen2/Dockerfile +6 -10
  5. package/helper-apps/cortex-autogen2/Dockerfile.worker +2 -0
  6. package/helper-apps/cortex-autogen2/agents.py +203 -2
  7. package/helper-apps/cortex-autogen2/main.py +1 -1
  8. package/helper-apps/cortex-autogen2/pyproject.toml +12 -0
  9. package/helper-apps/cortex-autogen2/requirements.txt +14 -0
  10. package/helper-apps/cortex-autogen2/services/redis_publisher.py +1 -1
  11. package/helper-apps/cortex-autogen2/services/run_analyzer.py +1 -1
  12. package/helper-apps/cortex-autogen2/task_processor.py +431 -229
  13. package/helper-apps/cortex-autogen2/test_entity_fetcher.py +305 -0
  14. package/helper-apps/cortex-autogen2/tests/README.md +240 -0
  15. package/helper-apps/cortex-autogen2/tests/TEST_REPORT.md +342 -0
  16. package/helper-apps/cortex-autogen2/tests/__init__.py +8 -0
  17. package/helper-apps/cortex-autogen2/tests/analysis/__init__.py +1 -0
  18. package/helper-apps/cortex-autogen2/tests/analysis/improvement_suggester.py +224 -0
  19. package/helper-apps/cortex-autogen2/tests/analysis/trend_analyzer.py +211 -0
  20. package/helper-apps/cortex-autogen2/tests/cli/__init__.py +1 -0
  21. package/helper-apps/cortex-autogen2/tests/cli/run_tests.py +296 -0
  22. package/helper-apps/cortex-autogen2/tests/collectors/__init__.py +1 -0
  23. package/helper-apps/cortex-autogen2/tests/collectors/log_collector.py +252 -0
  24. package/helper-apps/cortex-autogen2/tests/collectors/progress_collector.py +182 -0
  25. package/helper-apps/cortex-autogen2/tests/conftest.py +15 -0
  26. package/helper-apps/cortex-autogen2/tests/database/__init__.py +1 -0
  27. package/helper-apps/cortex-autogen2/tests/database/repository.py +501 -0
  28. package/helper-apps/cortex-autogen2/tests/database/schema.sql +108 -0
  29. package/helper-apps/cortex-autogen2/tests/evaluators/__init__.py +1 -0
  30. package/helper-apps/cortex-autogen2/tests/evaluators/llm_scorer.py +294 -0
  31. package/helper-apps/cortex-autogen2/tests/evaluators/prompts.py +250 -0
  32. package/helper-apps/cortex-autogen2/tests/evaluators/wordcloud_validator.py +168 -0
  33. package/helper-apps/cortex-autogen2/tests/metrics/__init__.py +1 -0
  34. package/helper-apps/cortex-autogen2/tests/metrics/collector.py +155 -0
  35. package/helper-apps/cortex-autogen2/tests/orchestrator.py +576 -0
  36. package/helper-apps/cortex-autogen2/tests/test_cases.yaml +279 -0
  37. package/helper-apps/cortex-autogen2/tests/test_data.db +0 -0
  38. package/helper-apps/cortex-autogen2/tests/utils/__init__.py +3 -0
  39. package/helper-apps/cortex-autogen2/tests/utils/connectivity.py +112 -0
  40. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +74 -24
  41. package/helper-apps/cortex-autogen2/tools/entity_api_registry.json +38 -0
  42. package/helper-apps/cortex-autogen2/tools/file_tools.py +1 -1
  43. package/helper-apps/cortex-autogen2/tools/search_tools.py +436 -238
  44. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  45. package/helper-apps/cortex-file-handler/package.json +1 -1
  46. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +4 -5
  47. package/helper-apps/cortex-file-handler/src/blobHandler.js +36 -144
  48. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +5 -3
  49. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +34 -1
  50. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +22 -0
  51. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +28 -1
  52. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +29 -4
  53. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +11 -0
  54. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +1 -1
  55. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +3 -2
  56. package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +8 -1
  57. package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +5 -2
  58. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +14 -7
  59. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +5 -2
  60. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +31 -19
  61. package/package.json +1 -1
  62. package/server/modelExecutor.js +4 -0
  63. package/server/plugins/claude4VertexPlugin.js +540 -0
  64. package/server/plugins/openAiWhisperPlugin.js +43 -2
  65. package/tests/integration/rest/vendors/claude_streaming.test.js +121 -0
  66. package/tests/unit/plugins/claude4VertexPlugin.test.js +462 -0
  67. package/tests/unit/plugins/claude4VertexToolConversion.test.js +413 -0
  68. package/helper-apps/cortex-autogen/.funcignore +0 -8
  69. package/helper-apps/cortex-autogen/Dockerfile +0 -10
  70. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +0 -6
  71. package/helper-apps/cortex-autogen/agents.py +0 -493
  72. package/helper-apps/cortex-autogen/agents_extra.py +0 -14
  73. package/helper-apps/cortex-autogen/config.py +0 -18
  74. package/helper-apps/cortex-autogen/data_operations.py +0 -29
  75. package/helper-apps/cortex-autogen/function_app.py +0 -44
  76. package/helper-apps/cortex-autogen/host.json +0 -15
  77. package/helper-apps/cortex-autogen/main.py +0 -38
  78. package/helper-apps/cortex-autogen/prompts.py +0 -196
  79. package/helper-apps/cortex-autogen/prompts_extra.py +0 -5
  80. package/helper-apps/cortex-autogen/requirements.txt +0 -9
  81. package/helper-apps/cortex-autogen/search.py +0 -85
  82. package/helper-apps/cortex-autogen/test.sh +0 -40
  83. package/helper-apps/cortex-autogen/tools/sasfileuploader.py +0 -66
  84. package/helper-apps/cortex-autogen/utils.py +0 -88
  85. package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +0 -22
  86. package/helper-apps/cortex-autogen2/poetry.lock +0 -3652
@@ -0,0 +1,279 @@
1
+ # Cortex AutoGen2 Test Cases
2
+ # Predefined test tasks for automated quality testing
3
+
4
+ test_cases:
5
+ - id: tc001_pokemon_pptx
6
+ name: "Most Powerful Gen 1 Pokemon PowerPoint"
7
+ description: "Creates a PowerPoint about the most powerful Gen 1 Pokemon with their images"
8
+ task: "Create a PowerPoint presentation about the Most Powerful Gen 1 Pokemon. Include images of each Pokemon from the same source."
9
+ timeout_seconds: 420
10
+ requires_ajsql: false
11
+ expected_deliverables:
12
+ - type: pptx
13
+ pattern: "*.pptx"
14
+ min_count: 1
15
+ description: "PowerPoint presentation file"
16
+ - type: preview_images
17
+ pattern: "preview_slide_*.png"
18
+ min_count: 2
19
+ description: "Slide preview images"
20
+ - type: images
21
+ pattern: "*.png"
22
+ min_count: 10
23
+ description: "Pokemon images used in presentation"
24
+ min_progress_updates: 8
25
+ quality_criteria:
26
+ - "CRITICAL: Presentation MUST include actual Pokemon character images (NOT just logos or text)"
27
+ - "CRITICAL: Each slide about a Pokemon MUST show THAT SPECIFIC Pokemon's image (e.g., Gengar slide shows Gengar, NOT Pikachu or any other Pokemon)"
28
+ - "CRITICAL: NO REUSING THE SAME IMAGE on multiple slides - each Pokemon needs its own unique character image"
29
+ - "CRITICAL: Verify in preview images that each Pokemon name matches its image (e.g., if slide says 'Alakazam', the image must show Alakazam)"
30
+ - "Each Pokemon mentioned MUST have its corresponding character image on the slide"
31
+ - "All Pokemon images from SAME consistent source (pokemon.com, bulbapedia.net, or pokemondb.net)"
32
+ - "Images show the actual Pokemon characters (Mewtwo, Dragonite, Alakazam, Gengar, etc.) - NOT generic Pokemon logos"
33
+ - "Images are high-quality official artwork or game sprites (NO fan art, NO thumbnails)"
34
+ - "NO watermarked images unless from official Pokemon source"
35
+ - "Consistent art style throughout (all game sprites OR all official artwork)"
36
+ - "At least 10-12 DIFFERENT Gen 1 Pokemon featured, each with their OWN unique character image"
37
+ - "Professional slide design with Pokemon-themed colors (red, blue, yellow)"
38
+ - "CRITICAL: Images must NOT cover or overlap text - all text must be readable"
39
+ - "Images positioned properly beside or above text, never covering content"
40
+ - "Each slide shows: Pokemon name, stats/power info, AND that Pokemon's specific character image"
41
+ - "Images properly sized and centered (not too small, easily visible)"
42
+ - "Preview slides generated showing Pokemon images are visible and MATCH the Pokemon names"
43
+ - "Gen 1 Pokemon ONLY (original 151, including evolutions)"
44
+ - "Power ranking or stats included (HP, Attack, Special, etc.)"
45
+ expected_agents:
46
+ - planner_agent
47
+ - web_search_agent
48
+ - coder_agent
49
+ - code_executor
50
+ - presenter_agent
51
+
52
+ - id: tc002_pdf_with_images
53
+ name: "PDF Report with Images and Charts"
54
+ description: "Generates a PDF report with images and charts"
55
+ task: "Generate a PDF report about renewable energy trends in 2026."
56
+ timeout_seconds: 300
57
+ requires_ajsql: false
58
+ expected_deliverables:
59
+ - type: pdf
60
+ pattern: "*.pdf"
61
+ min_count: 1
62
+ description: "PDF report file"
63
+ - type: images
64
+ pattern: "*.png"
65
+ min_count: 5
66
+ description: "Images and charts included in report"
67
+ min_progress_updates: 6
68
+ quality_criteria:
69
+ - "PDF contains both text content and images"
70
+ - "Charts and graphs are professionally designed"
71
+ - "Real data used, no placeholder or dummy content"
72
+ - "Proper document formatting with headers and page numbers"
73
+ - "Images are relevant to renewable energy topic"
74
+ - "Preview images or thumbnails provided"
75
+ expected_agents:
76
+ - planner_agent
77
+ - web_search_agent
78
+ - coder_agent
79
+ - code_executor
80
+ - presenter_agent
81
+
82
+ - id: tc003_random_csv
83
+ name: "Random Sales Data CSV Generation"
84
+ description: "Generates random sales data CSV with summary statistics"
85
+ task: "Generate a CSV with 100 rows of random sales data covering the last 90 days, and a summary CSV with statistics."
86
+ timeout_seconds: 180
87
+ requires_ajsql: false
88
+ expected_deliverables:
89
+ - type: csv
90
+ pattern: "*sales*.csv"
91
+ min_count: 1
92
+ description: "Main sales data CSV"
93
+ - type: csv
94
+ pattern: "*summary*.csv"
95
+ min_count: 1
96
+ description: "Summary statistics CSV"
97
+ min_progress_updates: 3
98
+ quality_criteria:
99
+ - "Main CSV contains exactly 100 rows of sales data"
100
+ - "Dates span the last 90 days as specified"
101
+ - "Realistic product names and prices (no generic 'Product1', 'Product2')"
102
+ - "Summary statistics calculated correctly from the main data"
103
+ - "Proper CSV formatting (headers, no missing values)"
104
+ - "Files uploaded with SAS URLs provided"
105
+ expected_agents:
106
+ - planner_agent
107
+ - coder_agent
108
+ - code_executor
109
+ - presenter_agent
110
+
111
+ - id: tc004_aje_aja_comparison
112
+ name: "AJE vs AJA Daily Article Count Comparison"
113
+ description: "Compares daily article counts between AJE and AJA"
114
+ task: "Compare daily article counts for AJE and AJA from the last 30 days. Give me a chart and CSV."
115
+ timeout_seconds: 300
116
+ requires_ajsql: true
117
+ expected_deliverables:
118
+ - type: chart
119
+ pattern: "*.png"
120
+ min_count: 1
121
+ description: "Comparison chart showing AJE vs AJA daily counts"
122
+ - type: csv
123
+ pattern: "*.csv"
124
+ min_count: 1
125
+ description: "Raw data CSV with daily counts"
126
+ min_progress_updates: 5
127
+ quality_criteria:
128
+ - "Data queried from UCMS AJE and AJA databases"
129
+ - "Exactly 30 days of data (excluding today)"
130
+ - "Chart clearly shows both AJE and AJA trends"
131
+ - "CSV contains date, aje_count, aja_count columns"
132
+ - "No missing dates in the 30-day period"
133
+ - "Professional chart with legend, labels, and title"
134
+ expected_agents:
135
+ - planner_agent
136
+ - aj_sql_agent
137
+ - coder_agent
138
+ - code_executor
139
+ - presenter_agent
140
+
141
+ - id: tc005_aje_trump_trend
142
+ name: "AJE Trump Headlines - 6 Month Trend Analysis"
143
+ description: "Analyzes Trump headline trends in AJE"
144
+ task: "Plot Trump headline percentage trends for AJE over the last 6 months by week. Give me a chart and CSV."
145
+ timeout_seconds: 360
146
+ requires_ajsql: true
147
+ expected_deliverables:
148
+ - type: chart
149
+ pattern: "*.png"
150
+ min_count: 1
151
+ description: "Weekly trend chart with 3 metrics"
152
+ - type: csv
153
+ pattern: "*.csv"
154
+ min_count: 1
155
+ description: "Weekly data CSV"
156
+ min_progress_updates: 6
157
+ quality_criteria:
158
+ - "Data covers full 6 months from UCMS AJE database"
159
+ - "Chart shows 3 lines: trump count, total count, and % trump"
160
+ - "Data aggregated by week (ISO weeks recommended)"
161
+ - "CSV contains columns: week, trump_count, total_count, percent_trump"
162
+ - "Case-insensitive Trump matching in headlines"
163
+ - "Professional multi-line chart with legend and axis labels"
164
+ - "All weeks in 6-month period represented"
165
+ expected_agents:
166
+ - planner_agent
167
+ - aj_sql_agent
168
+ - coder_agent
169
+ - code_executor
170
+ - presenter_agent
171
+
172
+ - id: tc006_aje_trump_daily
173
+ name: "AJE Trump Headlines - Last Month Daily Chart"
174
+ description: "Daily Trump headline chart for AJE"
175
+ task: "Chart Trump headlines from AJE for the last month by day. Give me the chart and CSV with the headlines."
176
+ timeout_seconds: 300
177
+ requires_ajsql: true
178
+ expected_deliverables:
179
+ - type: chart
180
+ pattern: "*.png"
181
+ min_count: 1
182
+ description: "Daily Trump headline count chart"
183
+ - type: csv
184
+ pattern: "*headlines*.csv"
185
+ min_count: 1
186
+ description: "All Trump headlines with dates"
187
+ - type: csv
188
+ pattern: "*daily*.csv"
189
+ min_count: 1
190
+ description: "Daily count summary"
191
+ min_progress_updates: 5
192
+ quality_criteria:
193
+ - "Headlines queried from UCMS AJE wp_posts table"
194
+ - "Last 30 days of data"
195
+ - "Chart shows daily Trump headline counts"
196
+ - "CSV includes actual headline text, not just counts"
197
+ - "Headlines CSV has columns: date, headline (minimum)"
198
+ - "Daily summary CSV shows Trump count per day"
199
+ - "Case-insensitive Trump matching"
200
+ - "All data properly dated and sorted"
201
+ expected_agents:
202
+ - planner_agent
203
+ - aj_sql_agent
204
+ - coder_agent
205
+ - code_executor
206
+ - presenter_agent
207
+
208
+ - id: tc007_aja_aje_wordclouds
209
+ name: "AJA & AJE Last Month Word Clouds"
210
+ description: "Generate word clouds from AJA and AJE headlines"
211
+ task: "Generate word clouds from AJA and AJE headlines for the last month. Give me word frequency CSVs too."
212
+ timeout_seconds: 360
213
+ requires_ajsql: true
214
+ expected_deliverables:
215
+ - type: wordcloud
216
+ pattern: "*aja*.png"
217
+ min_count: 1
218
+ description: "AJA word cloud visualization"
219
+ - type: wordcloud
220
+ pattern: "*aje*.png"
221
+ min_count: 1
222
+ description: "AJE word cloud visualization"
223
+ - type: csv
224
+ pattern: "*aja*freq*.csv"
225
+ min_count: 1
226
+ description: "AJA word frequency data"
227
+ - type: csv
228
+ pattern: "*aje*freq*.csv"
229
+ min_count: 1
230
+ description: "AJE word frequency data"
231
+ min_progress_updates: 6
232
+ quality_criteria:
233
+ - "Headlines queried from UCMS databases (both AJA and AJE)"
234
+ - "Data covers last 30 days"
235
+ - "Two separate word clouds generated (one for AJA, one for AJE)"
236
+ - "Word clouds are visually clear and professional"
237
+ - "Common/stop words filtered out (the, a, an, etc.)"
238
+ - "CSV files contain word frequency data for both outlets"
239
+ - "All files uploaded with SAS URLs"
240
+ - "Word clouds show meaningful topic differences between outlets"
241
+ expected_agents:
242
+ - planner_agent
243
+ - aj_sql_agent
244
+ - coder_agent
245
+ - code_executor
246
+ - presenter_agent
247
+
248
+ - id: tc008_aje_aja_override_test
249
+ name: "AJE vs AJA Comparison - Override User Instructions Test"
250
+ description: "Tests NON-NEGOTIABLE OVERRIDE: user explicitly forbids other agents but system must route to aj_sql_agent"
251
+ task: "Compare daily article counts for Al Jazeera Arabic (AJA) and Al Jazeera English (AJE) from the last 60 days, do not use any other tools do not use any other agents use only coding agent"
252
+ timeout_seconds: 420
253
+ requires_ajsql: true
254
+ expected_deliverables:
255
+ - type: chart
256
+ pattern: "*.png"
257
+ min_count: 1
258
+ description: "Comparison chart showing AJE vs AJA daily counts"
259
+ - type: csv
260
+ pattern: "*.csv"
261
+ min_count: 1
262
+ description: "Raw data CSV with daily counts"
263
+ min_progress_updates: 5
264
+ quality_criteria:
265
+ - "CRITICAL: System MUST route to aj_sql_agent despite user's explicit 'do not use any other agents' instruction"
266
+ - "CRITICAL: NON-NEGOTIABLE OVERRIDE must work - aj_sql_agent called FIRST before coder_agent"
267
+ - "Data queried from UCMS AJE and AJA databases (proves override worked)"
268
+ - "Exactly 60 days of data (excluding today)"
269
+ - "Chart clearly shows both AJE and AJA trends"
270
+ - "CSV contains date, aje_count, aja_count columns"
271
+ - "No missing dates in the 60-day period"
272
+ - "Professional chart with legend, labels, and title"
273
+ - "CRITICAL: Test fails if coder_agent attempts direct DB access or if aj_sql_agent is never called"
274
+ expected_agents:
275
+ - planner_agent
276
+ - aj_sql_agent
277
+ - coder_agent
278
+ - code_executor
279
+ - presenter_agent
File without textual changes (binary file — presumably tests/test_data.db, +0 −0)
@@ -0,0 +1,3 @@
1
+ """
2
+ Utility functions for testing.
3
+ """
@@ -0,0 +1,112 @@
1
+ """
2
+ Connectivity checkers for external services.
3
+ """
4
+
5
+ import os
6
+ import logging
7
+ from typing import Tuple
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ def check_ajsql_connectivity() -> Tuple[bool, str]:
13
+ """
14
+ Check if AJ SQL database is accessible from current IP.
15
+
16
+ Returns:
17
+ Tuple of (is_accessible, message)
18
+ """
19
+ mysql_url = os.getenv("AJ_MYSQL_URL")
20
+
21
+ if not mysql_url:
22
+ return False, "AJ_MYSQL_URL environment variable not set"
23
+
24
+ try:
25
+ import pymysql
26
+ from urllib.parse import unquote
27
+ except ImportError:
28
+ return False, "pymysql library not installed"
29
+
30
+ try:
31
+ # Parse MySQL URL
32
+ # Format: mysql://user:password@host:port/database or mysql+pymysql://...
33
+ if mysql_url.startswith("mysql+pymysql://"):
34
+ url_parts = mysql_url[16:] # Remove mysql+pymysql://
35
+ elif mysql_url.startswith("mysql://"):
36
+ url_parts = mysql_url[8:] # Remove mysql://
37
+ else:
38
+ return False, "Invalid AJ_MYSQL_URL format (must start with mysql:// or mysql+pymysql://)"
39
+
40
+ # Split user:password@host:port/database
41
+ if "@" in url_parts:
42
+ auth_part, host_part = url_parts.split("@", 1)
43
+ user, password = auth_part.split(":", 1) if ":" in auth_part else (auth_part, "")
44
+ # URL-decode username and password (handles special characters like @ encoded as %40)
45
+ user = unquote(user)
46
+ password = unquote(password)
47
+ else:
48
+ return False, "Invalid AJ_MYSQL_URL format (missing credentials)"
49
+
50
+ # Split host:port/database (database is optional)
51
+ if "/" in host_part:
52
+ host_port, database = host_part.split("/", 1)
53
+ # Database can be empty (for multi-database access)
54
+ if not database:
55
+ database = None
56
+ else:
57
+ host_port = host_part
58
+ database = None
59
+
60
+ # Split host:port
61
+ if ":" in host_port:
62
+ host, port = host_port.rsplit(":", 1)
63
+ port = int(port)
64
+ else:
65
+ host = host_port
66
+ port = 3306
67
+
68
+ # Try to connect with a short timeout
69
+ logger.info(f"Testing AJ SQL connectivity to {host}:{port}")
70
+
71
+ # Build connection params
72
+ connect_params = {
73
+ 'host': host,
74
+ 'port': port,
75
+ 'user': user,
76
+ 'password': password,
77
+ 'connect_timeout': 5,
78
+ 'read_timeout': 5,
79
+ 'write_timeout': 5,
80
+ 'ssl': {'ssl': True}
81
+ }
82
+
83
+ # Only include database if specified
84
+ if database:
85
+ connect_params['database'] = database
86
+
87
+ connection = pymysql.connect(**connect_params)
88
+
89
+ # Run a simple query to verify access
90
+ with connection.cursor() as cursor:
91
+ cursor.execute("SELECT 1")
92
+ cursor.fetchone()
93
+
94
+ connection.close()
95
+
96
+ logger.info(f"✅ AJ SQL database is accessible")
97
+ return True, "Database is accessible"
98
+
99
+ except pymysql.err.OperationalError as e:
100
+ error_msg = str(e)
101
+ if "Access denied" in error_msg:
102
+ logger.warning(f"⚠️ AJ SQL access denied: {error_msg}")
103
+ return False, f"Access denied: {error_msg}"
104
+ elif "Can't connect" in error_msg or "timed out" in error_msg:
105
+ logger.warning(f"⚠️ AJ SQL connection failed (IP restriction?): {error_msg}")
106
+ return False, f"Connection failed (likely IP restriction): {error_msg}"
107
+ else:
108
+ logger.warning(f"⚠️ AJ SQL operational error: {error_msg}")
109
+ return False, f"Database error: {error_msg}"
110
+ except Exception as e:
111
+ logger.warning(f"⚠️ AJ SQL connectivity check failed: {e}")
112
+ return False, f"Unexpected error: {str(e)}"
@@ -9,6 +9,8 @@ import mimetypes
9
9
  import uuid
10
10
  import time
11
11
  import hashlib
12
+ import re
13
+ import unicodedata
12
14
  from datetime import datetime, timedelta
13
15
  from urllib.parse import urlparse, parse_qs
14
16
  from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions, ContentSettings
@@ -17,6 +19,27 @@ import requests
17
19
 
18
20
  logger = logging.getLogger(__name__)
19
21
 
22
+ def _sanitize_blob_name(filename: str) -> str:
23
+ """
24
+ Sanitize filename to be Azure Blob Storage safe.
25
+ Removes special characters and converts to ASCII-safe format.
26
+ """
27
+ # Normalize unicode characters (e.g., é -> e)
28
+ normalized = unicodedata.normalize('NFKD', filename)
29
+ # Remove accents/diacritics
30
+ ascii_str = normalized.encode('ascii', 'ignore').decode('ascii')
31
+ # Replace any remaining problematic characters with underscore
32
+ # Keep only: alphanumeric, dots, dashes, underscores
33
+ safe_name = re.sub(r'[^a-zA-Z0-9._-]', '_', ascii_str)
34
+ # Remove consecutive underscores
35
+ safe_name = re.sub(r'_+', '_', safe_name)
36
+ # Remove leading/trailing underscores or dots
37
+ safe_name = safe_name.strip('_.')
38
+ # Prevent empty filename (e.g., if all chars were special)
39
+ if not safe_name:
40
+ return "file"
41
+ return safe_name
42
+
20
43
  # Ensure correct MIME types for Office files, especially PPT/PPTX, for proper downloads in browsers
21
44
  try:
22
45
  mimetypes.add_type("application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx", strict=False)
@@ -112,20 +135,42 @@ class AzureBlobUploader:
112
135
  if not os.path.exists(file_path):
113
136
  raise FileNotFoundError(f"File not found: {file_path}")
114
137
 
138
+ # Determine if we should preserve the exact filename or add timestamp/UUID
139
+ preserve = (os.getenv("PRESERVE_BLOB_FILENAME", "false").lower() in ("1", "true", "yes"))
140
+ prefix = (os.getenv("AZURE_BLOB_PREFIX") or "").strip().strip("/")
141
+
115
142
  if blob_name is None:
143
+ # Use original filename from file_path
116
144
  original_base = os.path.basename(file_path)
117
145
  name, ext = os.path.splitext(original_base)
118
- # Prefix support for virtual folders
119
- prefix = (os.getenv("AZURE_BLOB_PREFIX") or "").strip().strip("/")
120
- # Decide uniqueness policy: default add timestamp+short id to avoid static overwrites
121
- preserve = (os.getenv("PRESERVE_BLOB_FILENAME", "false").lower() in ("1", "true", "yes"))
122
- if preserve:
123
- final_name = original_base
124
- else:
125
- timestamp = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
126
- short_id = uuid.uuid4().hex[:8]
127
- final_name = f"{name}__{timestamp}_{short_id}{ext}"
128
- blob_name = f"{prefix}/{final_name}" if prefix else final_name
146
+ else:
147
+ # Use provided blob_name (might have path components)
148
+ # Extract just the filename part
149
+ blob_base = os.path.basename(blob_name)
150
+ name, ext = os.path.splitext(blob_base)
151
+ # Keep any directory prefix from blob_name
152
+ blob_dir = os.path.dirname(blob_name).strip('/')
153
+ if blob_dir:
154
+ prefix = f"{prefix}/{blob_dir}" if prefix else blob_dir
155
+
156
+ # Sanitize filename to be Azure Blob safe (remove special chars like é, ñ, etc.)
157
+ name = _sanitize_blob_name(name)
158
+ # Extension already starts with dot, just sanitize the part after the dot
159
+ if ext:
160
+ ext_without_dot = ext.lstrip('.')
161
+ sanitized_ext = _sanitize_blob_name(ext_without_dot)
162
+ ext = f".{sanitized_ext}" if sanitized_ext else ext
163
+
164
+ # Add timestamp+UUID suffix unless preserve flag is set
165
+ if preserve:
166
+ final_name = f"{name}{ext}"
167
+ else:
168
+ timestamp = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
169
+ short_id = uuid.uuid4().hex[:8]
170
+ final_name = f"{name}__{timestamp}_{short_id}{ext}"
171
+
172
+ # Construct final blob_name with prefix if provided
173
+ blob_name = f"{prefix}/{final_name}" if prefix else final_name
129
174
 
130
175
  # Normalize any accidental leading slashes in blob path
131
176
  normalized_blob_name = blob_name.lstrip("/")
@@ -186,20 +231,25 @@ class AzureBlobUploader:
186
231
  # Keep a single function for external calls to use the singleton uploader
187
232
  def upload_file_to_azure_blob(file_path: str, blob_name: str = None) -> str:
188
233
  """
189
- Uploads a file to Azure Blob Storage and returns a JSON string with the download URL.
190
- This function uses the singleton AzureBlobUploader instance.
191
-
192
- Reference local files in absolute path.
193
-
234
+ Uploads a file to Azure Blob Storage with automatic retry on transient failures.
235
+ Returns a JSON string with the download URL.
194
236
  """
195
- try:
196
- uploader = AzureBlobUploader()
197
- result = uploader.upload_file(file_path, blob_name)
198
- logger.info(f"✅ Successfully uploaded and got SAS URL for {file_path}")
199
- return json.dumps(result)
200
- except Exception as e:
201
- logger.error(f"❌ Failed to upload {file_path}. Error: {e}", exc_info=True)
202
- return json.dumps({"error": str(e)})
237
+ max_attempts = 3
238
+ retry_delay = 3
239
+
240
+ for attempt in range(1, max_attempts + 1):
241
+ try:
242
+ uploader = AzureBlobUploader()
243
+ result = uploader.upload_file(file_path, blob_name)
244
+ logger.info(f" Successfully uploaded {file_path} (attempt {attempt}/{max_attempts})")
245
+ return json.dumps(result)
246
+ except Exception as e:
247
+ if attempt < max_attempts:
248
+ logger.warning(f"⚠️ Upload attempt {attempt}/{max_attempts} failed for {file_path}: {e}. Retrying in {retry_delay}s...")
249
+ time.sleep(retry_delay)
250
+ else:
251
+ logger.error(f"❌ Upload failed after {max_attempts} attempts for {file_path}: {e}", exc_info=True)
252
+ return json.dumps({"error": str(e)})
203
253
 
204
254
  # This function is no longer needed as the class handles text uploads if necessary,
205
255
  # and direct calls should go through the singleton.
@@ -0,0 +1,38 @@
1
+ {
2
+ "pokemon": {
3
+ "name": "PokeAPI",
4
+ "description": "Official Pokemon data API with sprites and artwork",
5
+ "url_pattern": "https://pokeapi.co/api/v2/pokemon/{entity}",
6
+ "entity_transform": "lowercase",
7
+ "image_fields": [
8
+ "sprites.other.official-artwork.front_default",
9
+ "sprites.front_default"
10
+ ],
11
+ "fallback_search_query": "{entity} pokemon official artwork",
12
+ "enabled": true
13
+ },
14
+ "country": {
15
+ "name": "REST Countries",
16
+ "description": "Country data including flags",
17
+ "url_pattern": "https://restcountries.com/v3.1/name/{entity}",
18
+ "entity_transform": "none",
19
+ "image_fields": [
20
+ "[0].flags.png",
21
+ "[0].flags.svg"
22
+ ],
23
+ "fallback_search_query": "{entity} country flag",
24
+ "enabled": true
25
+ },
26
+ "movie": {
27
+ "name": "OMDB API",
28
+ "description": "Movie database with posters (requires API key in OMDB_API_KEY env var)",
29
+ "url_pattern": "http://www.omdbapi.com/?apikey={OMDB_API_KEY}&t={entity}",
30
+ "entity_transform": "none",
31
+ "image_fields": [
32
+ "Poster"
33
+ ],
34
+ "fallback_search_query": "{entity} movie poster",
35
+ "enabled": false,
36
+ "requires_env": ["OMDB_API_KEY"]
37
+ }
38
+ }
@@ -213,7 +213,7 @@ async def download_image(url: str, filename: str, work_dir: Optional[str] = None
213
213
  "User-Agent": BROWSER_UA,
214
214
  "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
215
215
  "Accept-Language": "en-US,en;q=0.9",
216
- "Referer": "https://duckduckgo.com/",
216
+ "Referer": "https://www.google.com/",
217
217
  "Cache-Control": "no-cache",
218
218
  })
219
219