@aj-archipelago/cortex 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/README.md +1 -0
  2. package/config.js +1 -1
  3. package/helper-apps/cortex-autogen2/.dockerignore +1 -0
  4. package/helper-apps/cortex-autogen2/Dockerfile +6 -10
  5. package/helper-apps/cortex-autogen2/Dockerfile.worker +2 -0
  6. package/helper-apps/cortex-autogen2/agents.py +203 -2
  7. package/helper-apps/cortex-autogen2/main.py +1 -1
  8. package/helper-apps/cortex-autogen2/pyproject.toml +12 -0
  9. package/helper-apps/cortex-autogen2/requirements.txt +14 -0
  10. package/helper-apps/cortex-autogen2/services/redis_publisher.py +1 -1
  11. package/helper-apps/cortex-autogen2/services/run_analyzer.py +1 -1
  12. package/helper-apps/cortex-autogen2/task_processor.py +431 -229
  13. package/helper-apps/cortex-autogen2/test_entity_fetcher.py +305 -0
  14. package/helper-apps/cortex-autogen2/tests/README.md +240 -0
  15. package/helper-apps/cortex-autogen2/tests/TEST_REPORT.md +342 -0
  16. package/helper-apps/cortex-autogen2/tests/__init__.py +8 -0
  17. package/helper-apps/cortex-autogen2/tests/analysis/__init__.py +1 -0
  18. package/helper-apps/cortex-autogen2/tests/analysis/improvement_suggester.py +224 -0
  19. package/helper-apps/cortex-autogen2/tests/analysis/trend_analyzer.py +211 -0
  20. package/helper-apps/cortex-autogen2/tests/cli/__init__.py +1 -0
  21. package/helper-apps/cortex-autogen2/tests/cli/run_tests.py +296 -0
  22. package/helper-apps/cortex-autogen2/tests/collectors/__init__.py +1 -0
  23. package/helper-apps/cortex-autogen2/tests/collectors/log_collector.py +252 -0
  24. package/helper-apps/cortex-autogen2/tests/collectors/progress_collector.py +182 -0
  25. package/helper-apps/cortex-autogen2/tests/conftest.py +15 -0
  26. package/helper-apps/cortex-autogen2/tests/database/__init__.py +1 -0
  27. package/helper-apps/cortex-autogen2/tests/database/repository.py +501 -0
  28. package/helper-apps/cortex-autogen2/tests/database/schema.sql +108 -0
  29. package/helper-apps/cortex-autogen2/tests/evaluators/__init__.py +1 -0
  30. package/helper-apps/cortex-autogen2/tests/evaluators/llm_scorer.py +294 -0
  31. package/helper-apps/cortex-autogen2/tests/evaluators/prompts.py +250 -0
  32. package/helper-apps/cortex-autogen2/tests/evaluators/wordcloud_validator.py +168 -0
  33. package/helper-apps/cortex-autogen2/tests/metrics/__init__.py +1 -0
  34. package/helper-apps/cortex-autogen2/tests/metrics/collector.py +155 -0
  35. package/helper-apps/cortex-autogen2/tests/orchestrator.py +576 -0
  36. package/helper-apps/cortex-autogen2/tests/test_cases.yaml +279 -0
  37. package/helper-apps/cortex-autogen2/tests/test_data.db +0 -0
  38. package/helper-apps/cortex-autogen2/tests/utils/__init__.py +3 -0
  39. package/helper-apps/cortex-autogen2/tests/utils/connectivity.py +112 -0
  40. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +74 -24
  41. package/helper-apps/cortex-autogen2/tools/entity_api_registry.json +38 -0
  42. package/helper-apps/cortex-autogen2/tools/file_tools.py +1 -1
  43. package/helper-apps/cortex-autogen2/tools/search_tools.py +436 -238
  44. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  45. package/helper-apps/cortex-file-handler/package.json +1 -1
  46. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +4 -5
  47. package/helper-apps/cortex-file-handler/src/blobHandler.js +36 -144
  48. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +5 -3
  49. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +34 -1
  50. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +22 -0
  51. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +28 -1
  52. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +29 -4
  53. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +11 -0
  54. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +1 -1
  55. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +3 -2
  56. package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +8 -1
  57. package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +5 -2
  58. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +14 -7
  59. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +5 -2
  60. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +31 -19
  61. package/lib/entityConstants.js +3 -0
  62. package/package.json +2 -2
  63. package/pathways/system/entity/sys_entity_agent.js +2 -1
  64. package/pathways/system/entity/tools/sys_tool_codingagent.js +2 -2
  65. package/pathways/system/workspaces/workspace_applet_edit.js +551 -29
  66. package/server/modelExecutor.js +4 -0
  67. package/server/plugins/claude4VertexPlugin.js +540 -0
  68. package/server/plugins/openAiWhisperPlugin.js +43 -2
  69. package/tests/integration/rest/vendors/claude_streaming.test.js +121 -0
  70. package/tests/unit/plugins/claude4VertexPlugin.test.js +462 -0
  71. package/tests/unit/plugins/claude4VertexToolConversion.test.js +413 -0
  72. package/helper-apps/cortex-autogen/.funcignore +0 -8
  73. package/helper-apps/cortex-autogen/Dockerfile +0 -10
  74. package/helper-apps/cortex-autogen/OAI_CONFIG_LIST +0 -6
  75. package/helper-apps/cortex-autogen/agents.py +0 -493
  76. package/helper-apps/cortex-autogen/agents_extra.py +0 -14
  77. package/helper-apps/cortex-autogen/config.py +0 -18
  78. package/helper-apps/cortex-autogen/data_operations.py +0 -29
  79. package/helper-apps/cortex-autogen/function_app.py +0 -44
  80. package/helper-apps/cortex-autogen/host.json +0 -15
  81. package/helper-apps/cortex-autogen/main.py +0 -38
  82. package/helper-apps/cortex-autogen/prompts.py +0 -196
  83. package/helper-apps/cortex-autogen/prompts_extra.py +0 -5
  84. package/helper-apps/cortex-autogen/requirements.txt +0 -9
  85. package/helper-apps/cortex-autogen/search.py +0 -85
  86. package/helper-apps/cortex-autogen/test.sh +0 -40
  87. package/helper-apps/cortex-autogen/tools/sasfileuploader.py +0 -66
  88. package/helper-apps/cortex-autogen/utils.py +0 -88
  89. package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +0 -22
  90. package/helper-apps/cortex-autogen2/poetry.lock +0 -3652
  91. package/testrun.log +0 -35371
package/README.md CHANGED
@@ -540,6 +540,7 @@ Models are configured in the `models` section of the config. Each model can have
  - `GEMINI-1.5-CHAT`: For Gemini 1.5 Pro chat models
  - `GEMINI-1.5-VISION`: For Gemini vision models (including 2.0 Flash experimental)
  - `CLAUDE-3-VERTEX`: For Claude-3 and 3.5 models (Haiku, Opus, Sonnet)
+ - `CLAUDE-4-VERTEX`: For Claude-4 models (Sonnet 4, Sonnet 4.5, Opus 4.1, Haiku 4.5) with enhanced support for PDFs and text files
  - `GROK-VISION`: For XAI Grok models (Grok-3, Grok-4, fast-reasoning, code-fast) with multimodal/vision and reasoning
  - `AZURE-TRANSLATE`: For Azure translation services

package/config.js CHANGED
@@ -528,7 +528,7 @@ var config = convict({
  "supportsStreaming": true
  },
  "claude-4-sonnet-vertex": {
- "type": "CLAUDE-3-VERTEX",
+ "type": "CLAUDE-4-VERTEX",
  "url": "{{claudeVertexUrl}}",
  "headers": {
  "Content-Type": "application/json"
package/helper-apps/cortex-autogen2/.dockerignore CHANGED
@@ -0,0 +1 @@
+ .venv
package/helper-apps/cortex-autogen2/Dockerfile CHANGED
@@ -23,10 +23,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
  fonts-noto-core fonts-noto-ui-core \
  fonts-noto-color-emoji fonts-noto-cjk \
  fonts-dejavu fonts-dejavu-core fonts-dejavu-extra \
- fonts-freefont-ttf fonts-liberation2 \
- # ─ Arabic fonts specifically for matplotlib/reportlab
- fonts-arabeyes fonts-farsiweb fonts-kacst fonts-kacst-one \
- fonts-hosny-amiri fonts-sil-scheherazade fonts-sil-lateef \
+ fonts-freefont-ttf fonts-liberation \
  # ─ Build chain for packages that still compile C/C++
  build-essential gcc g++ make \
  # ─ libmagic for `python-magic`
@@ -40,22 +37,21 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
  # ─ Computer-vision helpers sometimes needed by OpenCV
  libsm6 libxext6 libglib2.0-0 \
  # ─ HDF5 stack for `h5py`
- libhdf5-serial-dev hdf5-tools \
+ libhdf5-dev hdf5-tools \
  # ─ Cairo / Pango / GDK-PixBuf for SVG->PNG rendering (CairoSVG optional deps)
  libcairo2 libcairo2-dev \
  libpango-1.0-0 libpangoft2-1.0-0 libpangocairo-1.0-0 \
  libgdk-pixbuf-2.0-0 libgdk-pixbuf2.0-bin \
- # ─ GDAL for GIS formats (and Python bindings)
- gdal-bin libgdal-dev python3-gdal \
  # ─ 7-Zip & RAR extractors (for patool / rarfile fall-back)
  unrar-free \
+ # ─ LibreOffice for PPTX → PDF conversion (slide previews)
+ libreoffice-core libreoffice-writer libreoffice-impress \
+ # ─ Poppler utilities for PDF → PNG conversion (pdf2image)
+ poppler-utils \
  # ─ Clean-up
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*

- # Allow GDAL Python wheels to find headers
- ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
- ENV C_INCLUDE_PATH=/usr/include/gdal

  # ------------------------------------------------------------------------------
  # 3. Python dependencies
package/helper-apps/cortex-autogen2/Dockerfile.worker CHANGED
@@ -8,6 +8,8 @@ WORKDIR /app
  RUN apt-get update && apt-get install -y \
  gcc \
  curl \
+ libreoffice-core libreoffice-writer libreoffice-impress \
+ poppler-utils \
  && rm -rf /var/lib/apt/lists/*

  # Install poetry
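The LibreOffice and Poppler packages added in both Dockerfiles back the PPTX → PDF → PNG preview path called out in the comments above. A minimal sketch of that conversion chain, assuming `soffice` is on PATH and using the pdf2image package pinned later in this diff (the function name and file paths are illustrative, not part of the package):

```python
import os
import subprocess

from pdf2image import convert_from_path  # requires poppler-utils


def pptx_to_slide_pngs(pptx_path: str, out_dir: str) -> list[str]:
    """Convert a .pptx into one PNG per slide via headless LibreOffice + Poppler."""
    os.makedirs(out_dir, exist_ok=True)

    # 1. PPTX -> PDF using headless LibreOffice
    subprocess.run(
        ["soffice", "--headless", "--convert-to", "pdf", "--outdir", out_dir, pptx_path],
        check=True,
    )
    pdf_path = os.path.join(
        out_dir, os.path.splitext(os.path.basename(pptx_path))[0] + ".pdf"
    )

    # 2. PDF -> one PNG per page using pdf2image (Poppler under the hood)
    pages = convert_from_path(pdf_path, dpi=150)
    png_paths = []
    for i, page in enumerate(pages, start=1):
        png_path = os.path.join(out_dir, f"slide_{i:02d}.png")
        page.save(png_path, "PNG")
        png_paths.append(png_path)
    return png_paths
```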
package/helper-apps/cortex-autogen2/agents.py CHANGED
@@ -8,6 +8,43 @@ from tools.azure_blob_tools import upload_file_to_azure_blob
  #AGENTS
  MAGENTIC_ONE_CODER_DESCRIPTION = "A helpful and general-purpose AI assistant that has strong language skills, Python skills, and Linux command line skills."

+ def _safe_upload_wrapper(file_path: str, blob_name: str = None) -> str:
+     """
+     Wrapper around upload_file_to_azure_blob that ensures the JSON response is wrapped in markdown.
+     This prevents AutoGen from re-parsing the JSON and losing the 'type' field in message content.
+     """
+     result_json = upload_file_to_azure_blob(file_path, blob_name)
+
+     # CRITICAL: Wrap JSON in markdown code block to prevent AutoGen re-parsing
+     if result_json.strip().startswith('{'):
+         return f"```json\n{result_json}\n```"
+     return result_json
+
+ def _wrap_json_result(result: str) -> str:
+     """
+     Generic wrapper for ANY tool that returns JSON strings.
+     Wraps JSON in markdown to prevent AutoGen from re-parsing and losing message structure.
+
+     CRITICAL PATTERN FOR ALL TOOL DEVELOPERS:
+     When a tool returns JSON (via json.dumps()), wrap it in markdown:
+     - return json.dumps({...})  # ❌ AutoGen will re-parse
+     - return f"```json\n{json.dumps({...})}\n```"  # ✅ AutoGen treats as text
+
+     Or use this wrapper:
+     - return _wrap_json_result(json.dumps({...}))
+
+     This prevents the "Missing required parameter: 'messages[X].content[0].type'" error
+     because AutoGen won't attempt to re-parse the JSON into a dict.
+     """
+     if not result:
+         return result
+
+     result_str = str(result).strip()
+     # Wrap any JSON-like string in markdown
+     if result_str.startswith(('{', '[')):
+         return f"```json\n{result_str}\n```"
+     return result
+
  MAGENTIC_ONE_CODER_SYSTEM_MESSAGE = """You are a helpful AI assistant.
  Solve tasks using your coding and language skills.
  In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.
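The `_wrap_json_result` docstring above spells out the wrapping convention for tool authors. A minimal usage sketch under that convention, with a hypothetical `get_weather` tool that is not part of the package (the FunctionTool import path may differ by AutoGen version):

```python
import json

from autogen_core.tools import FunctionTool  # import path assumed; may vary by version


def get_weather(city: str) -> str:
    """Hypothetical tool: returns a JSON payload describing the weather."""
    payload = {"city": city, "temp_c": 21, "conditions": "clear"}
    # Wrap the JSON in a markdown fence so AutoGen treats it as plain text
    # instead of re-parsing it into a dict (see _wrap_json_result above).
    return _wrap_json_result(json.dumps(payload))


weather_tool = FunctionTool(get_weather, description="Look up the weather for a city.")
```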
@@ -20,7 +57,7 @@ If the result indicates there is an error, fix the error and output the code aga
  When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible."""


- async def get_agents(default_model_client, big_model_client, small_model_client, request_work_dir: Optional[str] = None):
+ async def get_agents(default_model_client, big_model_client, small_model_client, request_work_dir: Optional[str] = None, planner_learnings: Optional[str] = None, task_context: Optional[str] = None):

  # Resolve work dir (prefer per-request dir if provided or from env)
  work_dir = request_work_dir or os.getenv("CORTEX_WORK_DIR", "/home/site/wwwroot/coding")
@@ -39,7 +76,7 @@ async def get_agents(default_model_client, big_model_client, small_model_client,
  code_executor = LocalCommandLineCodeExecutor(work_dir=work_dir, timeout=300)

  #TOOLS
- upload_file_to_cloud_tool = FunctionTool(upload_file_to_azure_blob, description="Upload files to the cloud. You must use absolute path to reference local files.")
+ upload_file_to_cloud_tool = FunctionTool(_safe_upload_wrapper, description="Upload files to the cloud. You must use absolute path to reference local files.")

  coder_agent = AssistantAgent(
  "coder_agent",
@@ -61,6 +98,144 @@ async def get_agents(default_model_client, big_model_client, small_model_client,
  Code executor working directory is: {work_dir}
  So you can only access files in this directory.
  Always use absolute path to reference files as current directory might be different from the one you think it is.
+
+ === POWERPOINT PRESENTATION CREATION (python-pptx) ===
+ **When creating .pptx presentations, follow these CRITICAL patterns:**
+
+ 1. **Image Preprocessing (BEFORE adding to presentation):**
+ - Use PIL to convert WEBP, TIFF, and other unsupported formats to PNG
+ - Define supported formats: {'png', 'jpg', 'jpeg', 'gif', 'bmp'}
+ - For each unsupported image, open with PIL and save as PNG
+ - Handle RGBA images by converting to RGB with white background
+
+ 2. **Build Presentation Structure:**
+ - Import: `from pptx import Presentation; from pptx.util import Inches, Pt`
+ - Create presentation: `prs = Presentation()`
+ - Set dimensions: `prs.slide_width = Inches(10); prs.slide_height = Inches(7.5)`
+ - Track slide count to validate before saving
+ - Use `prs.slide_layouts[6]` (blank) for maximum control over image placement
+
+ 3. **Add Slides:**
+ - Title Slide: `slide = prs.slides.add_slide(prs.slide_layouts[0])`
+ - Content Slide: `slide = prs.slides.add_slide(prs.slide_layouts[1])`
+ - Image Slide: Use blank layout `prs.slide_layouts[6]` then `slide.shapes.add_picture(path, left, top, width, height)`
+ - Always wrap image operations in try/except and add text fallback if image fails
+
+ 4. **Save with Validation:**
+ - ALWAYS check: `if len(prs.slides) == 0: raise ValueError("No slides!")`
+ - Save to CORTEX_WORK_DIR: `output_path = os.path.join(os.environ['CORTEX_WORK_DIR'], 'Title.pptx')`
+ - Use descriptive filename (NOT draft.pptx)
+ - After save, verify file size: `os.path.getsize(output_path) > 10000` (empty files are tiny)
+ - Print success: `print(f"📁 Ready for upload: {{output_path}}")`
+
+ **CRITICAL CHECKLIST:**
+ ✅ Image format validation and conversion BEFORE adding
+ ✅ Slide count > 0 before saving
+ ✅ Safe image dimensions (Inches(9) width for 10" slides)
+ ✅ Error handling with text fallbacks
+ ✅ File size validation after save
+ ✅ Use Inches() for measurements, NOT pixels
+ ✅ Explicit presentation dimensions
+ ✅ Proper output path in CORTEX_WORK_DIR
+
+ === CRITICAL: FILE AUTO-DISCOVERY & UPLOAD ===
+ After you save files to CORTEX_WORK_DIR, the system AUTOMATICALLY:
+ 1. Scans CORTEX_WORK_DIR for deliverable files (.pptx, .ppt, .csv, .png, .jpg, .pdf, .zip)
+ 2. For .pptx files specifically: **picks the SINGLE LARGEST file** (assumes most complete)
+ 3. Uploads that file to Azure Blob Storage
+ 4. Provides URLs to the presenter
+
+ **CONSEQUENCE**: If your PowerPoint creation:
+ - Fails silently → no .pptx file exists → nothing gets presented
+ - Creates an empty file → small file size → might not be picked OR picked but empty
+ - Crashes before saving → no file → nothing presented
+
+ **YOUR RESPONSIBILITY**:
+ - ALWAYS validate that prs.slides has content before saving
+ - Print BOTH status AND file size: `print(f"✅ PPTX saved: {{path}} ({{os.path.getsize(path)}} bytes)")`
+ - On error, print explicit error: `print(f"❌ CRITICAL: Failed to create PPTX: {{error}}")`
+ - Never silently fail - ALWAYS log what happened
+ - Test file size > 50000 bytes for any real PowerPoint (empty files are 5-10KB)
+
+ === COMPLETE PPTX EXAMPLE (COPY-PASTE READY) ===
+ ```python
+ import os
+ import glob
+ from PIL import Image
+ from pptx import Presentation
+ from pptx.util import Inches, Pt
+
+ # Step 1: Preprocess images
+ work_dir = os.environ['CORTEX_WORK_DIR']
+ supported_formats = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}
+ image_dir = os.path.join(work_dir, 'assets')
+
+ if os.path.isdir(image_dir):
+     for img_file in glob.glob(os.path.join(image_dir, '*')):
+         ext = os.path.splitext(img_file)[1].lower().lstrip('.')
+         if ext not in supported_formats and ext:
+             try:
+                 with Image.open(img_file) as img:
+                     if img.mode in ('RGBA', 'LA'):
+                         rgb_img = Image.new('RGB', img.size, (255, 255, 255))
+                         rgb_img.paste(img, mask=img.split()[-1])
+                         img = rgb_img
+                     png_path = os.path.splitext(img_file)[0] + '.png'
+                     img.save(png_path, 'PNG')
+                 os.remove(img_file)
+                 print(f"✅ Converted {{os.path.basename(img_file)}} to PNG")
+             except Exception as e:
+                 print(f"⚠️ Skipping {{img_file}}: {{e}}")
+
+ # Step 2: Create presentation
+ prs = Presentation()
+ prs.slide_width = Inches(10)
+ prs.slide_height = Inches(7.5)
+ slide_count = 0
+
+ # Title slide
+ title_slide = prs.slides.add_slide(prs.slide_layouts[0])
+ title_slide.shapes.title.text = "Your Title"
+ title_slide.placeholders[1].text = "Subtitle"
+ slide_count += 1
+
+ # Content slides with validation
+ image_files = sorted(glob.glob(os.path.join(image_dir, '*.png')))
+ for idx, img_path in enumerate(image_files[:10]):  # Limit to 10 images
+     try:
+         slide = prs.slides.add_slide(prs.slide_layouts[6])
+         left = Inches(0.5)
+         top = Inches(0.5)
+         height = Inches(6)
+         pic = slide.shapes.add_picture(img_path, left, top, height=height)
+         slide_count += 1
+     except Exception as e:
+         print(f"⚠️ Failed to add image {{idx}}: {{e}}")
+         continue
+
+ # Validate before save
+ if slide_count == 0:
+     print("❌ ERROR: No slides were created!")
+     raise ValueError("Empty presentation")
+
+ # Step 3: Save with validation
+ output_path = os.path.join(work_dir, 'MyPresentation.pptx')
+ prs.save(output_path)
+
+ # Step 4: Verify
+ if os.path.exists(output_path):
+     size = os.path.getsize(output_path)
+     if size > 50000:
+         print(f"✅ PPTX saved: {{output_path}} ({{size}} bytes)")
+         print(f"📁 Ready for upload: {{output_path}}")
+     else:
+         print(f"❌ ERROR: PPTX too small ({{size}} bytes) - probably empty!")
+ else:
+     print(f"❌ ERROR: PPTX file was not created!")
+ ```
+
+ **AFTER SAVING FILES**: Immediately mention that file_cloud_uploader_agent should take over:
+ "I have saved the files. Now file_cloud_uploader_agent should upload them to the cloud."
  """,
  )

@@ -122,6 +297,19 @@ async def get_agents(default_model_client, big_model_client, small_model_client,
  **DO NOT provide information about packaging, dependencies, or development workflows.**
  Your output is for a non-technical end-user viewing it in a React app.
  **CRITICAL: ONLY use URLs for any files (images, videos, documents, etc.) that are explicitly provided in the `UPLOADED_FILES_SAS_URLS` or directly within the `RESULT` content from other agents, specifically from the `file_cloud_uploader_agent`. If a valid, real URL is not provided, you MUST NOT include any placeholder, fake, or fabricated URLs. NEVER hallucinate or fabricate any links or content.**
+
+ === CRITICAL URL VALIDATION (MANDATORY) ===
+ **Before creating your response, STOP and check:**
+ 1. Look at `UPLOADED_FILES_SAS_URLS` - is it empty `{}` or does it contain placeholder tokens like 'sas_token', 'skoid', 'sktid'?
+ 2. If UPLOADED_FILES_SAS_URLS is empty OR contains only placeholder/fake tokens:
+ **DO NOT ATTEMPT TO CREATE FAKE URLS**
+ Instead, respond: "⚠️ SYSTEM STATUS: Files are still being processed. Please wait for upload completion."
+ Then list what files are expected based on UPLOADED_FILES_LIST
+ 3. If UPLOADED_FILES_SAS_URLS contains REAL Azure blob URLs (with format: https://ACCOUNT.blob.core.windows.net/CONTAINER/FILE?sv=...&sig=REAL_TOKEN&se=...):
+ You may proceed to present with those URLs
+
+ **DO NOT GUESS, INTERPOLATE, OR HALLUCINATE URLs EVER.**
+ **A fake URL is worse than no URL at all.**
  """
  )

@@ -131,6 +319,19 @@ async def get_agents(default_model_client, big_model_client, small_model_client,
  tools=[upload_file_to_cloud_tool],
  description="A helpful assistant that can upload files to the cloud.",
  system_message=f"""You are a helpful assistant that can upload files to the cloud.
+ Your PRIMARY RESPONSIBILITY: After ANY code has been executed or any file-based task is completed,
+ you MUST immediately scan {work_dir} for ALL deliverable files (.pptx, .pdf, .csv, .png, .jpg, .zip, .json, .txt, .md).
+
+ CRITICAL: Do NOT wait to be asked. Do NOT wait for explicit file references.
+ Simply list all files in {work_dir} and upload EVERY one of them to the cloud.
+
+ For each file found:
+ 1. Upload it using your upload tool
+ 2. Report the cloud URL
+ 3. Confirm success
+
+ This ensures that NO files are left behind and the presenter always has access to deliverables.
+
  Upload referenced files to the cloud.
  Use your tool to upload the files.
  User does not have local access to the files so you must upload them to the cloud and provide the url.
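The uploader agent's tool ultimately resolves to `upload_file_to_azure_blob` (wrapped by `_safe_upload_wrapper` above); that implementation is not shown in this diff. Purely as an illustration of the upload-and-return-SAS-URL flow the prompts describe, a sketch using azure-storage-blob (the container name and environment variable are placeholders, not values from the package):

```python
import json
import os
from datetime import datetime, timedelta

from azure.storage.blob import BlobSasPermissions, BlobServiceClient, generate_blob_sas


def upload_and_sign(file_path: str, container: str = "deliverables") -> str:
    """Illustrative only: upload a local file and return JSON with a read-only SAS URL."""
    conn_str = os.environ["AZURE_STORAGE_CONNECTION_STRING"]  # placeholder env var
    service = BlobServiceClient.from_connection_string(conn_str)
    blob_name = os.path.basename(file_path)
    blob = service.get_blob_client(container=container, blob=blob_name)

    with open(file_path, "rb") as fh:
        blob.upload_blob(fh, overwrite=True)

    sas = generate_blob_sas(
        account_name=service.account_name,
        container_name=container,
        blob_name=blob_name,
        account_key=service.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=datetime.utcnow() + timedelta(hours=24),
    )
    result = json.dumps({"file": blob_name, "url": f"{blob.url}?{sas}"})
    # Wrap the JSON in a markdown fence, as the diff's _wrap_json_result does,
    # so AutoGen treats the tool output as plain text.
    return f"```json\n{result}\n```"
```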
package/helper-apps/cortex-autogen2/main.py CHANGED
@@ -72,7 +72,7 @@ async def main():
  decoded_content = base64.b64decode(raw_content).decode('utf-8')
  task_data = json.loads(decoded_content)
  except (json.JSONDecodeError, TypeError, ValueError) as e:
- logger.warning(f"⚠️ Failed to decode as base64, trying as raw JSON: {e}")
+ logger.debug(f"Base64 decode failed; falling back to raw JSON: {e}")
  try:
  task_data = json.loads(raw_content)
  except json.JSONDecodeError as e2:
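For context, the surrounding code in main.py attempts base64-decoded JSON first and falls back to raw JSON on failure; a standalone sketch of that pattern, with the wrapper function added only for illustration:

```python
import base64
import json
import logging

logger = logging.getLogger(__name__)


def parse_task_payload(raw_content: str) -> dict:
    """Decode a task payload that may be base64-encoded JSON or plain JSON."""
    try:
        decoded_content = base64.b64decode(raw_content).decode('utf-8')
        return json.loads(decoded_content)
    except (json.JSONDecodeError, TypeError, ValueError) as e:
        # Demoted to debug in 1.4.4: the raw-JSON fallback is an expected path, not a warning.
        logger.debug(f"Base64 decode failed; falling back to raw JSON: {e}")
        return json.loads(raw_content)
```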
package/helper-apps/cortex-autogen2/pyproject.toml CHANGED
@@ -23,6 +23,8 @@ openpyxl = "^3.1.2"
  xlrd = "^2.0.1"
  python-pptx = "^0.6.23"
  odfpy = "^1.4.1"
+ pdf2image = "^1.16.3"
+ python-pptx-interface = "^0.0.14"

  # Infra / networking / AI you already had
  autogen-agentchat = { extras = ["openai"], version = "^0.7.4" }
@@ -41,11 +43,21 @@ playwright = "^1.54.0"
  markitdown = "^0.1.2"
  aiofiles = "^24.1.0"
  aiohttp = "^3.12.14"
+ numpy = "^1.26.0"
  pandas = "^2.3.1"
+ polars = "^1.8.2"
+ pyarrow = "^16.1.0"
  matplotlib = "^3.10.3"
+ plotly = "^5.24.1"
+ kaleido = "^0.2.1"
  lxml = "^5.3.0"
  reportlab = "^4.2.5"
  fpdf2 = "^2.7.9"
+ wordcloud = "^1.9.3"
+ arabic-reshaper = "^3.0.0"
+ python-bidi = "^0.4.2"
+ nltk = "^3.9.1"
+ spacy = "^3.8.4"

  # ─────────────────────────────────────────────────
  # Optional groups (install with: poetry install --with media,archives,science …)
package/helper-apps/cortex-autogen2/requirements.txt CHANGED
@@ -11,6 +11,8 @@ openpyxl>=3.1.2
  xlrd>=2.0.1
  python-pptx>=0.6.23
  odfpy>=1.4.1
+ pdf2image>=1.16.3
+ python-pptx-interface>=0.0.14

  # -------------------------------------------------------------------
  # MEDIA – images / audio / video
@@ -35,11 +37,23 @@ rarfile>=4.0
  # DATA & SCIENTIFIC
  # -------------------------------------------------------------------
  pandas>=2.2.0
+ numpy>=1.26.0
+ polars>=1.8.2
+ pyarrow>=16.1.0
  tabula-py>=2.8.1
  h5py>=3.10.0
  netCDF4>=1.6.5
  ezdxf>=1.1.1
  seaborn>=0.13.0
+ plotly>=5.24.1
+ kaleido>=0.2.1
+ wordcloud>=1.9.3
+
+ # -------------------------------------------------------------------
+ # NLP & TEXT PROCESSING
+ # -------------------------------------------------------------------
+ nltk>=3.9.1
+ spacy>=3.8.4

  # -------------------------------------------------------------------
  # ARABIC & RTL TEXT SUPPORT
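The wordcloud, arabic-reshaper, and python-bidi additions work together for the Arabic/RTL support noted here: text is reshaped into joined presentation forms, reordered for right-to-left display, then rendered. A minimal sketch of that pipeline (the sample text and font path are assumptions, not values from the package):

```python
import arabic_reshaper                   # joins Arabic letters into presentation forms
from bidi.algorithm import get_display  # reorders text for right-to-left display
from wordcloud import WordCloud

text = "مرحبا بالعالم مرحبا"             # sample text (assumption)
reshaped = arabic_reshaper.reshape(text)
display_text = get_display(reshaped)

# font_path must point to an Arabic-capable TTF present in the image (assumption)
wc = WordCloud(
    font_path="/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    width=800,
    height=400,
    background_color="white",
)
wc.generate(display_text)
wc.to_file("wordcloud_ar.png")
```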
package/helper-apps/cortex-autogen2/services/redis_publisher.py CHANGED
@@ -75,7 +75,7 @@ def publish_request_progress(data: Dict[str, Any]) -> bool:
  pct_bucket = None
  if not prev or prev.get('info') != info or prev.get('pct_bucket') != pct_bucket:
  _last_logged_progress[rid] = {'info': info, 'pct_bucket': pct_bucket}
- logger.info(f"Published progress update for request {rid}: progress={pct}, subscribers={result}")
+ logger.info(f"Published progress update for request {rid}: info='{info}', progress={pct}, subscribers={result}")
  except Exception:
  # Safe fallback if logging diff fails
  logger.debug("Progress publish logged without diff due to exception")
package/helper-apps/cortex-autogen2/services/run_analyzer.py CHANGED
@@ -426,7 +426,7 @@ def build_run_document(
  doc = {
  "id": task_id,
  "date": now_iso,
- "task": _truncate(redact(task_text), 4000),
+ "task": redact(task_text),
  "content": content_blob,
  "requestId": task_id,
  }