@aj-archipelago/cortex 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +25 -4
- package/helper-apps/cortex-autogen2/agents.py +19 -6
- package/helper-apps/cortex-autogen2/services/azure_ai_search.py +115 -0
- package/helper-apps/cortex-autogen2/services/run_analyzer.py +594 -0
- package/helper-apps/cortex-autogen2/task_processor.py +98 -2
- package/lib/crypto.js +1 -0
- package/lib/entityConstants.js +12 -35
- package/lib/keyValueStorageClient.js +53 -1
- package/lib/util.js +33 -6
- package/package.json +2 -1
- package/pathways/system/entity/memory/sys_memory_manager.js +1 -0
- package/pathways/system/entity/memory/sys_memory_process.js +4 -3
- package/pathways/system/entity/memory/sys_memory_update.js +4 -3
- package/pathways/system/entity/memory/sys_read_memory.js +12 -4
- package/pathways/system/entity/memory/sys_save_memory.js +16 -9
- package/pathways/system/entity/memory/sys_search_memory.js +5 -4
- package/pathways/system/entity/sys_entity_agent.js +2 -1
- package/pathways/system/entity/tools/sys_tool_bing_search.js +2 -2
- package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +1 -2
- package/pathways/system/entity/tools/sys_tool_callmodel.js +2 -1
- package/pathways/system/entity/tools/sys_tool_coding.js +1 -2
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +1 -1
- package/pathways/system/entity/tools/sys_tool_image.js +2 -1
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +3 -3
- package/pathways/system/entity/tools/sys_tool_mermaid.js +187 -38
- package/pathways/system/entity/tools/sys_tool_reasoning.js +2 -0
- package/pathways/system/entity/tools/sys_tool_verify.js +1 -1
- package/pathways/transcribe_gemini.js +3 -2
- package/server/graphql.js +1 -1
- package/server/pathwayResolver.js +8 -7
- package/server/plugins/veoVideoPlugin.js +29 -1
- package/testrun.log +35371 -0
- package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +1 -3
- package/tests/unit/core/crypto.test.js +65 -0
- package/tests/unit/core/doubleEncryptionStorageClient.test.js +262 -0
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from autogen_core.models import UserMessage
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Placeholder substituted for secrets (tokens, SAS params, base64 runs) by redact().
MASK = "***"
# First bullet of the generic fallback directives in summarize_prior_learnings;
# should_index_run treats learnings containing this phrase as boilerplate and skips indexing.
DEFAULT_KEY_PHRASE = "Validate environment and paths early"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _truncate(text: Optional[str], limit: int = 2000) -> str:
|
|
15
|
+
if not text:
|
|
16
|
+
return ""
|
|
17
|
+
t = str(text)
|
|
18
|
+
if len(t) <= limit:
|
|
19
|
+
return t
|
|
20
|
+
return t[:limit] + "\n... [truncated]"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def redact(text: Optional[str]) -> str:
    """
    Scrub secrets from the given text while keeping it readable:
    Authorization Bearer tokens, SAS query params ('sig', 'se'),
    and long base64-like runs are replaced with the mask.
    """
    if not text:
        return ""
    scrubbed = str(text)
    # (pattern, replacement) pairs, applied in order.
    rules = (
        # Mask Authorization Bearer tokens
        (r"(?i)(authorization\s*:\s*bearer\s+)([^\s]+)", rf"\1{MASK}"),
        # Mask common SAS params 'sig' and 'se'
        (r"(?i)([?&]sig=)([^&\s]+)", rf"\1{MASK}"),
        (r"(?i)([?&]se=)([^&\s]+)", rf"\1{MASK}"),
        # Collapse long base64-like runs entirely
        (r"([A-Za-z0-9+/]{64,}={0,2})", MASK),
    )
    for pattern, replacement in rules:
        scrubbed = re.sub(pattern, replacement, scrubbed)
    return scrubbed
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def collect_run_metrics(messages: List[Any]) -> Dict[str, Any]:
    """
    Tally coarse counters over a message stream.

    Counts total turns, ToolCallExecutionEvent occurrences, messages whose
    text mentions an error marker, and messages showing the
    'tool_calls'/'MultiMessage' schema-mismatch signature.
    """
    stream = messages or []
    tool_events = 0
    error_hits = 0
    schema_hits = 0

    for item in stream:
        try:
            if getattr(item, "type", None) == "ToolCallExecutionEvent":
                tool_events += 1
            raw = getattr(item, "content", None)
            body = "" if raw is None else str(raw)
            lowered = body.lower()
            if any(marker in lowered for marker in ("error", "exception", "traceback", "task not completed")):
                error_hits += 1
            # Both tokens together indicate a tool-call schema mismatch.
            if "tool_calls" in body and "MultiMessage" in body:
                schema_hits += 1
        except Exception:
            continue

    return {
        "turnCount": len(stream),
        "toolCallCount": tool_events,
        "errorMentions": error_hits,
        "schemaErrorMentions": schema_hits,
    }
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def extract_errors(messages: List[Any]) -> List[Dict[str, Any]]:
    """
    Extract concrete error signals from the stream with strict filtering to avoid advice-like lines.
    Includes:
    - Tool execution errors (ToolCallExecutionEvent items flagged is_error)
    - Python/stack traces and typical 'Error:'/'Exception:' lines
    - Explicit 'TASK NOT COMPLETED:' markers
    Excludes:
    - Advice like 'include error handling', 'no errors', etc.

    Returns a list of dicts with keys: type, message (redacted, <=512 chars),
    source, firstSeenIndex, createdAt. Entries are deduped on
    (first 120 lowercased chars of the message, source).
    """
    def classify_error_line(line: str) -> Optional[str]:
        # Classify a single transcript line; None means "not an error signal".
        s = (line or "").strip()
        if not s:
            return None
        low = s.lower()
        # Exclusions: advisory phrases
        if any(p in low for p in ["error handling", "handle errors", "no errors", "without errors", "few errors", "low error"]):
            return None
        # Strong markers
        if "task not completed" in low:
            return "termination"
        if "traceback (most recent call last)" in low:
            return "traceback"
        # Common error prefixes or exception class names
        if re.search(r"^(\s*(error|exception)\b|[A-Za-z]+Error:|[A-Za-z]+Exception:)", s, re.IGNORECASE):
            return "runtime"
        # Generic lines that merely include the word 'error' are ignored
        return None

    results: List[Dict[str, Any]] = []
    # Dedupe keys: (message prefix, source) — shared by both extraction passes below.
    seen: set = set()

    for idx, m in enumerate(messages or []):
        try:
            src = getattr(m, "source", None)
            created_at = getattr(m, "created_at", None)

            # 1) Tool execution errors with is_error flag
            mtype = getattr(m, "type", None)
            if mtype == "ToolCallExecutionEvent" and hasattr(m, "content") and isinstance(getattr(m, "content"), list):
                try:
                    for res in getattr(m, "content"):
                        try:
                            if hasattr(res, "is_error") and getattr(res, "is_error"):
                                # Redact secrets before truncating to a compact excerpt.
                                msg = _truncate(redact(str(getattr(res, "content", "") or "")), 512)
                                low = msg.lower()
                                key = (low[:120], src)
                                if key in seen:
                                    continue
                                seen.add(key)
                                results.append({
                                    "type": "tool_error",
                                    "message": msg,
                                    "source": src or "unknown",
                                    "firstSeenIndex": idx,
                                    "createdAt": str(created_at) if created_at else None,
                                })
                        except Exception:
                            # Skip a malformed tool result; keep scanning the rest.
                            continue
                except Exception:
                    pass

            # 2) Parse textual lines for strong error markers
            content = getattr(m, "content", None)
            text = str(content) if content is not None else ""
            for line in (text.splitlines() if text else []):
                kind = classify_error_line(line)
                if not kind:
                    continue
                msg = _truncate(redact(line.strip()), 512)
                low = msg.lower()
                key = (low[:120], src)
                if key in seen:
                    continue
                seen.add(key)
                # Capture specific dependency/image/source hints for reuse
                entry = {
                    "type": kind,
                    "message": msg,
                    "source": src or "unknown",
                    "firstSeenIndex": idx,
                    "createdAt": str(created_at) if created_at else None,
                }
                # Categories intentionally not assigned; rely on LLM to infer lessons
                results.append(entry)
        except Exception:
            # Never let a single malformed message abort extraction.
            continue

    return results
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
async def summarize_learnings(messages_text: str, errors_text: str, model_client) -> Tuple[str, str]:
    """
    Return (best_practices_text, antipatterns_text) using the LLM; avoid static heuristics.

    Args:
        messages_text: Redacted transcript excerpt embedded into the prompt (truncated to 6000 chars).
        errors_text: Error excerpts embedded into the prompt (truncated to 2000 chars).
        model_client: Client exposing async ``create(messages=...)`` returning an
            object with a ``content`` string (e.g. an autogen model client).

    Returns:
        Tuple of newline-joined bullet lists (max 10 each); ("", "") on any failure.
    """
    try:
        prompt = f"""
You are a senior reliability engineer extracting high-value, reusable lessons from an agent transcript.

Task: Produce two sections with concise bullets (≤18 words each):
1) BEST PRACTICES (5–10 bullets): concrete, repeatable actions that prevent failures and speed future runs
2) ANTIPATTERNS (5 bullets): mistakes to avoid

Rules:
- No secrets or environment-specific values
- Prefer actionable checks (dependency preflight, schema validation), robust fallbacks, and proven fast paths
- Reflect image acquisition pitfalls (network blocks, non-image payloads, licensing) if present

TRANSCRIPT (redacted):
{_truncate(messages_text, 6000)}

ERROR EXCERPTS:
{_truncate(errors_text, 2000)}

Output format exactly:
BEST PRACTICES:
- ...
- ...
ANTIPATTERNS:
- ...
- ...
"""
        msgs = [UserMessage(content=prompt, source="run_analyzer_summarize")]
        resp = await model_client.create(messages=msgs)
        text = (resp.content or "").strip()
        best = []
        anti = []
        # State machine over response lines: section headers switch buckets,
        # '-' bullets are appended to the current bucket.
        section = None
        for line in text.splitlines():
            t = line.strip()
            if not t:
                continue
            u = t.upper()
            if u.startswith("BEST PRACTICES"):
                section = "best"
                continue
            if u.startswith("ANTIPATTERNS"):
                section = "anti"
                continue
            if t.startswith("-"):
                if section == "best":
                    best.append(t)
                elif section == "anti":
                    anti.append(t)
        return ("\n".join(best[:10]), "\n".join(anti[:10]))
    except Exception:
        # Best-effort summarizer: any model/parse failure degrades to empty output.
        return ("", "")
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
async def generate_improvement_playbook(
    messages_text: str,
    errors: List[Dict[str, Any]],
    metrics: Dict[str, Any],
    external_sources: Optional[List[str]],
    model_client,
) -> Dict[str, Any]:
    """
    Ask the LLM to produce a compact, high-signal "Improvements Playbook" for future runs.
    Returns dict: { text: str, actionables: int, improvement_score: int, has_failures: bool }
    plus, on the model-backed path, "hints": List[str] of quick-reference directives.

    Args:
        messages_text: Redacted conversation excerpt (truncated to 6000 chars in the prompt).
        errors: Entries from extract_errors(); first 30 are rendered into the prompt.
        metrics: Counters from collect_run_metrics(); embedded as JSON.
        external_sources: Non-blob source URLs for provenance context.
        model_client: Async client with ``create(messages=...)``; when falsy,
            a static generic playbook is returned instead of calling the model.
    """
    try:
        if not model_client:
            # No model available: return a fixed generic playbook so callers
            # always receive the same shape.
            return {
                "text": (
                    "IMPROVEMENTS PLAYBOOK\n\n"
                    "Key Failures & Fixes:\n- None observed.\n\n"
                    "Proven Source Patterns:\n- Prefer authoritative sites; record source URLs explicitly.\n\n"
                    "Effective Patterns:\n- Validate required data structures before main logic.\n- Use absolute paths; print/log key steps.\n\n"
                    "Reliability:\n- Retry downloads 3x with backoff; cache downloaded assets.\n\n"
                    "Guardrails:\n- Terminate gracefully on missing dependencies and report clear remediation.\n\n"
                    "Next-Time Plan Outline:\n1) Verify dependencies\n2) Validate inputs\n3) Fetch assets with retries\n4) Assemble\n5) Upload & present\n"
                ),
                "actionables": 6,
                "improvement_score": 60,
                "has_failures": bool(errors),
            }

        # Render error entries as "- [source] message" lines for the prompt.
        err_lines = []
        for e in (errors or [])[:30]:
            try:
                err_lines.append(f"- [{e.get('source','?')}] {e.get('message','')}")
            except Exception:
                continue
        err_block = "\n".join(err_lines)
        src_block = "\n".join([s for s in (external_sources or []) if isinstance(s, str)])
        prompt = f"""
You are optimizing a multi-agent system. Create a compact, high-signal Improvements Playbook strictly for future runs.

GOAL: Document only reusable improvements and concrete fixes that will materially improve similar tasks next time. Avoid generic advice.

INPUT METRICS (json):
{json.dumps(metrics, indent=2)}

CONCRETE FAILURES:
{_truncate(err_block, 1800)}

KNOWN EXTERNAL SOURCES (non-blob):
{_truncate(src_block, 1200)}

CONVERSATION EXCERPTS (redacted):
{_truncate(messages_text, 6000)}

OUTPUT FORMAT (exact headings, concise bullets ≤18 words each):
IMPROVEMENTS PLAYBOOK

Key Failures & Fixes:
- ...

Proven Source Patterns:
- ...

Effective Tool/Code Patterns:
- ...

Reliability (retries, rate-limit, caching):
- ...

Guardrails & Preconditions:
- ...

Next-Time Plan Outline:
1) ...
2) ...
3) ...

Image Acquisition Failure Taxonomy (when URLs valid but not usable):
- Network/HTTP: 403/404/429, timeouts, SSL/captcha blocks
- Format/Integrity: Content-Type mismatch, non-image payload, Pillow .verify() fails
- License/Robots: disallowed scraping or reuse; fallback and record reason
- Mitigations: HEAD check; user-agent; backoff; alternate domain; sprite/fallback pack; manifest notes

IMPROVEMENT SCORE: <0-100>
ACTIONABLES: <integer count of distinct concrete actions>
"""
        msgs = [UserMessage(content=prompt, source="run_analyzer_improvements")]
        resp = await model_client.create(messages=msgs)
        text = (resp.content or "").strip()

        # Parse score and actionables
        score = 0
        actionables = 0
        try:
            m = re.search(r"IMPROVEMENT\s*SCORE\s*:\s*(\d{1,3})", text, re.IGNORECASE)
            if m:
                # Clamp to the documented 0-100 range.
                score = max(0, min(100, int(m.group(1))))
        except Exception:
            pass
        try:
            m2 = re.search(r"ACTIONABLES\s*:\s*(\d+)", text, re.IGNORECASE)
            if m2:
                actionables = max(0, int(m2.group(1)))
        except Exception:
            # Fallback: count bullets
            actionables = _count_bullets(text)

        # Build structured quick-reference hints for planner reuse next time
        hints: List[str] = []
        tlow = text.lower()
        if any(k in tlow for k in ["no module named", "importerror", "cannot import"]):
            hints.append("Preflight: import python-pptx, Pillow; pip install if import fails")
        if any(k in tlow for k in ["categorychartdata", "radarchart", "bar chart data"]):
            hints.append("Use python-pptx CategoryChartData; avoid RadarChartData (unsupported)")
        if any(k in tlow for k in ["antialias", "resampling.lanczos"]):
            hints.append("Pillow: use Image.Resampling.LANCZOS instead of ANTIALIAS")
        if any(k in tlow for k in ["cannot identify image file", "head request", "license"]):
            hints.append("Validate image URLs via HEAD; Pillow .verify(); ensure license before embed")

        return {
            "text": text,
            # If the model omitted an ACTIONABLES line, fall back to bullet count.
            "actionables": actionables if actionables > 0 else _count_bullets(text),
            "improvement_score": score,
            "has_failures": bool(errors),
            "hints": hints,
        }
    except Exception:
        # Model call or parsing failed entirely: return a minimal, well-formed result.
        return {
            "text": "IMPROVEMENTS PLAYBOOK\n\nKey Failures & Fixes:\n- None parsed due to summarizer error.",
            "actionables": 0,
            "improvement_score": 0,
            "has_failures": bool(errors),
        }
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def build_run_document(
    task_id: str,
    task_text: str,
    owner: Optional[str],
    models: Optional[Dict[str, Any]],
    assets: Optional[Dict[str, Any]],
    metrics: Dict[str, Any],
    errors: List[Dict[str, Any]],
    improvement_text: str,
    final_snippet: str,
) -> Dict[str, Any]:
    """
    Assemble the search-index document for a completed run.

    Sections (Metrics, Errors, Improvements Playbook, Tags, Final Output
    Snippet, Sources) are concatenated into a single 'content' string field.

    Args:
        task_id: Unique run id; used for both 'id' and 'requestId'.
        task_text: Original task statement (redacted, truncated to 4000 chars).
        owner: Optional owner id; included in the doc only when truthy.
        models: Currently unused; accepted for interface stability.
        assets: Optional dict; 'external_media_urls' feeds the Sources section
            (Azure blob URLs are excluded — they typically carry SAS tokens).
        metrics: Counters from collect_run_metrics(); embedded as JSON.
        errors: Entries from extract_errors(); first 20 are embedded.
        improvement_text: Improvements Playbook text; also drives tag extraction.
        final_snippet: Final output excerpt (redacted, truncated to 2000 chars).

    Returns:
        Dict with keys id, date, task, content, requestId (+ owner when provided).
    """
    from datetime import timezone  # local import keeps this fix self-contained

    # datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC time,
    # then drop tzinfo so isoformat() + "Z" matches the original output format.
    now_iso = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    # Build sectioned content text (single string field for index)
    parts: List[str] = []
    parts.append("Metrics:\n" + _truncate(json.dumps(metrics, indent=2), 1200))
    if errors:
        err_lines = []
        for e in errors[:20]:
            try:
                err_lines.append(f"- [{e.get('source','?')}] {e.get('message','')}")
            except Exception:
                continue
        parts.append("Errors:\n" + "\n".join(err_lines))
    if improvement_text:
        parts.append("Improvements Playbook:\n" + improvement_text)
    # Extract high-signal tags for future retrieval and reuse
    try:
        tags: List[str] = []
        tlow = improvement_text.lower()
        if any(k in tlow for k in ["no module named", "importerror", "cannot import"]):
            tags.append("dependency")
        if any(k in tlow for k in ["image", "png", "jpg", "jpeg", "webp", "cannot identify image file", "head request", "license"]):
            tags.append("image")
        if any(k in tlow for k in ["pptx", "categorychartdata", "radarchart", "antialias", "resampling.lanczos"]):
            tags.append("pptx_api")
        if tags:
            parts.append("Tags:\n" + ", ".join(sorted(set(tags))))
    except Exception:
        pass
    if final_snippet:
        parts.append("Final Output Snippet:\n" + _truncate(redact(final_snippet), 2000))
    # Include external source URLs (not SAS) for provenance; exclude Azure blob SAS links
    try:
        if assets and isinstance(assets, dict):
            raw_sources = []
            try:
                raw_sources.extend(list(assets.get("external_media_urls") or []))
            except Exception:
                pass

            def _is_azure_blob(url: str) -> bool:
                # Blob-storage links are environment-specific and may embed SAS tokens.
                try:
                    return "blob.core.windows.net" in (url or "").lower()
                except Exception:
                    return False

            srcs = []
            seen = set()
            for u in raw_sources:
                if not isinstance(u, str):
                    continue
                if _is_azure_blob(u):
                    continue
                if u in seen:
                    continue
                seen.add(u)
                srcs.append(u)
            if srcs:
                # Cap at 24 URLs and 2000 chars to bound document size.
                parts.append("Sources:\n" + _truncate("\n".join(srcs[:24]), 2000))
    except Exception:
        pass

    content_blob = "\n\n".join(parts)

    doc = {
        "id": task_id,
        "date": now_iso,
        "task": _truncate(redact(task_text), 4000),
        "content": content_blob,
        "requestId": task_id,
    }
    # Include owner only if provided
    if owner:
        doc["owner"] = owner
    return doc
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
async def summarize_prior_learnings(similar_docs: List[Dict[str, Any]], model_client) -> str:
    """
    Build <=8 fast-path directives from prior docs aimed at minimizing steps next time.
    Preference order: Improvements Playbook sections; fallback to Best Practices/Antipatterns.
    Output: short bullets (≤18 words) that can be directly embedded into planning as constraints.

    Args:
        similar_docs: Prior run documents (as built by build_run_document) with a 'content' field.
        model_client: Optional async client with ``create(messages=...)``; when absent or
            failing, the raw extracted bullets (deduped, max 8) are returned instead.
    """
    # Extract bullets from prior content
    bullets: List[str] = []
    for d in similar_docs or []:
        try:
            content = str(d.get("content") or "")
            if not content:
                continue
            # Prefer fast-path sections from Improvements Playbook
            sections = [
                "Key Failures & Fixes:",
                "Effective Tool/Code Patterns:",
                "Reliability (retries, rate-limit, caching):",
                "Guardrails & Preconditions:",
                "Next-Time Plan Outline:",
            ]
            extracted = False
            for section in sections:
                idx = content.find(section)
                if idx >= 0:
                    # Section body runs from the heading to the next blank line.
                    seg = content[idx:].split("\n\n", 1)[0]
                    for line in seg.splitlines()[1:]:
                        t = line.strip()
                        if not t:
                            continue
                        # Normalize numbered plan steps ("1)" / "1.") into bullets.
                        if t[0].isdigit() and (t[1:2] == ")" or t[1:2] == "."):
                            t = "- " + t
                        if t.startswith("-") and len(t) > 2:
                            bullets.append(t)
                            extracted = True
            if not extracted:
                # Fallback: Best Practices / Antipatterns
                for section in ("Best Practices:", "Antipatterns:"):
                    idx = content.find(section)
                    if idx >= 0:
                        seg = content[idx:].split("\n\n", 1)[0]
                        for line in seg.splitlines()[1:]:
                            t = line.strip()
                            if t.startswith("-") and len(t) > 2:
                                bullets.append(t)
        except Exception:
            continue

    # Fallback: take first lines of content
    if not bullets:
        for d in similar_docs or []:
            try:
                for line in str(d.get("content") or "").splitlines():
                    t = line.strip()
                    if t.startswith("-") and len(t) > 2:
                        bullets.append(t)
                    if len(bullets) >= 12:
                        break
                if len(bullets) >= 12:
                    break
            except Exception:
                continue

    # Summarize into ≤8 concise, step-minimizing directives
    if model_client and bullets:
        try:
            prompt = f"""
Condense these prior lessons into 5-8 FAST-PATH DIRECTIVES (≤18 words each) to minimize steps next time.
Focus on dependency preflight, known API substitutions, asset/download validation, and deliverables verification.
Avoid secrets and environment-specific details.

LESSONS:
{chr(10).join(bullets[:40])}

Output bullets only (no headings):
- ...
- ...
"""
            msgs = [UserMessage(content=prompt, source="run_analyzer_prior")]
            resp = await model_client.create(messages=msgs)
            text = (resp.content or "").strip()
            # Keep only bullet lines
            out_lines = [ln for ln in text.splitlines() if ln.strip().startswith("-")]
            if out_lines:
                return "\n".join(out_lines[:8])
        except Exception:
            pass

    # No model or failure: return first up to 8 bullets
    uniq = []
    seen = set()
    for b in bullets:
        if b not in seen:
            uniq.append(b)
            seen.add(b)
        if len(uniq) >= 8:
            break
    if not uniq:
        # Nothing extracted at all: generic default directives.
        uniq = [
            "- Validate environment and paths early",
            "- Log outputs and errors concisely",
            "- Use absolute paths and avoid placeholders",
            "- Avoid repeating failed steps",
            "- Upload deliverables once, then reference URLs",
        ]
    return "\n".join(uniq)
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _count_bullets(text: Optional[str]) -> int:
|
|
548
|
+
try:
|
|
549
|
+
return sum(1 for ln in (text or "").splitlines() if ln.strip().startswith("-"))
|
|
550
|
+
except Exception:
|
|
551
|
+
return 0
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def should_index_run(metrics: Dict[str, Any], errors: List[Dict[str, Any]], best_practices_text: str, antipatterns_text: str, assets: Optional[Dict[str, Any]] = None) -> bool:
    """
    Decide whether a run carries enough signal to index.

    Index when: any concrete errors or schema issues were observed; tools were
    called or assets were produced; or the learnings are substantial (>=7
    bullets over >=12 turns) and not just the generic boilerplate.
    Skip otherwise (and on any analyzer failure) to avoid noise.
    """
    try:
        # Explicit errors or schema problems always carry signal.
        if errors:
            return True
        if int(metrics.get("schemaErrorMentions") or 0) > 0:
            return True

        # Tool usage or produced assets make the trace operationally useful.
        if int(metrics.get("toolCallCount") or 0) > 0:
            return True
        if assets:
            try:
                uploaded = assets.get("uploaded_file_urls") or {}
                media = assets.get("external_media_urls") or []
                if len(uploaded) + len(media) > 0:
                    return True
            except Exception:
                pass

        # Content-only path: demand many actionable bullets and enough depth.
        bullet_total = _count_bullets(best_practices_text) + _count_bullets(antipatterns_text)
        turn_total = int(metrics.get("turnCount") or 0)
        if bullet_total < 7 or turn_total < 12:
            return False
        combined = f"{best_practices_text}\n{antipatterns_text}".lower()
        # The default key phrase marks boilerplate learnings — not worth indexing.
        return DEFAULT_KEY_PHRASE.lower() not in combined
    except Exception:
        # On analyzer failure, be conservative and skip.
        return False
|
|
593
|
+
|
|
594
|
+
|