npm - nexo-brain - Versions diffs - 5.1.1 → 5.2.1 - Mend

nexo-brain 5.1.1 → 5.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.claude-plugin/plugin.json +1 -1
package/README.md +8 -0
package/package.json +1 -1
package/src/plugins/cortex.py +102 -2
package/src/plugins/protocol.py +128 -2
package/src/scripts/deep-sleep/apply_findings.py +2 -1

package/.claude-plugin/plugin.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "5.1.1",
+  "version": "5.2.1",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED

@@ -87,6 +87,14 @@ Versions `3.1.7` through `3.2.0` close the recent-memory gap:
 - when even that misses, NEXO now exposes raw transcript fallback tools for Claude Code and Codex session stores
 - NEXO can now inspect itself through a live system catalog derived from canonical sources instead of relying only on stale docs or operator memory
+Version `5.2.0` closes two focused gaps in the Cortex layer that were left open by the v5.1 audit — the high-stakes response-contract detector was English-only, and the `nexo-cortex-cycle` cron was writing a quality snapshot that no reader ever consumed:
+- `HIGH_STAKES_KEYWORDS_ES` adds ~45 Spanish keywords to the high-stakes detector with accented and unaccented variants, so a goal written in Spanish (`migrar la base de datos de producción`) trips the same gate as its English twin.
+- `NEGATION_PATTERNS` suppresses false positives when the user explicitly disclaims touching the sensitive area (`sin afectar producción`, `no tocar prod`, `without touching production`, `don't modify`). The raw keyword being present is no longer enough to flag the task.
+- `evaluate_response_confidence` accepts two new optional kwargs, `pre_action_context_hits` (+up to 10) and `area_has_atlas_entry` (+5), so the score can finally reward tasks that loaded real context instead of only punishing unprepared ones. Both signals are capped and cannot override a real risk penalty.
+- A monotonic numeric safeguard layers on top of the boolean decision tree: `answer` downgrades to `verify` when `final_score < 50`, and `verify` downgrades to `defer` when `high_stakes` and `final_score < 30`. The safeguard can only make response discipline stricter, never looser.
+- `handle_cortex_quality` in `src/plugins/cortex.py` now reads `$NEXO_HOME/operations/cortex-quality-latest.json` when the requested window (7 or 1 days) is fresh (<6h 30m) and the schema matches — silent fallback to the live SQL computation on any failure. The handler's JSON response now includes `"source": "cache" | "live"` for observability.
 Version `5.1.0` lands the full NEXO-AUDIT-2026-04-11 roadmap as a single minor bump — every open evolution / adaptive / cognitive / skills loop now closes under itself, the knowledge graph exports cleanly, OpenTelemetry spans can be turned on without a hard dependency, and every PR has to clear lint, security, coverage, and release-readiness gates before it can merge:
 - Evolution cycle now auto-applies user-approved proposals on the next run (backed by the new idempotent migration `m38`), adaptive learned-weight rollbacks surface as visible followups, outcome patterns auto-promote to draft skills, and a Voyager-style detector exposes co-occurring skill pairs as composite-skill candidates via `nexo_skill_compose_candidates`.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "5.1.1",
+  "version": "5.2.1",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/plugins/cortex.py CHANGED

@@ -15,9 +15,12 @@ v0.1: Single MCP tool + middleware validation.
 """
 import json
+import os
 import re
 import secrets
 import time
+from datetime import datetime, timedelta
+from pathlib import Path
 def _get_db():
@@ -894,6 +897,27 @@ def handle_cortex_decide(
         task_id=task_id,
     )
+    # Auto-create outcome when none exists, so cortex decisions
+    # get verified by outcome-checker and close the feedback loop.
+    if resolved_outcome_id is None and clean_goal and task_id:
+        try:
+            from db import create_outcome
+            _deadline = (datetime.now() + timedelta(days=7)).strftime("%Y-%m-%d")
+            _outcome = create_outcome(
+                action_type="cortex_decision",
+                description=f"Cortex decision: {clean_goal[:120]}",
+                expected_result=f"Recommended '{scored[0]['name']}' succeeds",
+                metric_source="decision_outcome",
+                action_id=task_id,
+                session_id=session_id,
+                deadline=_deadline,
+            )
+            if isinstance(_outcome, dict) and _outcome.get("id"):
+                resolved_outcome_id = int(_outcome["id"])
+        except Exception:
+            pass  # non-critical: decision still records without outcome
     try:
         from db import create_cortex_evaluation
@@ -1003,12 +1027,88 @@ def handle_cortex_override(evaluation_id: int, chosen: str, reason: str) -> str:
     return json.dumps({"ok": True, "evaluation": updated}, ensure_ascii=False, indent=2)
+# v5.2.0: Cortex quality cache reader. The `nexo-cortex-cycle` cron
+# (src/scripts/nexo-cortex-cycle.py) writes a fresh quality snapshot to
+# $NEXO_HOME/operations/cortex-quality-latest.json every 6h. Until this
+# release the reader was missing — the snapshot was write-only and every
+# call to `nexo_cortex_quality` re-ran the SQL summary. Now the handler
+# reads the cache first for the 7d / 1d windows and falls back silently
+# to the live computation on any failure.
+_CORTEX_QUALITY_CACHE_PATH = (
+    Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
+    / "operations"
+    / "cortex-quality-latest.json"
+)
+# 6h cron + 30 min slack so a slightly-late run still serves cache.
+_CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS = 23400
+_CORTEX_QUALITY_CACHE_WINDOWS = {1: "window_1d", 7: "window_7d"}
+_CORTEX_QUALITY_CACHE_SCHEMA = 1
+def _load_cortex_quality_cache(days: int) -> dict | None:
+    """Return cached summary dict for the requested window, or None if unusable.
+    Silent on any failure so the live path always wins on a corrupt cache.
+    Respects the snapshot schema written by `_persist_quality_snapshot`
+    in src/scripts/nexo-cortex-cycle.py — do NOT change the layout here
+    without updating the writer in the same release.
+    """
+    window_key = _CORTEX_QUALITY_CACHE_WINDOWS.get(days)
+    if window_key is None:
+        return None
+    try:
+        if not _CORTEX_QUALITY_CACHE_PATH.is_file():
+            return None
+        payload = json.loads(
+            _CORTEX_QUALITY_CACHE_PATH.read_text(encoding="utf-8")
+        )
+    except Exception:
+        return None
+    if not isinstance(payload, dict):
+        return None
+    if payload.get("schema") != _CORTEX_QUALITY_CACHE_SCHEMA:
+        return None
+    captured_at = payload.get("captured_at") or ""
+    if not isinstance(captured_at, str):
+        return None
+    try:
+        captured = datetime.fromisoformat(captured_at)
+    except Exception:
+        return None
+    age = time.time() - captured.timestamp()
+    if age < 0 or age > _CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS:
+        return None
+    window = payload.get(window_key)
+    if not isinstance(window, dict):
+        return None
+    return window
 def handle_cortex_quality(days: int = 30) -> str:
-    """Summarise recommendation quality, overrides, and linked outcome results."""
+    """Summarise recommendation quality, overrides, and linked outcome results.
+    v5.2.0: Serves the snapshot written by `nexo-cortex-cycle` when the
+    requested window is 7 or 1 days and the snapshot is fresh
+    (< 6h30m old, schema == 1). Falls back silently to a live SQL
+    summary on any failure, so the caller always gets a valid response.
+    The returned JSON includes `"source": "cache" | "live"` so the
+    path taken is observable from the outside.
+    """
     from db import cortex_evaluation_summary
+    cached = _load_cortex_quality_cache(days)
+    if cached is not None:
+        return json.dumps(
+            {"ok": True, "summary": cached, "source": "cache"},
+            ensure_ascii=False,
+            indent=2,
+        )
     summary = cortex_evaluation_summary(days=days)
-    return json.dumps({"ok": True, "summary": summary}, ensure_ascii=False, indent=2)
+    return json.dumps(
+        {"ok": True, "summary": summary, "source": "live"},
+        ensure_ascii=False,
+        indent=2,
+    )
 TOOLS = [

package/src/plugins/protocol.py CHANGED

@@ -64,6 +64,74 @@ HIGH_STAKES_KEYWORDS = {
     "revenue",
     "cost",
 }
+# v5.2.0: Spanish high-stakes keywords. Parity with the English set so a
+# goal written in Spanish ("migrar producción a nuevo servidor") trips
+# the same high-stakes gate as its English twin. Accented and unaccented
+# variants are both listed because user prompts mix both freely.
+HIGH_STAKES_KEYWORDS_ES = {
+    "crítico",
+    "critico",
+    "crítica",
+    "critica",
+    "producción",
+    "produccion",
+    "cliente",
+    "clientes",
+    "despliegue",
+    "desplegar",
+    "pago",
+    "pagos",
+    "facturación",
+    "facturacion",
+    "factura",
+    "credencial",
+    "credenciales",
+    "contraseña",
+    "seguridad",
+    "legal",
+    "médico",
+    "medico",
+    "financiero",
+    "financiera",
+    "privacidad",
+    "marca",
+    "reputación",
+    "reputacion",
+    "ingresos",
+    "borrar",
+    "eliminar",
+    "migración",
+    "migracion",
+    "migrar",
+    "lanzamiento",
+    "lanzar",
+    "precio",
+    "precios",
+    "reembolso",
+    "público",
+    "publico",
+    "riesgo",
+    "riesgos",
+    "coste",
+    "costes",
+    "ventas",
+    "pedido",
+    "pedidos",
+}
+# v5.2.0: Negation patterns that should SUPPRESS the high-stakes flag.
+# Without this, a user message like "sin afectar producción" or
+# "no tocar prod" triggers a false positive just because the keyword
+# is physically present. Bilingual and conservative on purpose.
+NEGATION_PATTERNS = (
+    re.compile(r"\bno\s+tocar\s+prod(?:ucci[oó]n|uccion)?\b", re.IGNORECASE),
+    re.compile(r"\bsin\s+(?:tocar|afectar|romper|modificar)\b", re.IGNORECASE),
+    re.compile(r"\bnunca\s+(?:borrar|eliminar|tocar)\b", re.IGNORECASE),
+    re.compile(r"\bno\s+(?:borrar|eliminar|tocar|modificar)\b", re.IGNORECASE),
+    re.compile(r"\bevitar\s+(?:borrar|eliminar|tocar|romper)\b", re.IGNORECASE),
+    re.compile(r"\bavoid\s+(?:deleting|touching|breaking|modifying)\b", re.IGNORECASE),
+    re.compile(r"\bdon'?t\s+(?:touch|break|modify|delete)\b", re.IGNORECASE),
+    re.compile(r"\bwithout\s+(?:touching|breaking|affecting)\b", re.IGNORECASE),
+)
 def _parse_list(value) -> list[str]:
@@ -104,9 +172,32 @@ def _parse_int_list(value) -> list[int]:
     return parsed
+def _has_negation_context(text: str) -> bool:
+    """Return True when the text explicitly disclaims touching the sensitive area.
+    Used to suppress high-stakes false positives where the user is stating
+    the *boundary* of safe work ("without touching production") rather than
+    the *target* of a risky action ("migrate production").
+    """
+    if not text:
+        return False
+    return any(pattern.search(text) for pattern in NEGATION_PATTERNS)
 def _detect_high_stakes(*parts: str) -> bool:
     combined = " ".join((part or "").strip().lower() for part in parts if part)
-    return any(keyword in combined for keyword in HIGH_STAKES_KEYWORDS)
+    if not combined:
+        return False
+    # Negation override: "sin afectar producción" / "don't touch prod" / etc.
+    # Explicit disclaimers suppress the flag even if a high-stakes keyword
+    # is physically present, otherwise boundary statements get miscategorised
+    # as action targets.
+    if _has_negation_context(combined):
+        return False
+    return any(
+        keyword in combined
+        for keyword in HIGH_STAKES_KEYWORDS | HIGH_STAKES_KEYWORDS_ES
+    )
 def _decision_support_required(*, task_type: str, high_stakes: bool) -> bool:
@@ -124,6 +215,8 @@ def evaluate_response_confidence(
     unknowns=None,
     verification_step: str = "",
     stakes: str = "",
+    pre_action_context_hits: int = 0,
+    area_has_atlas_entry: bool = False,
 ) -> dict:
     evidence_refs = _parse_list(evidence_refs)
     unknowns = _parse_list(unknowns)
@@ -152,6 +245,22 @@ def evaluate_response_confidence(
         score -= 20
         reasons.append("high-stakes context detected")
+    # v5.2.0: Positive signals. Before this release the score was purely
+    # a penalty accumulator — there was no way to reward tasks that had
+    # meaningful prior context loaded or that sat inside a known area.
+    # Cap at +10 and +5 so these can never override a real risk signal.
+    if pre_action_context_hits > 0:
+        boost = min(10, pre_action_context_hits * 2)
+        score += boost
+        reasons.append(
+            f"+{boost} from {pre_action_context_hits} pre-action context hit(s)"
+        )
+    if area_has_atlas_entry:
+        score += 5
+        reasons.append("+5 from known project-atlas area")
+    final_score = max(0, min(100, score))
     mode = "answer"
     if task_type in RESPONSE_TASKS:
         if high_stakes and (unknowns or not evidence_refs):
@@ -161,6 +270,23 @@ def evaluate_response_confidence(
         elif high_stakes or not evidence_refs or not verification_step.strip():
             mode = "verify"
+        # v5.2.0: Numeric safeguard. The boolean decision tree above
+        # covers every obvious case, but tasks can accumulate soft
+        # penalties without tripping any single rule. When the final
+        # score is critically low, downgrade the mode by one step.
+        # This catches edge cases and is monotonic — it can only make
+        # the response discipline stricter, never looser.
+        if mode == "answer" and final_score < 50:
+            mode = "verify"
+            reasons.append(
+                f"numeric safeguard: score {final_score} < 50 forces verify"
+            )
+        elif mode == "verify" and final_score < 30 and high_stakes:
+            mode = "defer"
+            reasons.append(
+                f"numeric safeguard: high-stakes with score {final_score} forces defer"
+            )
     next_action = {
         "answer": "You may answer directly, but stay within the evidence you actually have.",
         "verify": "Verify the claim with concrete evidence before answering.",
@@ -170,7 +296,7 @@ def evaluate_response_confidence(
     return {
         "mode": mode,
-        "confidence": max(0, min(100, score)),
+        "confidence": final_score,
         "high_stakes": high_stakes,
         "reasons": reasons,
         "next_action": next_action,

package/src/scripts/deep-sleep/apply_findings.py CHANGED

@@ -855,7 +855,8 @@ def _parse_any_datetime(value) -> datetime | None:
         except Exception:
             continue
     try:
-        return datetime.fromisoformat(raw.replace("Z", "+00:00").replace("+00:00", ""))
+        dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
+        return dt.replace(tzinfo=None)
     except Exception:
         return None