npm - nexo-brain - Versions diffs - 5.1.0 → 5.2.0 - Mend

nexo-brain 5.1.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/.claude-plugin/plugin.json +1 -1
package/README.md +8 -0
package/package.json +1 -1
package/src/doctor/providers/runtime.py +132 -0
package/src/plugins/cortex.py +81 -2
package/src/plugins/episodic_memory.py +13 -1
package/src/plugins/protocol.py +128 -2
package/src/scripts/nexo-daily-self-audit.py +81 -3

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "5.1.0",
+  "version": "5.2.0",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED Viewed

@@ -87,6 +87,14 @@ Versions `3.1.7` through `3.2.0` close the recent-memory gap:
 - when even that misses, NEXO now exposes raw transcript fallback tools for Claude Code and Codex session stores
 - NEXO can now inspect itself through a live system catalog derived from canonical sources instead of relying only on stale docs or operator memory
+Version `5.2.0` closes two focused gaps in the Cortex layer that were left open by the v5.1 audit — the high-stakes response-contract detector was English-only, and the `nexo-cortex-cycle` cron was writing a quality snapshot that no reader ever consumed:
+- `HIGH_STAKES_KEYWORDS_ES` adds ~45 Spanish keywords to the high-stakes detector with accented and unaccented variants, so a goal written in Spanish (`migrar la base de datos de producción`) trips the same gate as its English twin.
+- `NEGATION_PATTERNS` suppresses false positives when the user explicitly disclaims touching the sensitive area (`sin afectar producción`, `no tocar prod`, `without touching production`, `don't modify`). The raw keyword being present is no longer enough to flag the task.
+- `evaluate_response_confidence` accepts two new optional kwargs, `pre_action_context_hits` (+up to 10) and `area_has_atlas_entry` (+5), so the score can finally reward tasks that loaded real context instead of only punishing unprepared ones. Both signals are capped and cannot override a real risk penalty.
+- A monotonic numeric safeguard layers on top of the boolean decision tree: `answer` downgrades to `verify` when `final_score < 50`, and `verify` downgrades to `defer` when `high_stakes` and `final_score < 30`. The safeguard can only make response discipline stricter, never looser.
+- `handle_cortex_quality` in `src/plugins/cortex.py` now reads `$NEXO_HOME/operations/cortex-quality-latest.json` when the requested window is 7 or 1 days, the snapshot is fresh (less than 6h 30m old), and the schema matches — it falls back silently to the live SQL computation on any failure. The handler's JSON response now includes `"source": "cache" | "live"` for observability.
 Version `5.1.0` lands the full NEXO-AUDIT-2026-04-11 roadmap as a single minor bump — every open evolution / adaptive / cognitive / skills loop now closes under itself, the knowledge graph exports cleanly, OpenTelemetry spans can be turned on without a hard dependency, and every PR has to clear lint, security, coverage, and release-readiness gates before it can merge:
 - Evolution cycle now auto-applies user-approved proposals on the next run (backed by the new idempotent migration `m38`), adaptive learned-weight rollbacks surface as visible followups, outcome patterns auto-promote to draft skills, and a Voyager-style detector exposes co-occurring skill pairs as composite-skill candidates via `nexo_skill_compose_candidates`.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "5.1.0",
+  "version": "5.2.0",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/doctor/providers/runtime.py CHANGED Viewed

@@ -2710,6 +2710,137 @@ def check_release_artifact_sync() -> DoctorCheck:
     )
+def check_release_trace_hygiene() -> DoctorCheck:
+    """Report audit-phase workflow runs and audit goals that were left open.
+    Reads ``workflow_runs`` and ``workflow_goals`` from
+    ``NEXO_HOME / "data" / "nexo.db"``. An ``audit-phase`` run whose status is
+    not completed/failed/cancelled, or an active audit goal without any open
+    run, is listed once its ``updated_at`` is at least 6 hours old or cannot
+    be parsed.
+    Returns:
+        A healthy/info check when the DB or the workflow tables are absent or
+        when nothing is stale; a degraded/warn check when stale entries exist
+        or when reading the database raised.
+    """
+    check_id = "runtime.release_trace_hygiene"
+    stale_after_hours = 6
+    def _result(status, severity, summary, evidence=(), repair_plan=(), escalation_prompt=""):
+        # Every outcome shares the same id and tier; only the verdict varies.
+        return DoctorCheck(
+            id=check_id,
+            tier="runtime",
+            status=status,
+            severity=severity,
+            summary=summary,
+            evidence=list(evidence),
+            repair_plan=list(repair_plan),
+            escalation_prompt=escalation_prompt,
+        )
+    def _stale_entries(rows, id_column, label_column, now, skip_open_runs=False):
+        # One evidence line per row that is stale or has an unreadable timestamp.
+        entries: list[str] = []
+        for record in rows:
+            if skip_open_runs and int(record["open_run_count"] or 0) > 0:
+                continue
+            stamp = _parse_timestamp(record["updated_at"] or "")
+            if stamp is None:
+                entries.append(f"{record[id_column]}: unreadable updated_at")
+                continue
+            if stamp.tzinfo is None:
+                # Naive timestamps are treated as UTC.
+                # NOTE(review): confirm every writer of updated_at stores UTC.
+                stamp = stamp.replace(tzinfo=dt.timezone.utc)
+            age_hours = (now - stamp).total_seconds() / 3600
+            if age_hours >= stale_after_hours:
+                label = str(record[label_column] or "")[:72]
+                entries.append(f"{record[id_column]}: {age_hours:.1f}h stale ({label})")
+        return entries
+    database = NEXO_HOME / "data" / "nexo.db"
+    if not database.is_file():
+        return _result("healthy", "info", "Release trace hygiene unavailable (no DB)")
+    try:
+        conn = sqlite3.connect(str(database), timeout=2)
+        conn.row_factory = sqlite3.Row
+        try:
+            found = conn.execute(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name IN ('workflow_goals', 'workflow_runs')"
+            ).fetchall()
+            present = {entry[0] for entry in found}
+            if not {"workflow_goals", "workflow_runs"} <= present:
+                return _result(
+                    "healthy",
+                    "info",
+                    "Release trace hygiene unavailable (workflow tables absent)",
+                )
+            now = dt.datetime.now(dt.timezone.utc)
+            run_rows = conn.execute(
+                """SELECT run_id, goal, updated_at
+                   FROM workflow_runs
+                   WHERE workflow_kind = 'audit-phase'
+                     AND status NOT IN ('completed', 'failed', 'cancelled')
+                   ORDER BY updated_at DESC"""
+            ).fetchall()
+            stale_run_samples = _stale_entries(run_rows, "run_id", "goal", now)
+            goal_rows = conn.execute(
+                """SELECT g.goal_id, g.title, g.updated_at,
+                          COALESCE((SELECT COUNT(*) FROM workflow_runs r WHERE r.goal_id = g.goal_id), 0) AS run_count,
+                          COALESCE((SELECT COUNT(*) FROM workflow_runs r WHERE r.goal_id = g.goal_id
+                                    AND r.status NOT IN ('completed', 'failed', 'cancelled')), 0) AS open_run_count
+                   FROM workflow_goals g
+                   WHERE g.status = 'active'
+                     AND (g.goal_id LIKE 'WG-AUDIT-%' OR g.title LIKE 'NEXO-AUDIT-%')
+                   ORDER BY g.updated_at DESC"""
+            ).fetchall()
+            # Goals that still have an open run are considered in progress.
+            stale_goal_samples = _stale_entries(
+                goal_rows, "goal_id", "title", now, skip_open_runs=True
+            )
+        finally:
+            conn.close()
+    except Exception as exc:
+        return _result(
+            "degraded",
+            "warn",
+            "Release trace hygiene check failed",
+            evidence=[str(exc)],
+            repair_plan=["Inspect workflow_goals/workflow_runs state manually"],
+            escalation_prompt="Release traces could not be audited, so stale audit artifacts may be hiding in the runtime.",
+        )
+    # Totals first, then at most three samples of each kind.
+    evidence = [
+        f"stale audit workflows: {len(stale_run_samples)}",
+        f"stale audit goals: {len(stale_goal_samples)}",
+        *stale_run_samples[:3],
+        *stale_goal_samples[:3],
+    ]
+    if not (stale_run_samples or stale_goal_samples):
+        return _result("healthy", "info", "Release trace hygiene OK", evidence=evidence)
+    return _result(
+        "degraded",
+        "warn",
+        "Release trace hygiene needs cleanup",
+        evidence=evidence,
+        repair_plan=[
+            "Close or complete stale audit-phase workflows and active audit goals",
+            "Keep workflow/goal state aligned with the real shipped state after releases",
+        ],
+        escalation_prompt="Audit/release traces drifted away from reality, which makes shipping state look ambiguous.",
+    )
 def check_state_watchers() -> DoctorCheck:
     db_path = NEXO_HOME / "data" / "nexo.db"
     summary_path = NEXO_HOME / "operations" / "state-watchers-status.json"
@@ -2988,6 +3119,7 @@ def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
         safe_check(check_automation_telemetry),
         safe_check(check_state_watchers),
         safe_check(check_release_artifact_sync),
+        safe_check(check_release_trace_hygiene),
         safe_check(check_launchagent_inventory),
         safe_check(check_launchagent_integrity, fix=fix),
         safe_check(check_personal_script_registry, fix=fix),

package/src/plugins/cortex.py CHANGED Viewed

@@ -15,9 +15,12 @@ v0.1: Single MCP tool + middleware validation.
 """
 import json
+import os
 import re
 import secrets
 import time
+from datetime import datetime
+from pathlib import Path
 def _get_db():
@@ -1003,12 +1006,88 @@ def handle_cortex_override(evaluation_id: int, chosen: str, reason: str) -> str:
     return json.dumps({"ok": True, "evaluation": updated}, ensure_ascii=False, indent=2)
+# v5.2.0: Cortex quality cache reader. The `nexo-cortex-cycle` cron
+# (src/scripts/nexo-cortex-cycle.py) writes a fresh quality snapshot to
+# $NEXO_HOME/operations/cortex-quality-latest.json every 6h. Until this
+# release the reader was missing — the snapshot was write-only and every
+# call to `nexo_cortex_quality` re-ran the SQL summary. Now the handler
+# reads the cache first for the 7d / 1d windows and falls back silently
+# to the live computation on any failure.
+_CORTEX_QUALITY_CACHE_PATH = (
+    Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
+    / "operations"
+    / "cortex-quality-latest.json"
+)
+# 6h cron + 30 min slack so a slightly-late run still serves cache.
+_CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS = 23400
+_CORTEX_QUALITY_CACHE_WINDOWS = {1: "window_1d", 7: "window_7d"}
+_CORTEX_QUALITY_CACHE_SCHEMA = 1
+def _load_cortex_quality_cache(days: int) -> dict | None:
+    """Return the cached summary dict for the requested window, or None if unusable.
+    Silent on any failure so the live path always wins on a corrupt cache.
+    Respects the snapshot schema written by `_persist_quality_snapshot`
+    in src/scripts/nexo-cortex-cycle.py — do NOT change the layout here
+    without updating the writer in the same release.
+    Args:
+        days: Requested window; only the keys of
+            `_CORTEX_QUALITY_CACHE_WINDOWS` can be served from the snapshot.
+    Returns:
+        The window dict from the snapshot, or None when the window is not
+        cached, the file is missing/unreadable/malformed, the schema differs,
+        `captured_at` is unusable, or the snapshot is older than
+        `_CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS` (or dated in the future).
+    """
+    window_key = _CORTEX_QUALITY_CACHE_WINDOWS.get(days)
+    if window_key is None:
+        return None
+    try:
+        if not _CORTEX_QUALITY_CACHE_PATH.is_file():
+            return None
+        payload = json.loads(
+            _CORTEX_QUALITY_CACHE_PATH.read_text(encoding="utf-8")
+        )
+    except Exception:
+        # Deliberate best-effort boundary: any read/parse problem means "no cache".
+        return None
+    if not isinstance(payload, dict):
+        return None
+    if payload.get("schema") != _CORTEX_QUALITY_CACHE_SCHEMA:
+        return None
+    captured_at = payload.get("captured_at")
+    if not isinstance(captured_at, str):
+        return None
+    try:
+        # timestamp() is kept inside the guard: a syntactically valid but
+        # out-of-range value (e.g. year 1) can raise OverflowError/OSError,
+        # which must not escape a function documented as silent.
+        # NOTE(review): a naive captured_at is interpreted as local time by
+        # timestamp(); confirm the writer emits local or tz-aware values.
+        age = time.time() - datetime.fromisoformat(captured_at).timestamp()
+    except Exception:
+        return None
+    # Reject stale snapshots and ones dated in the future.
+    if not 0 <= age <= _CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS:
+        return None
+    window = payload.get(window_key)
+    if not isinstance(window, dict):
+        return None
+    return window
 def handle_cortex_quality(days: int = 30) -> str:
-    """Summarise recommendation quality, overrides, and linked outcome results."""
+    """Summarise recommendation quality, overrides, and linked outcome results.
+    v5.2.0: Serves the snapshot written by `nexo-cortex-cycle` when the
+    requested window is 7 or 1 days and the snapshot is fresh
+    (< 6h30m old, schema == 1). Falls back silently to a live SQL
+    summary on any failure, so the caller always gets a valid response.
+    The returned JSON includes `"source": "cache" | "live"` so the
+    path taken is observable from the outside.
+    Args:
+        days: Window size in days. The default of 30 is not one of the
+            cached windows, so a default call always takes the live path.
+    Returns:
+        A JSON string with the keys `ok`, `summary` and `source`.
+    """
     from db import cortex_evaluation_summary
+    # Snapshot first; None means "not cached, stale, or unreadable".
+    cached = _load_cortex_quality_cache(days)
+    if cached is not None:
+        return json.dumps(
+            {"ok": True, "summary": cached, "source": "cache"},
+            ensure_ascii=False,
+            indent=2,
+        )
+    # Live path: recompute the summary for the requested window.
     summary = cortex_evaluation_summary(days=days)
-    return json.dumps({"ok": True, "summary": summary}, ensure_ascii=False, indent=2)
+    return json.dumps(
+        {"ok": True, "summary": summary, "source": "live"},
+        ensure_ascii=False,
+        indent=2,
+    )
 TOOLS = [

package/src/plugins/episodic_memory.py CHANGED Viewed

@@ -229,8 +229,20 @@ def handle_session_diary_write(decisions: str, summary: str,
     orphan_changes = conn.execute(
         "SELECT COUNT(*) FROM change_log WHERE (commit_ref IS NULL OR commit_ref = '')"
     ).fetchone()[0]
+    recent_orphan_changes = conn.execute(
+        """SELECT COUNT(*) FROM change_log
+           WHERE (commit_ref IS NULL OR commit_ref = '')
+             AND created_at >= datetime('now', '-7 days')"""
+    ).fetchone()[0]
     if orphan_changes > 0:
-        warnings.append(f"{orphan_changes} changes sin commit_ref")
+        if recent_orphan_changes > 0 and recent_orphan_changes != orphan_changes:
+            warnings.append(
+                f"{recent_orphan_changes} changes recientes sin commit_ref ({orphan_changes} históricas total)"
+            )
+        elif recent_orphan_changes > 0:
+            warnings.append(f"{recent_orphan_changes} changes recientes sin commit_ref")
+        else:
+            warnings.append(f"{orphan_changes} changes históricas sin commit_ref")
     orphan_decisions = conn.execute(
         "SELECT COUNT(*) FROM decisions WHERE (outcome IS NULL OR outcome = '') AND created_at < datetime('now', '-7 days')"
     ).fetchone()[0]

package/src/plugins/protocol.py CHANGED Viewed

@@ -64,6 +64,74 @@ HIGH_STAKES_KEYWORDS = {
     "revenue",
     "cost",
 }
+# v5.2.0: Spanish high-stakes keywords. Parity with the English set so a
+# goal written in Spanish ("migrar producción a nuevo servidor") trips
+# the same high-stakes gate as its English twin. Accented and unaccented
+# variants are both listed because user prompts mix both freely; that
+# includes "ñ" typed as "n" on keyboards without the key.
+HIGH_STAKES_KEYWORDS_ES = {
+    "crítico",
+    "critico",
+    "crítica",
+    "critica",
+    "producción",
+    "produccion",
+    "cliente",
+    "clientes",
+    "despliegue",
+    "desplegar",
+    "pago",
+    "pagos",
+    "facturación",
+    "facturacion",
+    "factura",
+    "credencial",
+    "credenciales",
+    "contraseña",
+    "contrasena",
+    "seguridad",
+    "legal",
+    "médico",
+    "medico",
+    "financiero",
+    "financiera",
+    "privacidad",
+    "marca",
+    "reputación",
+    "reputacion",
+    "ingresos",
+    "borrar",
+    "eliminar",
+    "migración",
+    "migracion",
+    "migrar",
+    "lanzamiento",
+    "lanzar",
+    "precio",
+    "precios",
+    "reembolso",
+    "público",
+    "publico",
+    "riesgo",
+    "riesgos",
+    "coste",
+    "costes",
+    "ventas",
+    "pedido",
+    "pedidos",
+}
+# v5.2.0: Negation patterns that should SUPPRESS the high-stakes flag.
+# Without this, a user message like "sin afectar producción" or
+# "no tocar prod" triggers a false positive just because the keyword
+# is physically present. Bilingual and conservative on purpose.
+# Apart from the first entry, each pattern matches only the disclaimer
+# verb phrase (e.g. "sin tocar"), not the object that follows it.
+# NOTE(review): in the first pattern the `|uccion` alternative is already
+# covered by `ucci[oó]n`, and "no tocar ..." is also matched by the generic
+# `no (borrar|eliminar|tocar|modificar)` entry further down.
+NEGATION_PATTERNS = (
+    # Spanish disclaimers.
+    re.compile(r"\bno\s+tocar\s+prod(?:ucci[oó]n|uccion)?\b", re.IGNORECASE),
+    re.compile(r"\bsin\s+(?:tocar|afectar|romper|modificar)\b", re.IGNORECASE),
+    re.compile(r"\bnunca\s+(?:borrar|eliminar|tocar)\b", re.IGNORECASE),
+    re.compile(r"\bno\s+(?:borrar|eliminar|tocar|modificar)\b", re.IGNORECASE),
+    re.compile(r"\bevitar\s+(?:borrar|eliminar|tocar|romper)\b", re.IGNORECASE),
+    # English disclaimers; `don'?t` accepts both "don't" and "dont".
+    re.compile(r"\bavoid\s+(?:deleting|touching|breaking|modifying)\b", re.IGNORECASE),
+    re.compile(r"\bdon'?t\s+(?:touch|break|modify|delete)\b", re.IGNORECASE),
+    re.compile(r"\bwithout\s+(?:touching|breaking|affecting)\b", re.IGNORECASE),
+)
 def _parse_list(value) -> list[str]:
@@ -104,9 +172,32 @@ def _parse_int_list(value) -> list[int]:
     return parsed
+def _has_negation_context(text: str) -> bool:
+    """Tell whether the text contains an explicit "hands off" disclaimer.
+    A disclaimer states the *boundary* of safe work ("without touching
+    production") as opposed to the *target* of a risky action ("migrate
+    production"); callers use it to suppress high-stakes false positives.
+    Returns True as soon as any entry of NEGATION_PATTERNS matches, and
+    False for empty input.
+    """
+    if not text:
+        return False
+    for disclaimer in NEGATION_PATTERNS:
+        if disclaimer.search(text):
+            return True
+    return False
 def _detect_high_stakes(*parts: str) -> bool:
+    """Return True when the combined text names a high-stakes area or action.
+    The parts are stripped, lowercased and joined with spaces, then scanned
+    for any English or Spanish high-stakes keyword (substring match).
+    Disclaimed clauses ("sin afectar producción", "don't touch prod") are
+    masked before the scan, so a boundary statement does not raise the flag.
+    Keywords outside the disclaimed clause still count: "borrar clientes sin
+    tocar los logs" stays high-stakes.
+    """
     combined = " ".join((part or "").strip().lower() for part in parts if part)
-    return any(keyword in combined for keyword in HIGH_STAKES_KEYWORDS)
+    if not combined:
+        return False
+    # Negation override, scoped to the disclaimed clause. Suppressing the flag
+    # for the whole text would let a single "sin tocar X" hide an unrelated
+    # risky action elsewhere in the same request.
+    if _has_negation_context(combined):
+        combined = _strip_negated_clauses(combined)
+    return any(
+        keyword in combined
+        for keywords in (HIGH_STAKES_KEYWORDS, HIGH_STAKES_KEYWORDS_ES)
+        for keyword in keywords
+    )
+# Where a disclaimer's scope ends: sentence punctuation followed by whitespace
+# or end of text, a line break, or a contrastive "but"/"pero".
+_NEGATED_CLAUSE_END = re.compile(r"[,.;:!?](?=\s|$)|\n|\b(?:but|pero)\b")
+def _strip_negated_clauses(text: str) -> str:
+    """Drop every disclaimer and its object, up to the next clause boundary."""
+    kept: list[str] = []
+    cursor = 0
+    while cursor < len(text):
+        starts = [
+            found.start()
+            for found in (pattern.search(text, cursor) for pattern in NEGATION_PATTERNS)
+            if found
+        ]
+        if not starts:
+            break
+        start = min(starts)
+        kept.append(text[cursor:start])
+        boundary = _NEGATED_CLAUSE_END.search(text, start)
+        # Without a boundary the disclaimer covers the rest of the text.
+        cursor = boundary.end() if boundary else len(text)
+    kept.append(text[cursor:])
+    return " ".join(kept)
 def _decision_support_required(*, task_type: str, high_stakes: bool) -> bool:
@@ -124,6 +215,8 @@ def evaluate_response_confidence(
     unknowns=None,
     verification_step: str = "",
     stakes: str = "",
+    pre_action_context_hits: int = 0,
+    area_has_atlas_entry: bool = False,
 ) -> dict:
     evidence_refs = _parse_list(evidence_refs)
     unknowns = _parse_list(unknowns)
@@ -152,6 +245,22 @@ def evaluate_response_confidence(
         score -= 20
         reasons.append("high-stakes context detected")
+    # v5.2.0: Positive signals. Before this release the score was purely
+    # a penalty accumulator — there was no way to reward tasks that had
+    # meaningful prior context loaded or that sat inside a known area.
+    # Cap at +10 and +5 so these can never override a real risk signal.
+    if pre_action_context_hits > 0:
+        boost = min(10, pre_action_context_hits * 2)
+        score += boost
+        reasons.append(
+            f"+{boost} from {pre_action_context_hits} pre-action context hit(s)"
+        )
+    if area_has_atlas_entry:
+        score += 5
+        reasons.append("+5 from known project-atlas area")
+    final_score = max(0, min(100, score))
     mode = "answer"
     if task_type in RESPONSE_TASKS:
         if high_stakes and (unknowns or not evidence_refs):
@@ -161,6 +270,23 @@ def evaluate_response_confidence(
         elif high_stakes or not evidence_refs or not verification_step.strip():
             mode = "verify"
+        # v5.2.0: Numeric safeguard. The boolean decision tree above
+        # covers every obvious case, but tasks can accumulate soft
+        # penalties without tripping any single rule. When the final
+        # score is critically low, downgrade the mode by one step.
+        # This catches edge cases and is monotonic — it can only make
+        # the response discipline stricter, never looser.
+        if mode == "answer" and final_score < 50:
+            mode = "verify"
+            reasons.append(
+                f"numeric safeguard: score {final_score} < 50 forces verify"
+            )
+        elif mode == "verify" and final_score < 30 and high_stakes:
+            mode = "defer"
+            reasons.append(
+                f"numeric safeguard: high-stakes with score {final_score} forces defer"
+            )
     next_action = {
         "answer": "You may answer directly, but stay within the evidence you actually have.",
         "verify": "Verify the claim with concrete evidence before answering.",
@@ -170,7 +296,7 @@ def evaluate_response_confidence(
     return {
         "mode": mode,
-        "confidence": max(0, min(100, score)),
+        "confidence": final_score,
         "high_stakes": high_stakes,
         "reasons": reasons,
         "next_action": next_action,

package/src/scripts/nexo-daily-self-audit.py CHANGED Viewed

@@ -78,6 +78,10 @@ CLAUDE_CLI = _resolve_claude_cli()
 findings = []
+AUDIT_GOAL_NEXT_ACTION = "Convert the recurring theme into an explicit workflow or close it as intentional noise."
+AUDIT_GOAL_OWNER = "system:self-audit"
+AUDIT_GOAL_STALE_HOURS = 36
 def log(msg):
     ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -492,7 +496,7 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
         f"Recurring {area} theme detected by daily self-audit. "
         f"The theme '{sample_goal}' appeared {count} times without a durable goal, learning, or resolved workflow."
     )
-    next_action = "Convert the recurring theme into an explicit workflow or close it as intentional noise."
+    next_action = AUDIT_GOAL_NEXT_ACTION
     success_signal = "The theme stops resurfacing in unresolved protocol tasks."
     now_iso = datetime.now().isoformat(timespec="seconds")
     if existing:
@@ -504,7 +508,7 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
         if "priority" in columns:
             updates["priority"] = "high"
         if "owner" in columns:
-            updates["owner"] = "system:self-audit"
+            updates["owner"] = AUDIT_GOAL_OWNER
         if "next_action" in columns:
             updates["next_action"] = next_action
         if "success_signal" in columns:
@@ -534,7 +538,7 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
     if "priority" in columns:
         values["priority"] = "high"
     if "owner" in columns:
-        values["owner"] = "system:self-audit"
+        values["owner"] = AUDIT_GOAL_OWNER
     if "next_action" in columns:
         values["next_action"] = next_action
     if "success_signal" in columns:
@@ -553,6 +557,75 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
     return {"ok": True, "action": "created", "goal_id": goal_id}
+def _retire_stale_audit_goals_inline(
+    conn: sqlite3.Connection, *, max_age_hours: int = AUDIT_GOAL_STALE_HOURS
+) -> dict:
+    """Mark untouched self-audit placeholder goals as abandoned.
+    An active ``WG-AUDIT-%`` goal is retired only when its ``next_action`` is
+    still the default placeholder text, its owner is empty or the self-audit
+    owner, it has no open workflow run, and its last update (or, failing
+    that, its opening time) is at least ``max_age_hours`` old.
+    Args:
+        conn: Open SQLite connection; rows are read by column name. This
+            function issues UPDATEs but does not commit.
+            NOTE(review): presumably the caller commits; confirm.
+        max_age_hours: Minimum age before a placeholder is retired.
+    Returns:
+        ``{"ok": True, "retired": <count>}``, or
+        ``{"ok": False, "reason": "workflow_goals_missing"}`` when the goals
+        table does not exist.
+    """
+    if not _table_exists(conn, "workflow_goals"):
+        return {"ok": False, "reason": "workflow_goals_missing"}
+    has_runs = _table_exists(conn, "workflow_runs")
+    if has_runs:
+        rows = conn.execute(
+            """SELECT g.goal_id, g.title, g.status, g.owner, g.next_action, g.opened_at, g.updated_at,
+                      COALESCE((SELECT COUNT(*) FROM workflow_runs r WHERE r.goal_id = g.goal_id), 0) AS run_count,
+                      COALESCE((SELECT COUNT(*) FROM workflow_runs r WHERE r.goal_id = g.goal_id
+                                AND r.status NOT IN ('completed', 'failed', 'cancelled')), 0) AS open_run_count
+               FROM workflow_goals g
+               WHERE g.status = 'active'
+                 AND g.goal_id LIKE 'WG-AUDIT-%'
+               ORDER BY g.updated_at DESC, g.opened_at DESC"""
+        ).fetchall()
+    else:
+        # No runs table: every goal counts as having zero runs.
+        rows = conn.execute(
+            """SELECT g.goal_id, g.title, g.status, g.owner, g.next_action, g.opened_at, g.updated_at,
+                      0 AS run_count,
+                      0 AS open_run_count
+               FROM workflow_goals g
+               WHERE g.status = 'active'
+                 AND g.goal_id LIKE 'WG-AUDIT-%'
+               ORDER BY g.updated_at DESC, g.opened_at DESC"""
+        ).fetchall()
+    if not rows:
+        return {"ok": True, "retired": 0}
+    now = datetime.now()
+    now_iso = now.isoformat(timespec="seconds")
+    retired = 0
+    for row in rows:
+        # A customised next_action means someone has taken the goal over.
+        if str(row["next_action"] or "").strip() != AUDIT_GOAL_NEXT_ACTION:
+            continue
+        owner = str(row["owner"] or "").strip()
+        if owner and owner != AUDIT_GOAL_OWNER:
+            continue
+        if int(row["open_run_count"] or 0) > 0:
+            continue
+        updated_at = _parse_mixed_datetime(row["updated_at"]) or _parse_mixed_datetime(row["opened_at"])
+        if not updated_at:
+            continue
+        if updated_at.tzinfo is not None:
+            # `now` is naive local time; subtracting an aware value would raise
+            # TypeError and abort the audit step, so convert to naive local.
+            # NOTE(review): assumes _parse_mixed_datetime may return aware
+            # datetimes for offset-bearing strings; confirm against the helper.
+            updated_at = updated_at.astimezone().replace(tzinfo=None)
+        age_hours = (now - updated_at).total_seconds() / 3600
+        if age_hours < max_age_hours:
+            continue
+        conn.execute(
+            """UPDATE workflow_goals
+               SET status = 'abandoned',
+                   next_action = ?,
+                   blocker_reason = ?,
+                   updated_at = ?,
+                   closed_at = ?
+               WHERE goal_id = ?""",
+            (
+                "Ninguna. Placeholder stale retirado automáticamente; el self-audit lo recreará si el patrón reaparece.",
+                f"Self-audit placeholder stale >{max_age_hours}h sin workflow runs abiertos.",
+                now_iso,
+                now_iso,
+                row["goal_id"],
+            ),
+        )
+        retired += 1
+    return {"ok": True, "retired": retired}
 def _queue_public_core_handoff(
     conn: sqlite3.Connection,
     *,
@@ -1174,6 +1247,11 @@ def check_unformalized_mentions():
         conn.close()
         return
+    retired_result = _retire_stale_audit_goals_inline(conn)
+    retired_count = int(retired_result.get("retired") or 0)
+    if retired_count:
+        finding("INFO", "formalization", f"retired {retired_count} stale self-audit workflow goals")
     rows = conn.execute(
         """SELECT goal, area, learning_id, followup_id
            FROM protocol_tasks