npm - nexo-brain - Versions diffs - 2.7.0 → 3.0.1 - Mend

nexo-brain 2.7.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/.claude-plugin/plugin.json +1 -1
package/README.md +66 -12
package/hooks/hooks.json +79 -0
package/package.json +1 -1
package/src/agent_runner.py +295 -7
package/src/cli.py +111 -0
package/src/client_preferences.py +99 -1
package/src/client_sync.py +207 -3
package/src/cognitive/__init__.py +1 -1
package/src/cognitive/_search.py +39 -19
package/src/dashboard/app.py +141 -1
package/src/dashboard/templates/base.html +4 -0
package/src/dashboard/templates/protocol.html +199 -0
package/src/db/__init__.py +23 -1
package/src/db/_learnings.py +31 -4
package/src/db/_personal_scripts.py +12 -0
package/src/db/_protocol.py +303 -0
package/src/db/_schema.py +248 -0
package/src/db/_watchers.py +173 -0
package/src/db/_workflow.py +952 -0
package/src/doctor/providers/boot.py +45 -19
package/src/doctor/providers/runtime.py +923 -8
package/src/evolution_cycle.py +62 -0
package/src/hook_guardrails.py +308 -0
package/src/hooks/protocol-guardrail.sh +10 -0
package/src/nexo_sdk.py +103 -0
package/src/plugins/cognitive_memory.py +18 -0
package/src/plugins/cortex.py +55 -35
package/src/plugins/guard.py +132 -16
package/src/plugins/protocol.py +911 -0
package/src/plugins/schedule.py +40 -6
package/src/plugins/simple_api.py +103 -0
package/src/plugins/skills.py +67 -0
package/src/plugins/state_watchers.py +79 -0
package/src/plugins/workflow.py +588 -0
package/src/public_contribution.py +86 -12
package/src/requirements.txt +1 -0
package/src/script_registry.py +142 -0
package/src/scripts/deep-sleep/apply_findings.py +204 -0
package/src/scripts/deep-sleep/collect.py +49 -4
package/src/scripts/nexo-agent-run.py +2 -0
package/src/scripts/nexo-daily-self-audit.py +843 -5
package/src/scripts/nexo-evolution-run.py +343 -1
package/src/server.py +92 -6
package/src/skills_runtime.py +151 -0
package/src/state_watchers_runtime.py +334 -0
package/src/tools_learnings.py +345 -7
package/src/tools_sessions.py +183 -0
package/templates/CLAUDE.md.template +9 -1
package/templates/CODEX.AGENTS.md.template +10 -2

package/.claude-plugin/plugin.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "2.7.0",
+  "version": "3.0.1",
   "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
   "author": {
     "name": "NEXO Brain",

package/README.md CHANGED

@@ -6,7 +6,7 @@
 [![GitHub stars](https://img.shields.io/github/stars/wazionapps/nexo?style=social)](https://github.com/wazionapps/nexo/stargazers)
 [![License: AGPL-3.0](https://img.shields.io/badge/License-AGPL--3.0-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
-> Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
+> Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, durable workflow runs, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
 **NEXO Brain transforms any MCP-compatible AI agent from a stateless assistant into a cognitive partner that remembers, learns, forgets, adapts, and builds a relationship with you over time.**
@@ -18,6 +18,16 @@
 [Watch the overview on YouTube](https://www.youtube.com/watch?v=IBs7zh7ZMG0) · [Watch the full deep-dive](https://www.youtube.com/watch?v=bKAfowyyy5M)
+Start here:
+- [5-minute quickstart](docs/quickstart-5-minutes.md)
+- [Architecture visuals](docs/architecture-visuals.md)
+- [Memory classes](docs/memory-classes.md)
+- [Session portability](docs/session-portability.md)
+- [Python SDK](docs/sdk-python.md)
+- [Reference verticals](docs/reference-verticals.md)
+- [Measured compare scorecard](compare/README.md)
+- [Public contribution guide](docs/public-contribution.md)
 Every time you close a session, everything is lost. Your agent doesn't remember yesterday's decisions, repeats the same mistakes, and starts from zero. NEXO Brain fixes this with a cognitive architecture modeled after how human memory actually works.
 ## Shared Brain Across Clients
@@ -38,15 +48,32 @@ That means NEXO now manages not only the shared runtime and MCP wiring, but also
 - For Codex specifically, `nexo chat` and Codex headless automation inject the current bootstrap explicitly, so Codex starts as NEXO even when plain global Codex startup is inconsistent about global instructions.
 - Deep Sleep now reads both Claude Code and Codex transcript stores, so overnight analysis still works even when the user spends the day in Codex.
-Versions `2.6.14` through `2.6.21` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first Deep Sleep engineering loop.
-Version `2.7.0` closes the next operational gap:
-- Weekly/monthly Deep Sleep summaries now include protocol compliance, engineering-loop output, project pulse, and trend-vs-previous-period data.
-- Runtime doctor now audits both weekly protocol compliance and release-artifact sync drift instead of leaving those checks implicit.
-- The repo now ships `scripts/verify_release_readiness.py`, and tagged publish runs it automatically so release discipline is enforced in the product itself.
-- The dashboard now surfaces `What Matters Now`, `What Is Drifting`, and `What Is Improving` directly from the periodic Deep Sleep summaries.
-- The unreleased Codex launcher fixes after `v2.6.21` are now included: stronger `nexo chat` client selection, corrected launch mode handling, tracked last terminal choice, and aligned interactive flags.
+Versions `2.6.14` through `2.7.0` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first measured engineering loop.
+Versions `3.0.0` and `3.0.1` close the next execution gap:
+- protocol discipline is now a runtime contract, not just instructions:
+  - `nexo_task_open`
+  - `nexo_task_close`
+  - persistent `protocol_debt`
+  - enforceable `Cortex` gates
+- durable execution is now first-class:
+  - resumable workflow runs
+  - checkpoints
+  - replay
+  - retries
+  - durable goals
+- conditioned learnings on critical files are now real guardrails across Claude hooks, Codex transcript audits, and headless automation prompts
+- repair/correction work now routes through canonical learning capture instead of depending on the model to remember to document after the fact
+- runtime truth is stricter:
+  - no more healthy-looking warning storms
+  - no more silent Deep Sleep schema drift
+  - keep-alive jobs report alive/degraded/duplicated honestly
+- public proof is stronger:
+  - measured compare scorecard
+  - external and internal ablations
+  - `cost_per_solved_task`
+  - SDK/API/quickstart surface
 ### Client Capability Matrix
@@ -227,6 +254,20 @@ User message → Fast Path check → Simple chat? → Respond directly
 The Cortex was designed through a 3-way AI debate (Claude Opus 4.6 + GPT-5.4 + Gemini 3.1 Pro) and validated against 6 months of real production failures.
+## Durable Workflow Runtime
+Memory and guardrails are not enough if long work still restarts from zero.
+NEXO now ships a durable workflow runtime for multi-step and cross-session execution:
+- `nexo_workflow_open` creates a persistent run with step metadata, idempotency key, priority, and shared state
+- `nexo_workflow_update` records replayable checkpoints, retry metadata, approval gates, and the current actionable state
+- `nexo_workflow_resume` tells the agent what to do next without guessing
+- `nexo_workflow_replay` reconstructs the recent execution history honestly instead of pretending the run is still in memory
+- `nexo_workflow_list` keeps active and blocked work visible so it does not disappear into reminders or prose notes
+This is the bridge between "good memory" and "reliable execution": tasks can now preserve state, retries, approval gates, and next action across interruptions.
 ## Context Continuity (Auto-Compaction)
 NEXO Brain automatically preserves session context when Claude Code compacts conversations. Using PreCompact and PostCompact hooks:
@@ -642,6 +683,19 @@ nexo scripts list  # See your personal scripts
 During install, NEXO now asks which interactive clients you want to connect, which one `nexo chat` should suggest first when multiple terminal clients are available, whether to enable background automation, which backend should run that automation, and which model profile each active terminal/backend should use. Shared brain stays on in every mode.
+Public entry points for the mental model now stay intentionally small:
+- `nexo_remember`
+- `nexo_memory_recall`
+- `nexo_consolidate`
+- `nexo_run_workflow`
+If you want the shell or Python wrappers instead of raw MCP tools:
+- [docs/quickstart-5-minutes.md](docs/quickstart-5-minutes.md)
+- [docs/memory-classes.md](docs/memory-classes.md)
+- [docs/sdk-python.md](docs/sdk-python.md)
+- [docs/reference-verticals.md](docs/reference-verticals.md)
+- [compare/README.md](compare/README.md)
 Recommended defaults:
 - Claude Code: `Opus 4.6 with 1M context`
 - Codex: `gpt-5.4` with `xhigh` reasoning
@@ -714,7 +768,7 @@ nexo doctor --tier runtime --json  # Machine-readable health report
 nexo doctor --fix              # Apply deterministic repairs
 ```
-Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. See `docs/writing-scripts.md` for details.
+Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. `nexo-agent-run.py` now also supports task profiles (`fast`, `balanced`, `deep`) plus safe backend fallback, so automations can prefer cheaper/faster Codex paths or deeper Claude paths without hardcoding one provider forever. See `docs/writing-scripts.md` for details.
 Skills v2 combine procedural guides with optional executable scripts. Personal skills live in `NEXO_HOME/skills/`, packaged core skills live in `NEXO_CODE/skills/` during development and `NEXO_HOME/skills-core/` in installed environments, and staged runtime copies live in `NEXO_HOME/skills-runtime/`. Execution is fully autonomous: Deep Sleep can evolve mature guide skills into executable drafts automatically, and runtime execution no longer waits for manual approval. See `docs/skills-v2.md` for the full model.
@@ -840,7 +894,7 @@ When Claude Desktop is installed, `nexo-brain`, `nexo update`, and `nexo clients
 ### Codex
-When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning.
+When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning. Runtime Doctor also audits recent Codex sessions for NEXO startup markers and conditioned-file protocol discipline so parity drift does not hide behind the lack of native Claude-style hooks.
 ### OpenClaw

package/hooks/hooks.json CHANGED

@@ -2,10 +2,89 @@
   "hooks": {
     "SessionStart": [
       {
+        "matcher": "*",
         "hooks": [
           {
             "type": "command",
             "command": "diff -q \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\" >/dev/null 2>&1 || (python3 -m venv \"${CLAUDE_PLUGIN_DATA}/.venv\" 2>/dev/null; cp \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\"; \"${CLAUDE_PLUGIN_DATA}/.venv/bin/pip\" install --quiet -r \"${CLAUDE_PLUGIN_DATA}/requirements.txt\") || rm -f \"${CLAUDE_PLUGIN_DATA}/requirements.txt\""
+          },
+          {
+            "type": "command",
+            "command": "mkdir -p \"${CLAUDE_PLUGIN_DATA}/operations\" && date +%s > \"${CLAUDE_PLUGIN_DATA}/operations/.session-start-ts\"",
+            "timeout": 2
+          },
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/daily-briefing-check.sh\"",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-start.sh\"",
+            "timeout": 35
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-stop.sh\"",
+            "timeout": 10
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-tool-logs.sh\"",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-session.sh\"",
+            "timeout": 3
+          },
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/inbox-hook.sh\"",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/protocol-guardrail.sh\"",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PreCompact": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/pre-compact.sh\"",
+            "timeout": 10
+          }
+        ]
+      }
+    ],
+    "PostCompact": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/post-compact.sh\"",
+            "timeout": 10
           }
         ]
       }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "nexo-brain",
-  "version": "2.7.0",
+  "version": "3.0.1",
   "mcpName": "io.github.wazionapps/nexo",
   "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
   "homepage": "https://nexo-brain.com",

package/src/agent_runner.py CHANGED

@@ -8,9 +8,14 @@ import shlex
 import shutil
 import subprocess
 import tempfile
-import tomllib
+import time
 from pathlib import Path
+try:
+    import tomllib
+except ModuleNotFoundError:  # Python < 3.11
+    import tomli as tomllib
 from client_preferences import (
     BACKEND_NONE,
     CLIENT_CLAUDE_CODE,
@@ -18,6 +23,7 @@ from client_preferences import (
     TERMINAL_CLIENT_KEYS,
     load_client_preferences,
     resolve_automation_backend,
+    resolve_automation_task_profile,
     resolve_client_runtime_profile,
     resolve_terminal_client,
 )
@@ -25,6 +31,12 @@ from client_preferences import (
 NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
 CLAUDE_LEGACY_MODEL_HINTS = {"opus", "sonnet"}
+MODEL_PRICING_USD_PER_1M = {
+    # Pricing snapshot used only when the backend does not return explicit cost.
+    # Codex model names map to the current GPT-5 family pricing.
+    "gpt-5.4": {"input": 1.25, "cached_input": 0.125, "output": 10.0},
+    "gpt-5.4-mini": {"input": 0.25, "cached_input": 0.025, "output": 2.0},
+}
 class AgentRunnerError(RuntimeError):
@@ -39,6 +51,192 @@ class AutomationBackendUnavailableError(AgentRunnerError):
     """Raised when the configured automation backend is unavailable."""
+def _canonical_pricing_model(model: str) -> str:
+    lowered = str(model or "").strip().lower()
+    lowered = lowered.split("[", 1)[0]
+    aliases = {
+        "gpt-5": "gpt-5.4",
+        "gpt-5.4": "gpt-5.4",
+        "gpt-5-mini": "gpt-5.4-mini",
+        "gpt-5.4-mini": "gpt-5.4-mini",
+    }
+    return aliases.get(lowered, lowered)
+def _estimate_openai_cost_usd(model: str, *, input_tokens: int, cached_input_tokens: int, output_tokens: int) -> tuple[float | None, str]:
+    pricing = MODEL_PRICING_USD_PER_1M.get(_canonical_pricing_model(model))
+    if not pricing:
+        return None, "pricing_unavailable"
+    total = 0.0
+    total += (max(0, int(input_tokens or 0)) / 1_000_000.0) * pricing["input"]
+    total += (max(0, int(cached_input_tokens or 0)) / 1_000_000.0) * pricing["cached_input"]
+    total += (max(0, int(output_tokens or 0)) / 1_000_000.0) * pricing["output"]
+    return round(total, 6), "pricing_snapshot"
+def _safe_json_loads(raw: str) -> dict | list | None:
+    try:
+        return json.loads(raw)
+    except Exception:
+        return None
+def _extract_claude_telemetry(raw_stdout: str, *, requested_output_format: str) -> tuple[str, dict]:
+    payload = _safe_json_loads(raw_stdout) if str(raw_stdout or "").strip().startswith("{") else None
+    if not isinstance(payload, dict):
+        return raw_stdout or "", {
+            "telemetry_source": "missing",
+            "cost_source": "missing",
+            "usage": {},
+            "warnings": ["backend did not return parseable JSON telemetry"],
+        }
+    result_payload = payload.get("result", "")
+    if requested_output_format and requested_output_format.lower() == "json" and not isinstance(result_payload, str):
+        final_stdout = json.dumps(result_payload, ensure_ascii=False)
+    else:
+        final_stdout = result_payload if isinstance(result_payload, str) else json.dumps(result_payload, ensure_ascii=False)
+    usage = payload.get("usage") or {}
+    model_usage = payload.get("modelUsage") or {}
+    explicit_cost = payload.get("total_cost_usd")
+    if explicit_cost is None and isinstance(model_usage, dict):
+        explicit_cost = sum(
+            float((item or {}).get("costUSD") or 0.0)
+            for item in model_usage.values()
+            if isinstance(item, dict)
+        )
+    return final_stdout, {
+        "telemetry_source": "claude_json",
+        "cost_source": "backend",
+        "usage": {
+            "input_tokens": int(usage.get("input_tokens") or 0),
+            "cached_input_tokens": int(usage.get("cache_read_input_tokens") or 0),
+            "output_tokens": int(usage.get("output_tokens") or 0),
+        },
+        "total_cost_usd": float(explicit_cost) if explicit_cost is not None else None,
+        "raw": payload,
+        "warnings": [],
+    }
+def _extract_codex_telemetry(stream_stdout: str, *, final_stdout: str, model: str) -> tuple[str, dict]:
+    usage_payload: dict = {}
+    raw_events: list[dict] = []
+    for line in str(stream_stdout or "").splitlines():
+        line = line.strip()
+        if not line.startswith("{"):
+            continue
+        payload = _safe_json_loads(line)
+        if not isinstance(payload, dict):
+            continue
+        raw_events.append(payload)
+        if payload.get("type") == "turn.completed" and isinstance(payload.get("usage"), dict):
+            usage_payload = payload["usage"]
+    usage = {
+        "input_tokens": int(usage_payload.get("input_tokens") or 0),
+        "cached_input_tokens": int(usage_payload.get("cached_input_tokens") or 0),
+        "output_tokens": int(usage_payload.get("output_tokens") or 0),
+    }
+    total_cost_usd = usage_payload.get("total_cost_usd")
+    cost_source = "backend" if total_cost_usd is not None else "missing"
+    warnings: list[str] = []
+    if total_cost_usd is None:
+        estimated_cost, estimated_source = _estimate_openai_cost_usd(
+            model,
+            input_tokens=usage["input_tokens"],
+            cached_input_tokens=usage["cached_input_tokens"],
+            output_tokens=usage["output_tokens"],
+        )
+        total_cost_usd = estimated_cost
+        cost_source = estimated_source
+        if estimated_cost is None:
+            warnings.append(f"no pricing snapshot available for model `{model}`")
+    if not usage_payload:
+        warnings.append("backend did not return usage telemetry")
+    return final_stdout, {
+        "telemetry_source": "codex_jsonl",
+        "cost_source": cost_source,
+        "usage": usage,
+        "total_cost_usd": float(total_cost_usd) if total_cost_usd is not None else None,
+        "raw": raw_events[-8:],
+        "warnings": warnings,
+    }
+def _append_stderr(stderr: str, message: str) -> str:
+    bits = [part for part in [str(stderr or "").rstrip(), str(message or "").strip()] if part]
+    if not bits:
+        return ""
+    return "\n".join(bits) + "\n"
+def _record_automation_run(
+    *,
+    backend: str,
+    task_profile: str,
+    model: str,
+    reasoning_effort: str,
+    cwd: Path,
+    output_format: str,
+    prompt: str,
+    returncode: int,
+    duration_ms: int,
+    telemetry: dict,
+) -> tuple[bool, str]:
+    try:
+        from db._core import get_db
+    except Exception as exc:
+        return False, f"automation telemetry unavailable: {exc}"
+    try:
+        conn = get_db()
+        usage = telemetry.get("usage") or {}
+        conn.execute(
+            """
+            INSERT INTO automation_runs (
+                backend, task_profile, model, reasoning_effort, cwd, output_format,
+                prompt_chars, returncode, duration_ms,
+                input_tokens, cached_input_tokens, output_tokens,
+                total_cost_usd, telemetry_source, cost_source, status, metadata
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """,
+            (
+                backend,
+                task_profile or "default",
+                model,
+                reasoning_effort,
+                str(cwd),
+                output_format or "text",
+                len(prompt or ""),
+                int(returncode),
+                int(duration_ms),
+                int(usage.get("input_tokens") or 0),
+                int(usage.get("cached_input_tokens") or 0),
+                int(usage.get("output_tokens") or 0),
+                telemetry.get("total_cost_usd"),
+                telemetry.get("telemetry_source", ""),
+                telemetry.get("cost_source", ""),
+                "ok" if int(returncode) == 0 else "failed",
+                json.dumps(
+                    {
+                        "warnings": telemetry.get("warnings") or [],
+                        "raw": telemetry.get("raw") or {},
+                    },
+                    ensure_ascii=False,
+                ),
+            ),
+        )
+        conn.commit()
+        return True, ""
+    except Exception as exc:
+        return False, f"automation telemetry unavailable: {exc}"
 def _resolve_claude_cli() -> str:
     saved = NEXO_HOME / "config" / "claude-cli-path"
     if saved.exists():
@@ -245,6 +443,27 @@ def _resolve_runtime_model_and_effort(
     return requested_model, requested_effort
+def _backend_is_available(backend: str) -> bool:
+    if backend == CLIENT_CLAUDE_CODE:
+        return bool(_resolve_claude_cli())
+    if backend == CLIENT_CODEX:
+        return bool(_resolve_codex_cli())
+    return False
+def _resolve_available_backend(selected_backend: str, *, preferences: dict | None = None) -> str:
+    if _backend_is_available(selected_backend):
+        return selected_backend
+    prefs = preferences or load_client_preferences()
+    preferred = resolve_automation_backend(preferences=prefs)
+    for candidate in (preferred, CLIENT_CLAUDE_CODE, CLIENT_CODEX):
+        if candidate == selected_backend or candidate == BACKEND_NONE:
+            continue
+        if _backend_is_available(candidate):
+            return candidate
+    return selected_backend
 def _build_codex_prompt(
     prompt: str,
     *,
@@ -252,7 +471,18 @@ def _build_codex_prompt(
     append_system_prompt: str = "",
     allowed_tools: str = "",
 ) -> str:
+    protocol_contract = (
+        "NEXO PROTOCOL (MANDATORY):\n"
+        "- Before non-trivial analyze/edit/execute/delegate work, call `nexo_task_open(...)`. "
+        "If that tool is unavailable, call `nexo_guard_check(...)` and `nexo_cortex_check(...)` first.\n"
+        "- For long multi-step or cross-session work, call `nexo_workflow_open(...)` and keep it updated with "
+        "`nexo_workflow_update(...)` so resume/replay use durable state instead of guesswork.\n"
+        "- If a target file has conditioned learnings or blocking guard rules, review them before any read/edit/delete step, and acknowledge guard before any edit/delete step.\n"
+        "- Do not claim done without explicit verification evidence. Close with `nexo_task_close(...)`; if unavailable, capture the change log and state the evidence explicitly.\n"
+        "- When a correction changes the canonical rule, capture or supersede the learning instead of leaving contradictory active rules behind."
+    )
     instructions: list[str] = []
+    instructions.append(protocol_contract)
     if append_system_prompt:
         instructions.append(f"SYSTEM INSTRUCTIONS:\n{append_system_prompt}")
     if output_format and output_format.lower() == "text":
@@ -273,6 +503,7 @@ def run_automation_prompt(
     prompt: str,
     *,
     backend: str | None = None,
+    task_profile: str = "",
     cwd: str | os.PathLike[str] | None = None,
     env: dict | None = None,
     model: str = "",
@@ -288,15 +519,26 @@ def run_automation_prompt(
     if selected_backend == BACKEND_NONE:
         raise AutomationBackendUnavailableError("Automation backend is disabled in config.")
+    if task_profile:
+        profile = resolve_automation_task_profile(task_profile, preferences=prefs)
+        selected_backend = profile["backend"] or selected_backend
+        if not model:
+            model = profile["model"]
+        if not reasoning_effort:
+            reasoning_effort = profile["reasoning_effort"]
+    selected_backend = _resolve_available_backend(selected_backend, preferences=prefs)
     cwd_path = Path(cwd).expanduser().resolve() if cwd else Path.cwd()
     run_env = _headless_env(env)
     extra_args = list(extra_args or [])
+    requested_output_format = output_format or "text"
     resolved_model, resolved_effort = _resolve_runtime_model_and_effort(
         selected_backend,
         model=model,
         reasoning_effort=reasoning_effort,
         preferences=prefs,
     )
+    started_at = time.perf_counter()
     if selected_backend == CLIENT_CLAUDE_CODE:
         claude_bin = _resolve_claude_cli()
@@ -309,14 +551,13 @@ def run_automation_prompt(
             cmd.extend(["--model", resolved_model])
         if resolved_effort:
             cmd.extend(["--effort", resolved_effort])
-        if output_format:
-            cmd.extend(["--output-format", output_format])
+        cmd.extend(["--output-format", "json"])
         if append_system_prompt:
             cmd.extend(["--append-system-prompt", append_system_prompt])
         if allowed_tools:
             cmd.extend(["--allowedTools", allowed_tools])
         cmd.extend(extra_args)
-        return subprocess.run(
+        result = subprocess.run(
             cmd,
             cwd=str(cwd_path),
             capture_output=True,
@@ -324,6 +565,31 @@ def run_automation_prompt(
             timeout=timeout,
             env=run_env,
         )
+        final_stdout, telemetry = _extract_claude_telemetry(
+            result.stdout or "",
+            requested_output_format=requested_output_format,
+        )
+        recorded, record_error = _record_automation_run(
+            backend=selected_backend,
+            task_profile=task_profile,
+            model=resolved_model,
+            reasoning_effort=resolved_effort,
+            cwd=cwd_path,
+            output_format=requested_output_format,
+            prompt=prompt,
+            returncode=result.returncode,
+            duration_ms=int((time.perf_counter() - started_at) * 1000),
+            telemetry=telemetry,
+        )
+        stderr = result.stderr or ""
+        if not recorded:
+            stderr = _append_stderr(stderr, record_error)
+        return subprocess.CompletedProcess(
+            cmd,
+            result.returncode,
+            final_stdout,
+            stderr,
+        )
     if selected_backend == CLIENT_CODEX:
         codex_bin = _resolve_codex_cli()
@@ -339,6 +605,7 @@ def run_automation_prompt(
                 "--skip-git-repo-check",
                 "--dangerously-bypass-approvals-and-sandbox",
                 "--ephemeral",
+                "--json",
                 "-C",
                 str(cwd_path),
                 "-o",
@@ -368,12 +635,33 @@ def run_automation_prompt(
                 timeout=timeout,
                 env=run_env,
             )
-            stdout = output_path.read_text() if output_path.exists() else (result.stdout or "")
+            raw_stdout = result.stdout or ""
+            stdout = output_path.read_text() if output_path.exists() else raw_stdout
+            final_stdout, telemetry = _extract_codex_telemetry(
+                raw_stdout,
+                final_stdout=stdout,
+                model=resolved_model,
+            )
+            recorded, record_error = _record_automation_run(
+                backend=selected_backend,
+                task_profile=task_profile,
+                model=resolved_model,
+                reasoning_effort=resolved_effort,
+                cwd=cwd_path,
+                output_format=requested_output_format,
+                prompt=prompt,
+                returncode=result.returncode,
+                duration_ms=int((time.perf_counter() - started_at) * 1000),
+                telemetry=telemetry,
+            )
+            stderr = result.stderr or ""
+            if not recorded:
+                stderr = _append_stderr(stderr, record_error)
             return subprocess.CompletedProcess(
                 cmd,
                 result.returncode,
-                stdout,
-                result.stderr,
+                final_stdout,
+                stderr,
             )
     raise AutomationBackendUnavailableError(f"Unsupported automation backend: {selected_backend}")