nexo-brain 2.7.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +66 -12
  3. package/hooks/hooks.json +79 -0
  4. package/package.json +1 -1
  5. package/src/agent_runner.py +295 -7
  6. package/src/cli.py +111 -0
  7. package/src/client_preferences.py +99 -1
  8. package/src/client_sync.py +207 -3
  9. package/src/cognitive/__init__.py +1 -1
  10. package/src/cognitive/_search.py +39 -19
  11. package/src/dashboard/app.py +141 -1
  12. package/src/dashboard/templates/base.html +4 -0
  13. package/src/dashboard/templates/protocol.html +199 -0
  14. package/src/db/__init__.py +23 -1
  15. package/src/db/_learnings.py +31 -4
  16. package/src/db/_personal_scripts.py +12 -0
  17. package/src/db/_protocol.py +303 -0
  18. package/src/db/_schema.py +248 -0
  19. package/src/db/_watchers.py +173 -0
  20. package/src/db/_workflow.py +952 -0
  21. package/src/doctor/providers/boot.py +45 -19
  22. package/src/doctor/providers/runtime.py +923 -8
  23. package/src/evolution_cycle.py +62 -0
  24. package/src/hook_guardrails.py +308 -0
  25. package/src/hooks/protocol-guardrail.sh +10 -0
  26. package/src/nexo_sdk.py +103 -0
  27. package/src/plugins/cognitive_memory.py +18 -0
  28. package/src/plugins/cortex.py +55 -35
  29. package/src/plugins/guard.py +132 -16
  30. package/src/plugins/protocol.py +911 -0
  31. package/src/plugins/schedule.py +40 -6
  32. package/src/plugins/simple_api.py +103 -0
  33. package/src/plugins/skills.py +67 -0
  34. package/src/plugins/state_watchers.py +79 -0
  35. package/src/plugins/workflow.py +588 -0
  36. package/src/public_contribution.py +86 -12
  37. package/src/requirements.txt +1 -0
  38. package/src/script_registry.py +142 -0
  39. package/src/scripts/deep-sleep/apply_findings.py +204 -0
  40. package/src/scripts/deep-sleep/collect.py +49 -4
  41. package/src/scripts/nexo-agent-run.py +2 -0
  42. package/src/scripts/nexo-daily-self-audit.py +843 -5
  43. package/src/scripts/nexo-evolution-run.py +343 -1
  44. package/src/server.py +92 -6
  45. package/src/skills_runtime.py +151 -0
  46. package/src/state_watchers_runtime.py +334 -0
  47. package/src/tools_learnings.py +345 -7
  48. package/src/tools_sessions.py +183 -0
  49. package/templates/CLAUDE.md.template +9 -1
  50. package/templates/CODEX.AGENTS.md.template +10 -2
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "2.7.0",
3
+ "version": "3.0.1",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![GitHub stars](https://img.shields.io/github/stars/wazionapps/nexo?style=social)](https://github.com/wazionapps/nexo/stargazers)
7
7
  [![License: AGPL-3.0](https://img.shields.io/badge/License-AGPL--3.0-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
8
8
 
9
- > Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
9
+ > Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, durable workflow runs, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
10
10
 
11
11
  **NEXO Brain transforms any MCP-compatible AI agent from a stateless assistant into a cognitive partner that remembers, learns, forgets, adapts, and builds a relationship with you over time.**
12
12
 
@@ -18,6 +18,16 @@
18
18
 
19
19
  [Watch the overview on YouTube](https://www.youtube.com/watch?v=IBs7zh7ZMG0) · [Watch the full deep-dive](https://www.youtube.com/watch?v=bKAfowyyy5M)
20
20
 
21
+ Start here:
22
+ - [5-minute quickstart](docs/quickstart-5-minutes.md)
23
+ - [Architecture visuals](docs/architecture-visuals.md)
24
+ - [Memory classes](docs/memory-classes.md)
25
+ - [Session portability](docs/session-portability.md)
26
+ - [Python SDK](docs/sdk-python.md)
27
+ - [Reference verticals](docs/reference-verticals.md)
28
+ - [Measured compare scorecard](compare/README.md)
29
+ - [Public contribution guide](docs/public-contribution.md)
30
+
21
31
  Every time you close a session, everything is lost. Your agent doesn't remember yesterday's decisions, repeats the same mistakes, and starts from zero. NEXO Brain fixes this with a cognitive architecture modeled after how human memory actually works.
22
32
 
23
33
  ## Shared Brain Across Clients
@@ -38,15 +48,32 @@ That means NEXO now manages not only the shared runtime and MCP wiring, but also
38
48
  - For Codex specifically, `nexo chat` and Codex headless automation inject the current bootstrap explicitly, so Codex starts as NEXO even when plain global Codex startup is inconsistent about global instructions.
39
49
  - Deep Sleep now reads both Claude Code and Codex transcript stores, so overnight analysis still works even when the user spends the day in Codex.
40
50
 
41
- Versions `2.6.14` through `2.6.21` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first Deep Sleep engineering loop.
42
-
43
- Version `2.7.0` closes the next operational gap:
44
-
45
- - Weekly/monthly Deep Sleep summaries now include protocol compliance, engineering-loop output, project pulse, and trend-vs-previous-period data.
46
- - Runtime doctor now audits both weekly protocol compliance and release-artifact sync drift instead of leaving those checks implicit.
47
- - The repo now ships `scripts/verify_release_readiness.py`, and tagged publish runs it automatically so release discipline is enforced in the product itself.
48
- - The dashboard now surfaces `What Matters Now`, `What Is Drifting`, and `What Is Improving` directly from the periodic Deep Sleep summaries.
49
- - The unreleased Codex launcher fixes after `v2.6.21` are now included: stronger `nexo chat` client selection, corrected launch mode handling, tracked last terminal choice, and aligned interactive flags.
51
+ Versions `2.6.14` through `2.7.0` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first measured engineering loop.
52
+
53
+ Versions `3.0.0` and `3.0.1` close the next execution gap:
54
+
55
+ - protocol discipline is now a runtime contract, not just instructions:
56
+ - `nexo_task_open`
57
+ - `nexo_task_close`
58
+ - persistent `protocol_debt`
59
+ - enforceable `Cortex` gates
60
+ - durable execution is now first-class:
61
+ - resumable workflow runs
62
+ - checkpoints
63
+ - replay
64
+ - retries
65
+ - durable goals
66
+ - conditioned learnings on critical files are now real guardrails across Claude hooks, Codex transcript audits, and headless automation prompts
67
+ - repair/correction work now routes through canonical learning capture instead of depending on the model to remember to document after the fact
68
+ - runtime truth is stricter:
69
+ - no more healthy-looking warning storms
70
+ - no more silent Deep Sleep schema drift
71
+ - keep-alive jobs report alive/degraded/duplicated honestly
72
+ - public proof is stronger:
73
+ - measured compare scorecard
74
+ - external and internal ablations
75
+ - `cost_per_solved_task`
76
+ - SDK/API/quickstart surface
50
77
 
51
78
  ### Client Capability Matrix
52
79
 
@@ -227,6 +254,20 @@ User message → Fast Path check → Simple chat? → Respond directly
227
254
 
228
255
  The Cortex was designed through a 3-way AI debate (Claude Opus 4.6 + GPT-5.4 + Gemini 3.1 Pro) and validated against 6 months of real production failures.
229
256
 
257
+ ## Durable Workflow Runtime
258
+
259
+ Memory and guardrails are not enough if long work still restarts from zero.
260
+
261
+ NEXO now ships a durable workflow runtime for multi-step and cross-session execution:
262
+
263
+ - `nexo_workflow_open` creates a persistent run with step metadata, idempotency key, priority, and shared state
264
+ - `nexo_workflow_update` records replayable checkpoints, retry metadata, approval gates, and the current actionable state
265
+ - `nexo_workflow_resume` tells the agent what to do next without guessing
266
+ - `nexo_workflow_replay` reconstructs the recent execution history honestly instead of pretending the run is still in memory
267
+ - `nexo_workflow_list` keeps active and blocked work visible so it does not disappear into reminders or prose notes
268
+
269
+ This is the bridge between "good memory" and "reliable execution": tasks can now preserve state, retries, approval gates, and next action across interruptions.
270
+
230
271
  ## Context Continuity (Auto-Compaction)
231
272
 
232
273
  NEXO Brain automatically preserves session context when Claude Code compacts conversations. Using PreCompact and PostCompact hooks:
@@ -642,6 +683,19 @@ nexo scripts list # See your personal scripts
642
683
 
643
684
  During install, NEXO now asks which interactive clients you want to connect, which one `nexo chat` should suggest first when multiple terminal clients are available, whether to enable background automation, which backend should run that automation, and which model profile each active terminal/backend should use. Shared brain stays on in every mode.
644
685
 
686
+ Public entry points for the mental model now stay intentionally small:
687
+ - `nexo_remember`
688
+ - `nexo_memory_recall`
689
+ - `nexo_consolidate`
690
+ - `nexo_run_workflow`
691
+
692
+ If you want the shell or Python wrappers instead of raw MCP tools:
693
+ - [docs/quickstart-5-minutes.md](docs/quickstart-5-minutes.md)
694
+ - [docs/memory-classes.md](docs/memory-classes.md)
695
+ - [docs/sdk-python.md](docs/sdk-python.md)
696
+ - [docs/reference-verticals.md](docs/reference-verticals.md)
697
+ - [compare/README.md](compare/README.md)
698
+
645
699
  Recommended defaults:
646
700
  - Claude Code: `Opus 4.6 with 1M context`
647
701
  - Codex: `gpt-5.4` with `xhigh` reasoning
@@ -714,7 +768,7 @@ nexo doctor --tier runtime --json # Machine-readable health report
714
768
  nexo doctor --fix # Apply deterministic repairs
715
769
  ```
716
770
 
717
- Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. See `docs/writing-scripts.md` for details.
771
+ Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. `nexo-agent-run.py` now also supports task profiles (`fast`, `balanced`, `deep`) plus safe backend fallback, so automations can prefer cheaper/faster Codex paths or deeper Claude paths without hardcoding one provider forever. See `docs/writing-scripts.md` for details.
718
772
 
719
773
  Skills v2 combine procedural guides with optional executable scripts. Personal skills live in `NEXO_HOME/skills/`, packaged core skills live in `NEXO_CODE/skills/` during development and `NEXO_HOME/skills-core/` in installed environments, and staged runtime copies live in `NEXO_HOME/skills-runtime/`. Execution is fully autonomous: Deep Sleep can evolve mature guide skills into executable drafts automatically, and runtime execution no longer waits for manual approval. See `docs/skills-v2.md` for the full model.
720
774
 
@@ -840,7 +894,7 @@ When Claude Desktop is installed, `nexo-brain`, `nexo update`, and `nexo clients
840
894
 
841
895
  ### Codex
842
896
 
843
- When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning.
897
+ When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning. Runtime Doctor also audits recent Codex sessions for NEXO startup markers and conditioned-file protocol discipline so parity drift does not hide behind the lack of native Claude-style hooks.
844
898
 
845
899
  ### OpenClaw
846
900
 
package/hooks/hooks.json CHANGED
@@ -2,10 +2,89 @@
2
2
  "hooks": {
3
3
  "SessionStart": [
4
4
  {
5
+ "matcher": "*",
5
6
  "hooks": [
6
7
  {
7
8
  "type": "command",
8
9
  "command": "diff -q \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\" >/dev/null 2>&1 || (python3 -m venv \"${CLAUDE_PLUGIN_DATA}/.venv\" 2>/dev/null; cp \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\"; \"${CLAUDE_PLUGIN_DATA}/.venv/bin/pip\" install --quiet -r \"${CLAUDE_PLUGIN_DATA}/requirements.txt\") || rm -f \"${CLAUDE_PLUGIN_DATA}/requirements.txt\""
10
+ },
11
+ {
12
+ "type": "command",
13
+ "command": "mkdir -p \"${CLAUDE_PLUGIN_DATA}/operations\" && date +%s > \"${CLAUDE_PLUGIN_DATA}/operations/.session-start-ts\"",
14
+ "timeout": 2
15
+ },
16
+ {
17
+ "type": "command",
18
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/daily-briefing-check.sh\"",
19
+ "timeout": 5
20
+ },
21
+ {
22
+ "type": "command",
23
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-start.sh\"",
24
+ "timeout": 35
25
+ }
26
+ ]
27
+ }
28
+ ],
29
+ "Stop": [
30
+ {
31
+ "matcher": "*",
32
+ "hooks": [
33
+ {
34
+ "type": "command",
35
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-stop.sh\"",
36
+ "timeout": 10
37
+ }
38
+ ]
39
+ }
40
+ ],
41
+ "PostToolUse": [
42
+ {
43
+ "matcher": "*",
44
+ "hooks": [
45
+ {
46
+ "type": "command",
47
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-tool-logs.sh\"",
48
+ "timeout": 5
49
+ },
50
+ {
51
+ "type": "command",
52
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-session.sh\"",
53
+ "timeout": 3
54
+ },
55
+ {
56
+ "type": "command",
57
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/inbox-hook.sh\"",
58
+ "timeout": 5
59
+ },
60
+ {
61
+ "type": "command",
62
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/protocol-guardrail.sh\"",
63
+ "timeout": 5
64
+ }
65
+ ]
66
+ }
67
+ ],
68
+ "PreCompact": [
69
+ {
70
+ "matcher": "*",
71
+ "hooks": [
72
+ {
73
+ "type": "command",
74
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/pre-compact.sh\"",
75
+ "timeout": 10
76
+ }
77
+ ]
78
+ }
79
+ ],
80
+ "PostCompact": [
81
+ {
82
+ "matcher": "*",
83
+ "hooks": [
84
+ {
85
+ "type": "command",
86
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/post-compact.sh\"",
87
+ "timeout": 10
9
88
  }
10
89
  ]
11
90
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "2.7.0",
3
+ "version": "3.0.1",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -8,9 +8,14 @@ import shlex
8
8
  import shutil
9
9
  import subprocess
10
10
  import tempfile
11
- import tomllib
11
+ import time
12
12
  from pathlib import Path
13
13
 
14
+ try:
15
+ import tomllib
16
+ except ModuleNotFoundError: # Python < 3.11
17
+ import tomli as tomllib
18
+
14
19
  from client_preferences import (
15
20
  BACKEND_NONE,
16
21
  CLIENT_CLAUDE_CODE,
@@ -18,6 +23,7 @@ from client_preferences import (
18
23
  TERMINAL_CLIENT_KEYS,
19
24
  load_client_preferences,
20
25
  resolve_automation_backend,
26
+ resolve_automation_task_profile,
21
27
  resolve_client_runtime_profile,
22
28
  resolve_terminal_client,
23
29
  )
@@ -25,6 +31,12 @@ from client_preferences import (
25
31
 
26
32
  NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
27
33
  CLAUDE_LEGACY_MODEL_HINTS = {"opus", "sonnet"}
34
+ MODEL_PRICING_USD_PER_1M = {
35
+ # Pricing snapshot used only when the backend does not return explicit cost.
36
+ # Codex model names map to the current GPT-5 family pricing.
37
+ "gpt-5.4": {"input": 1.25, "cached_input": 0.125, "output": 10.0},
38
+ "gpt-5.4-mini": {"input": 0.25, "cached_input": 0.025, "output": 2.0},
39
+ }
28
40
 
29
41
 
30
42
  class AgentRunnerError(RuntimeError):
@@ -39,6 +51,192 @@ class AutomationBackendUnavailableError(AgentRunnerError):
39
51
  """Raised when the configured automation backend is unavailable."""
40
52
 
41
53
 
54
+ def _canonical_pricing_model(model: str) -> str:
55
+ lowered = str(model or "").strip().lower()
56
+ lowered = lowered.split("[", 1)[0]
57
+ aliases = {
58
+ "gpt-5": "gpt-5.4",
59
+ "gpt-5.4": "gpt-5.4",
60
+ "gpt-5-mini": "gpt-5.4-mini",
61
+ "gpt-5.4-mini": "gpt-5.4-mini",
62
+ }
63
+ return aliases.get(lowered, lowered)
64
+
65
+
66
def _estimate_openai_cost_usd(model: str, *, input_tokens: int, cached_input_tokens: int, output_tokens: int) -> tuple[float | None, str]:
    """Estimate the USD cost of a run from token counts and the pricing snapshot.

    Returns ``(cost, source)``. When the canonicalized model has an entry in
    ``MODEL_PRICING_USD_PER_1M`` the cost is rounded to six decimals and the
    source is ``"pricing_snapshot"``; otherwise ``(None, "pricing_unavailable")``.
    Missing or negative token counts are counted as zero.
    """
    rates = MODEL_PRICING_USD_PER_1M.get(_canonical_pricing_model(model))
    if not rates:
        return None, "pricing_unavailable"

    # Rates are expressed per one million tokens.
    billed = (
        ("input", input_tokens),
        ("cached_input", cached_input_tokens),
        ("output", output_tokens),
    )
    estimate = 0.0
    for rate_key, token_count in billed:
        estimate += (max(0, int(token_count or 0)) / 1_000_000.0) * rates[rate_key]
    return round(estimate, 6), "pricing_snapshot"
75
+
76
+
77
+ def _safe_json_loads(raw: str) -> dict | list | None:
78
+ try:
79
+ return json.loads(raw)
80
+ except Exception:
81
+ return None
82
+
83
+
84
def _extract_claude_telemetry(raw_stdout: str, *, requested_output_format: str) -> tuple[str, dict]:
    """Split Claude's JSON envelope into caller-facing stdout and telemetry.

    Args:
        raw_stdout: Raw stdout captured from the Claude CLI run.
        requested_output_format: Output format the caller asked for. Kept for
            interface compatibility; the ``result`` field is returned verbatim
            when it is a string and JSON-encoded otherwise, for every format.

    Returns:
        ``(final_stdout, telemetry)``. When stdout is not a JSON object the
        raw stdout is passed through and the telemetry is marked ``"missing"``.
        Otherwise telemetry carries token usage, the total cost, the full
        parsed payload under ``"raw"`` and a list of warnings.

    The cost is taken from ``total_cost_usd`` or, failing that, from the sum
    of per-model ``costUSD`` values in ``modelUsage``. If neither is present
    the cost is ``None`` with ``cost_source="missing"`` rather than a
    fabricated ``0.0`` attributed to the backend.
    """
    looks_like_json = str(raw_stdout or "").strip().startswith("{")
    payload = _safe_json_loads(raw_stdout) if looks_like_json else None
    if not isinstance(payload, dict):
        return raw_stdout or "", {
            "telemetry_source": "missing",
            "cost_source": "missing",
            "usage": {},
            "warnings": ["backend did not return parseable JSON telemetry"],
        }

    result_payload = payload.get("result", "")
    if isinstance(result_payload, str):
        final_stdout = result_payload
    else:
        final_stdout = json.dumps(result_payload, ensure_ascii=False)

    # A malformed (non-mapping) usage field is treated as "no usage reported".
    usage = payload.get("usage")
    if not isinstance(usage, dict):
        usage = {}

    warnings: list[str] = []
    explicit_cost = payload.get("total_cost_usd")
    if explicit_cost is None:
        model_usage = payload.get("modelUsage")
        if isinstance(model_usage, dict):
            per_model_costs = [
                item.get("costUSD")
                for item in model_usage.values()
                if isinstance(item, dict) and item.get("costUSD") is not None
            ]
            # Only trust the sum when at least one model actually reported a
            # cost; summing nothing would otherwise look like a real $0 run.
            if per_model_costs:
                explicit_cost = sum(float(cost or 0.0) for cost in per_model_costs)
    if explicit_cost is None:
        warnings.append("backend did not return cost telemetry")

    return final_stdout, {
        "telemetry_source": "claude_json",
        "cost_source": "backend" if explicit_cost is not None else "missing",
        "usage": {
            "input_tokens": int(usage.get("input_tokens") or 0),
            "cached_input_tokens": int(usage.get("cache_read_input_tokens") or 0),
            "output_tokens": int(usage.get("output_tokens") or 0),
        },
        "total_cost_usd": float(explicit_cost) if explicit_cost is not None else None,
        "raw": payload,
        "warnings": warnings,
    }
122
+
123
+
124
def _extract_codex_telemetry(stream_stdout: str, *, final_stdout: str, model: str) -> tuple[str, dict]:
    """Derive run telemetry from Codex's JSONL event stream.

    Args:
        stream_stdout: Captured stdout containing one JSON event per line.
        final_stdout: Caller-facing output, returned unchanged.
        model: Model name used for the pricing fallback.

    Returns:
        ``(final_stdout, telemetry)``. Usage comes from the last
        ``turn.completed`` event that carries a ``usage`` mapping. When that
        mapping has no ``total_cost_usd`` the cost is estimated through
        ``_estimate_openai_cost_usd``. Only the last eight parsed events are
        kept under ``"raw"``.
    """
    stripped_lines = (line.strip() for line in str(stream_stdout or "").splitlines())
    decoded = (_safe_json_loads(text) for text in stripped_lines if text.startswith("{"))
    events: list[dict] = [event for event in decoded if isinstance(event, dict)]

    turn_usages = [
        event["usage"]
        for event in events
        if event.get("type") == "turn.completed" and isinstance(event.get("usage"), dict)
    ]
    reported: dict = turn_usages[-1] if turn_usages else {}

    usage = {
        field: int(reported.get(field) or 0)
        for field in ("input_tokens", "cached_input_tokens", "output_tokens")
    }

    notes: list[str] = []
    cost = reported.get("total_cost_usd")
    if cost is not None:
        source = "backend"
    else:
        # The backend gave no explicit cost; fall back to the pricing snapshot.
        cost, source = _estimate_openai_cost_usd(model, **usage)
        if cost is None:
            notes.append(f"no pricing snapshot available for model `{model}`")

    if not reported:
        notes.append("backend did not return usage telemetry")

    telemetry = {
        "telemetry_source": "codex_jsonl",
        "cost_source": source,
        "usage": usage,
        "total_cost_usd": None if cost is None else float(cost),
        "raw": events[-8:],
        "warnings": notes,
    }
    return final_stdout, telemetry
169
+
170
+
171
+ def _append_stderr(stderr: str, message: str) -> str:
172
+ bits = [part for part in [str(stderr or "").rstrip(), str(message or "").strip()] if part]
173
+ if not bits:
174
+ return ""
175
+ return "\n".join(bits) + "\n"
176
+
177
+
178
+ def _record_automation_run(
179
+ *,
180
+ backend: str,
181
+ task_profile: str,
182
+ model: str,
183
+ reasoning_effort: str,
184
+ cwd: Path,
185
+ output_format: str,
186
+ prompt: str,
187
+ returncode: int,
188
+ duration_ms: int,
189
+ telemetry: dict,
190
+ ) -> tuple[bool, str]:
191
+ try:
192
+ from db._core import get_db
193
+ except Exception as exc:
194
+ return False, f"automation telemetry unavailable: {exc}"
195
+
196
+ try:
197
+ conn = get_db()
198
+ usage = telemetry.get("usage") or {}
199
+ conn.execute(
200
+ """
201
+ INSERT INTO automation_runs (
202
+ backend, task_profile, model, reasoning_effort, cwd, output_format,
203
+ prompt_chars, returncode, duration_ms,
204
+ input_tokens, cached_input_tokens, output_tokens,
205
+ total_cost_usd, telemetry_source, cost_source, status, metadata
206
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
207
+ """,
208
+ (
209
+ backend,
210
+ task_profile or "default",
211
+ model,
212
+ reasoning_effort,
213
+ str(cwd),
214
+ output_format or "text",
215
+ len(prompt or ""),
216
+ int(returncode),
217
+ int(duration_ms),
218
+ int(usage.get("input_tokens") or 0),
219
+ int(usage.get("cached_input_tokens") or 0),
220
+ int(usage.get("output_tokens") or 0),
221
+ telemetry.get("total_cost_usd"),
222
+ telemetry.get("telemetry_source", ""),
223
+ telemetry.get("cost_source", ""),
224
+ "ok" if int(returncode) == 0 else "failed",
225
+ json.dumps(
226
+ {
227
+ "warnings": telemetry.get("warnings") or [],
228
+ "raw": telemetry.get("raw") or {},
229
+ },
230
+ ensure_ascii=False,
231
+ ),
232
+ ),
233
+ )
234
+ conn.commit()
235
+ return True, ""
236
+ except Exception as exc:
237
+ return False, f"automation telemetry unavailable: {exc}"
238
+
239
+
42
240
  def _resolve_claude_cli() -> str:
43
241
  saved = NEXO_HOME / "config" / "claude-cli-path"
44
242
  if saved.exists():
@@ -245,6 +443,27 @@ def _resolve_runtime_model_and_effort(
245
443
  return requested_model, requested_effort
246
444
 
247
445
 
446
def _backend_is_available(backend: str) -> bool:
    """Report whether the CLI for ``backend`` can be resolved.

    Only the Claude Code and Codex backends are recognized; any other value
    is reported as unavailable.
    """
    if backend == CLIENT_CLAUDE_CODE:
        cli_path = _resolve_claude_cli()
    elif backend == CLIENT_CODEX:
        cli_path = _resolve_codex_cli()
    else:
        return False
    return bool(cli_path)
452
+
453
+
454
def _resolve_available_backend(selected_backend: str, *, preferences: dict | None = None) -> str:
    """Return ``selected_backend`` if usable, otherwise the first usable fallback.

    Fallbacks are tried in order: the backend configured in the client
    preferences, then Claude Code, then Codex. The selected backend itself
    and the "none" backend are never used as fallbacks. When nothing is
    available the original selection is returned unchanged.
    """
    if _backend_is_available(selected_backend):
        return selected_backend

    prefs = preferences or load_client_preferences()
    configured = resolve_automation_backend(preferences=prefs)
    # Lazy generator: availability is probed only until the first hit.
    usable_fallbacks = (
        candidate
        for candidate in (configured, CLIENT_CLAUDE_CODE, CLIENT_CODEX)
        if candidate != selected_backend
        and candidate != BACKEND_NONE
        and _backend_is_available(candidate)
    )
    return next(usable_fallbacks, selected_backend)
465
+
466
+
248
467
  def _build_codex_prompt(
249
468
  prompt: str,
250
469
  *,
@@ -252,7 +471,18 @@ def _build_codex_prompt(
252
471
  append_system_prompt: str = "",
253
472
  allowed_tools: str = "",
254
473
  ) -> str:
474
+ protocol_contract = (
475
+ "NEXO PROTOCOL (MANDATORY):\n"
476
+ "- Before non-trivial analyze/edit/execute/delegate work, call `nexo_task_open(...)`. "
477
+ "If that tool is unavailable, call `nexo_guard_check(...)` and `nexo_cortex_check(...)` first.\n"
478
+ "- For long multi-step or cross-session work, call `nexo_workflow_open(...)` and keep it updated with "
479
+ "`nexo_workflow_update(...)` so resume/replay use durable state instead of guesswork.\n"
480
+ "- If a target file has conditioned learnings or blocking guard rules, review them before any read/edit/delete step, and acknowledge guard before any edit/delete step.\n"
481
+ "- Do not claim done without explicit verification evidence. Close with `nexo_task_close(...)`; if unavailable, capture the change log and state the evidence explicitly.\n"
482
+ "- When a correction changes the canonical rule, capture or supersede the learning instead of leaving contradictory active rules behind."
483
+ )
255
484
  instructions: list[str] = []
485
+ instructions.append(protocol_contract)
256
486
  if append_system_prompt:
257
487
  instructions.append(f"SYSTEM INSTRUCTIONS:\n{append_system_prompt}")
258
488
  if output_format and output_format.lower() == "text":
@@ -273,6 +503,7 @@ def run_automation_prompt(
273
503
  prompt: str,
274
504
  *,
275
505
  backend: str | None = None,
506
+ task_profile: str = "",
276
507
  cwd: str | os.PathLike[str] | None = None,
277
508
  env: dict | None = None,
278
509
  model: str = "",
@@ -288,15 +519,26 @@ def run_automation_prompt(
288
519
  if selected_backend == BACKEND_NONE:
289
520
  raise AutomationBackendUnavailableError("Automation backend is disabled in config.")
290
521
 
522
+ if task_profile:
523
+ profile = resolve_automation_task_profile(task_profile, preferences=prefs)
524
+ selected_backend = profile["backend"] or selected_backend
525
+ if not model:
526
+ model = profile["model"]
527
+ if not reasoning_effort:
528
+ reasoning_effort = profile["reasoning_effort"]
529
+ selected_backend = _resolve_available_backend(selected_backend, preferences=prefs)
530
+
291
531
  cwd_path = Path(cwd).expanduser().resolve() if cwd else Path.cwd()
292
532
  run_env = _headless_env(env)
293
533
  extra_args = list(extra_args or [])
534
+ requested_output_format = output_format or "text"
294
535
  resolved_model, resolved_effort = _resolve_runtime_model_and_effort(
295
536
  selected_backend,
296
537
  model=model,
297
538
  reasoning_effort=reasoning_effort,
298
539
  preferences=prefs,
299
540
  )
541
+ started_at = time.perf_counter()
300
542
 
301
543
  if selected_backend == CLIENT_CLAUDE_CODE:
302
544
  claude_bin = _resolve_claude_cli()
@@ -309,14 +551,13 @@ def run_automation_prompt(
309
551
  cmd.extend(["--model", resolved_model])
310
552
  if resolved_effort:
311
553
  cmd.extend(["--effort", resolved_effort])
312
- if output_format:
313
- cmd.extend(["--output-format", output_format])
554
+ cmd.extend(["--output-format", "json"])
314
555
  if append_system_prompt:
315
556
  cmd.extend(["--append-system-prompt", append_system_prompt])
316
557
  if allowed_tools:
317
558
  cmd.extend(["--allowedTools", allowed_tools])
318
559
  cmd.extend(extra_args)
319
- return subprocess.run(
560
+ result = subprocess.run(
320
561
  cmd,
321
562
  cwd=str(cwd_path),
322
563
  capture_output=True,
@@ -324,6 +565,31 @@ def run_automation_prompt(
324
565
  timeout=timeout,
325
566
  env=run_env,
326
567
  )
568
+ final_stdout, telemetry = _extract_claude_telemetry(
569
+ result.stdout or "",
570
+ requested_output_format=requested_output_format,
571
+ )
572
+ recorded, record_error = _record_automation_run(
573
+ backend=selected_backend,
574
+ task_profile=task_profile,
575
+ model=resolved_model,
576
+ reasoning_effort=resolved_effort,
577
+ cwd=cwd_path,
578
+ output_format=requested_output_format,
579
+ prompt=prompt,
580
+ returncode=result.returncode,
581
+ duration_ms=int((time.perf_counter() - started_at) * 1000),
582
+ telemetry=telemetry,
583
+ )
584
+ stderr = result.stderr or ""
585
+ if not recorded:
586
+ stderr = _append_stderr(stderr, record_error)
587
+ return subprocess.CompletedProcess(
588
+ cmd,
589
+ result.returncode,
590
+ final_stdout,
591
+ stderr,
592
+ )
327
593
 
328
594
  if selected_backend == CLIENT_CODEX:
329
595
  codex_bin = _resolve_codex_cli()
@@ -339,6 +605,7 @@ def run_automation_prompt(
339
605
  "--skip-git-repo-check",
340
606
  "--dangerously-bypass-approvals-and-sandbox",
341
607
  "--ephemeral",
608
+ "--json",
342
609
  "-C",
343
610
  str(cwd_path),
344
611
  "-o",
@@ -368,12 +635,33 @@ def run_automation_prompt(
368
635
  timeout=timeout,
369
636
  env=run_env,
370
637
  )
371
- stdout = output_path.read_text() if output_path.exists() else (result.stdout or "")
638
+ raw_stdout = result.stdout or ""
639
+ stdout = output_path.read_text() if output_path.exists() else raw_stdout
640
+ final_stdout, telemetry = _extract_codex_telemetry(
641
+ raw_stdout,
642
+ final_stdout=stdout,
643
+ model=resolved_model,
644
+ )
645
+ recorded, record_error = _record_automation_run(
646
+ backend=selected_backend,
647
+ task_profile=task_profile,
648
+ model=resolved_model,
649
+ reasoning_effort=resolved_effort,
650
+ cwd=cwd_path,
651
+ output_format=requested_output_format,
652
+ prompt=prompt,
653
+ returncode=result.returncode,
654
+ duration_ms=int((time.perf_counter() - started_at) * 1000),
655
+ telemetry=telemetry,
656
+ )
657
+ stderr = result.stderr or ""
658
+ if not recorded:
659
+ stderr = _append_stderr(stderr, record_error)
372
660
  return subprocess.CompletedProcess(
373
661
  cmd,
374
662
  result.returncode,
375
- stdout,
376
- result.stderr,
663
+ final_stdout,
664
+ stderr,
377
665
  )
378
666
 
379
667
  raise AutomationBackendUnavailableError(f"Unsupported automation backend: {selected_backend}")