nexo-brain 2.7.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +66 -12
  3. package/hooks/hooks.json +79 -0
  4. package/package.json +1 -1
  5. package/src/agent_runner.py +290 -6
  6. package/src/cli.py +111 -0
  7. package/src/client_preferences.py +94 -0
  8. package/src/client_sync.py +202 -2
  9. package/src/cognitive/__init__.py +1 -1
  10. package/src/cognitive/_search.py +39 -19
  11. package/src/dashboard/app.py +140 -0
  12. package/src/dashboard/templates/base.html +4 -0
  13. package/src/dashboard/templates/protocol.html +199 -0
  14. package/src/db/__init__.py +23 -1
  15. package/src/db/_learnings.py +31 -4
  16. package/src/db/_personal_scripts.py +12 -0
  17. package/src/db/_protocol.py +303 -0
  18. package/src/db/_schema.py +248 -0
  19. package/src/db/_watchers.py +173 -0
  20. package/src/db/_workflow.py +952 -0
  21. package/src/doctor/providers/runtime.py +918 -7
  22. package/src/evolution_cycle.py +62 -0
  23. package/src/hook_guardrails.py +308 -0
  24. package/src/hooks/protocol-guardrail.sh +10 -0
  25. package/src/nexo_sdk.py +103 -0
  26. package/src/plugins/cognitive_memory.py +18 -0
  27. package/src/plugins/cortex.py +55 -35
  28. package/src/plugins/guard.py +132 -16
  29. package/src/plugins/protocol.py +911 -0
  30. package/src/plugins/schedule.py +40 -6
  31. package/src/plugins/simple_api.py +103 -0
  32. package/src/plugins/skills.py +67 -0
  33. package/src/plugins/state_watchers.py +79 -0
  34. package/src/plugins/workflow.py +588 -0
  35. package/src/public_contribution.py +86 -12
  36. package/src/script_registry.py +142 -0
  37. package/src/scripts/deep-sleep/apply_findings.py +204 -0
  38. package/src/scripts/deep-sleep/collect.py +49 -4
  39. package/src/scripts/nexo-agent-run.py +2 -0
  40. package/src/scripts/nexo-daily-self-audit.py +843 -5
  41. package/src/scripts/nexo-evolution-run.py +343 -1
  42. package/src/server.py +92 -6
  43. package/src/skills_runtime.py +151 -0
  44. package/src/state_watchers_runtime.py +334 -0
  45. package/src/tools_learnings.py +345 -7
  46. package/src/tools_sessions.py +183 -0
  47. package/templates/CLAUDE.md.template +9 -1
  48. package/templates/CODEX.AGENTS.md.template +10 -2
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "2.7.0",
3
+ "version": "3.0.0",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![GitHub stars](https://img.shields.io/github/stars/wazionapps/nexo?style=social)](https://github.com/wazionapps/nexo/stargazers)
7
7
  [![License: AGPL-3.0](https://img.shields.io/badge/License-AGPL--3.0-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
8
8
 
9
- > Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
9
+ > Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, durable workflow runs, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
10
10
 
11
11
  **NEXO Brain transforms any MCP-compatible AI agent from a stateless assistant into a cognitive partner that remembers, learns, forgets, adapts, and builds a relationship with you over time.**
12
12
 
@@ -18,6 +18,16 @@
18
18
 
19
19
  [Watch the overview on YouTube](https://www.youtube.com/watch?v=IBs7zh7ZMG0) · [Watch the full deep-dive](https://www.youtube.com/watch?v=bKAfowyyy5M)
20
20
 
21
+ Start here:
22
+ - [5-minute quickstart](docs/quickstart-5-minutes.md)
23
+ - [Architecture visuals](docs/architecture-visuals.md)
24
+ - [Memory classes](docs/memory-classes.md)
25
+ - [Session portability](docs/session-portability.md)
26
+ - [Python SDK](docs/sdk-python.md)
27
+ - [Reference verticals](docs/reference-verticals.md)
28
+ - [Measured compare scorecard](compare/README.md)
29
+ - [Public contribution guide](docs/public-contribution.md)
30
+
21
31
  Every time you close a session, everything is lost. Your agent doesn't remember yesterday's decisions, repeats the same mistakes, and starts from zero. NEXO Brain fixes this with a cognitive architecture modeled after how human memory actually works.
22
32
 
23
33
  ## Shared Brain Across Clients
@@ -38,15 +48,32 @@ That means NEXO now manages not only the shared runtime and MCP wiring, but also
38
48
  - For Codex specifically, `nexo chat` and Codex headless automation inject the current bootstrap explicitly, so Codex starts as NEXO even when plain global Codex startup is inconsistent about global instructions.
39
49
  - Deep Sleep now reads both Claude Code and Codex transcript stores, so overnight analysis still works even when the user spends the day in Codex.
40
50
 
41
- Versions `2.6.14` through `2.6.21` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first Deep Sleep engineering loop.
42
-
43
- Version `2.7.0` closes the next operational gap:
44
-
45
- - Weekly/monthly Deep Sleep summaries now include protocol compliance, engineering-loop output, project pulse, and trend-vs-previous-period data.
46
- - Runtime doctor now audits both weekly protocol compliance and release-artifact sync drift instead of leaving those checks implicit.
47
- - The repo now ships `scripts/verify_release_readiness.py`, and tagged publish runs it automatically so release discipline is enforced in the product itself.
48
- - The dashboard now surfaces `What Matters Now`, `What Is Drifting`, and `What Is Improving` directly from the periodic Deep Sleep summaries.
49
- - The unreleased Codex launcher fixes after `v2.6.21` are now included: stronger `nexo chat` client selection, corrected launch mode handling, tracked last terminal choice, and aligned interactive flags.
51
+ Versions `2.6.14` through `2.7.0` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first measured engineering loop.
52
+
53
+ Version `3.0.0` closes the next execution gap:
54
+
55
+ - protocol discipline is now a runtime contract, not just instructions:
56
+ - `nexo_task_open`
57
+ - `nexo_task_close`
58
+ - persistent `protocol_debt`
59
+ - enforceable `Cortex` gates
60
+ - durable execution is now first-class:
61
+ - resumable workflow runs
62
+ - checkpoints
63
+ - replay
64
+ - retries
65
+ - durable goals
66
+ - conditioned learnings on critical files are now real guardrails across Claude hooks, Codex transcript audits, and headless automation prompts
67
+ - repair/correction work now routes through canonical learning capture instead of depending on the model to remember to document after the fact
68
+ - runtime truth is stricter:
69
+ - no more healthy-looking warning storms
70
+ - no more silent Deep Sleep schema drift
71
+ - keep-alive jobs report alive/degraded/duplicated honestly
72
+ - public proof is stronger:
73
+ - measured compare scorecard
74
+ - external and internal ablations
75
+ - `cost_per_solved_task`
76
+ - SDK/API/quickstart surface
50
77
 
51
78
  ### Client Capability Matrix
52
79
 
@@ -227,6 +254,20 @@ User message → Fast Path check → Simple chat? → Respond directly
227
254
 
228
255
  The Cortex was designed through a 3-way AI debate (Claude Opus 4.6 + GPT-5.4 + Gemini 3.1 Pro) and validated against 6 months of real production failures.
229
256
 
257
+ ## Durable Workflow Runtime
258
+
259
+ Memory and guardrails are not enough if long work still restarts from zero.
260
+
261
+ NEXO now ships a durable workflow runtime for multi-step and cross-session execution:
262
+
263
+ - `nexo_workflow_open` creates a persistent run with step metadata, idempotency key, priority, and shared state
264
+ - `nexo_workflow_update` records replayable checkpoints, retry metadata, approval gates, and the current actionable state
265
+ - `nexo_workflow_resume` tells the agent what to do next without guessing
266
+ - `nexo_workflow_replay` reconstructs the recent execution history honestly instead of pretending the run is still in memory
267
+ - `nexo_workflow_list` keeps active and blocked work visible so it does not disappear into reminders or prose notes
268
+
269
+ This is the bridge between "good memory" and "reliable execution": tasks can now preserve state, retries, approval gates, and next action across interruptions.
270
+
230
271
  ## Context Continuity (Auto-Compaction)
231
272
 
232
273
  NEXO Brain automatically preserves session context when Claude Code compacts conversations. Using PreCompact and PostCompact hooks:
@@ -642,6 +683,19 @@ nexo scripts list # See your personal scripts
642
683
 
643
684
  During install, NEXO now asks which interactive clients you want to connect, which one `nexo chat` should suggest first when multiple terminal clients are available, whether to enable background automation, which backend should run that automation, and which model profile each active terminal/backend should use. Shared brain stays on in every mode.
644
685
 
686
+ Public entry points for the mental model now stay intentionally small:
687
+ - `nexo_remember`
688
+ - `nexo_memory_recall`
689
+ - `nexo_consolidate`
690
+ - `nexo_run_workflow`
691
+
692
+ If you want the shell or Python wrappers instead of raw MCP tools:
693
+ - [docs/quickstart-5-minutes.md](docs/quickstart-5-minutes.md)
694
+ - [docs/memory-classes.md](docs/memory-classes.md)
695
+ - [docs/sdk-python.md](docs/sdk-python.md)
696
+ - [docs/reference-verticals.md](docs/reference-verticals.md)
697
+ - [compare/README.md](compare/README.md)
698
+
645
699
  Recommended defaults:
646
700
  - Claude Code: `Opus 4.6 with 1M context`
647
701
  - Codex: `gpt-5.4` with `xhigh` reasoning
@@ -714,7 +768,7 @@ nexo doctor --tier runtime --json # Machine-readable health report
714
768
  nexo doctor --fix # Apply deterministic repairs
715
769
  ```
716
770
 
717
- Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. See `docs/writing-scripts.md` for details.
771
+ Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. `nexo-agent-run.py` now also supports task profiles (`fast`, `balanced`, `deep`) plus safe backend fallback, so automations can prefer cheaper/faster Codex paths or deeper Claude paths without hardcoding one provider forever. See `docs/writing-scripts.md` for details.
718
772
 
719
773
  Skills v2 combine procedural guides with optional executable scripts. Personal skills live in `NEXO_HOME/skills/`, packaged core skills live in `NEXO_CODE/skills/` during development and `NEXO_HOME/skills-core/` in installed environments, and staged runtime copies live in `NEXO_HOME/skills-runtime/`. Execution is fully autonomous: Deep Sleep can evolve mature guide skills into executable drafts automatically, and runtime execution no longer waits for manual approval. See `docs/skills-v2.md` for the full model.
720
774
 
@@ -840,7 +894,7 @@ When Claude Desktop is installed, `nexo-brain`, `nexo update`, and `nexo clients
840
894
 
841
895
  ### Codex
842
896
 
843
- When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning.
897
+ When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning. Runtime Doctor also audits recent Codex sessions for NEXO startup markers and conditioned-file protocol discipline so parity drift does not hide behind the lack of native Claude-style hooks.
844
898
 
845
899
  ### OpenClaw
846
900
 
package/hooks/hooks.json CHANGED
@@ -2,10 +2,89 @@
2
2
  "hooks": {
3
3
  "SessionStart": [
4
4
  {
5
+ "matcher": "*",
5
6
  "hooks": [
6
7
  {
7
8
  "type": "command",
8
9
  "command": "diff -q \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\" >/dev/null 2>&1 || (python3 -m venv \"${CLAUDE_PLUGIN_DATA}/.venv\" 2>/dev/null; cp \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\"; \"${CLAUDE_PLUGIN_DATA}/.venv/bin/pip\" install --quiet -r \"${CLAUDE_PLUGIN_DATA}/requirements.txt\") || rm -f \"${CLAUDE_PLUGIN_DATA}/requirements.txt\""
10
+ },
11
+ {
12
+ "type": "command",
13
+ "command": "mkdir -p \"${CLAUDE_PLUGIN_DATA}/operations\" && date +%s > \"${CLAUDE_PLUGIN_DATA}/operations/.session-start-ts\"",
14
+ "timeout": 2
15
+ },
16
+ {
17
+ "type": "command",
18
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/daily-briefing-check.sh\"",
19
+ "timeout": 5
20
+ },
21
+ {
22
+ "type": "command",
23
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-start.sh\"",
24
+ "timeout": 35
25
+ }
26
+ ]
27
+ }
28
+ ],
29
+ "Stop": [
30
+ {
31
+ "matcher": "*",
32
+ "hooks": [
33
+ {
34
+ "type": "command",
35
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-stop.sh\"",
36
+ "timeout": 10
37
+ }
38
+ ]
39
+ }
40
+ ],
41
+ "PostToolUse": [
42
+ {
43
+ "matcher": "*",
44
+ "hooks": [
45
+ {
46
+ "type": "command",
47
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-tool-logs.sh\"",
48
+ "timeout": 5
49
+ },
50
+ {
51
+ "type": "command",
52
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-session.sh\"",
53
+ "timeout": 3
54
+ },
55
+ {
56
+ "type": "command",
57
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/inbox-hook.sh\"",
58
+ "timeout": 5
59
+ },
60
+ {
61
+ "type": "command",
62
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/protocol-guardrail.sh\"",
63
+ "timeout": 5
64
+ }
65
+ ]
66
+ }
67
+ ],
68
+ "PreCompact": [
69
+ {
70
+ "matcher": "*",
71
+ "hooks": [
72
+ {
73
+ "type": "command",
74
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/pre-compact.sh\"",
75
+ "timeout": 10
76
+ }
77
+ ]
78
+ }
79
+ ],
80
+ "PostCompact": [
81
+ {
82
+ "matcher": "*",
83
+ "hooks": [
84
+ {
85
+ "type": "command",
86
+ "command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/post-compact.sh\"",
87
+ "timeout": 10
9
88
  }
10
89
  ]
11
90
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "2.7.0",
3
+ "version": "3.0.0",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -8,6 +8,7 @@ import shlex
8
8
  import shutil
9
9
  import subprocess
10
10
  import tempfile
11
+ import time
11
12
  import tomllib
12
13
  from pathlib import Path
13
14
 
@@ -18,6 +19,7 @@ from client_preferences import (
18
19
  TERMINAL_CLIENT_KEYS,
19
20
  load_client_preferences,
20
21
  resolve_automation_backend,
22
+ resolve_automation_task_profile,
21
23
  resolve_client_runtime_profile,
22
24
  resolve_terminal_client,
23
25
  )
@@ -25,6 +27,12 @@ from client_preferences import (
25
27
 
26
28
  NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
27
29
  CLAUDE_LEGACY_MODEL_HINTS = {"opus", "sonnet"}
30
+ MODEL_PRICING_USD_PER_1M = {
31
+ # Pricing snapshot used only when the backend does not return explicit cost.
32
+ # Codex model names map to the current GPT-5 family pricing.
33
+ "gpt-5.4": {"input": 1.25, "cached_input": 0.125, "output": 10.0},
34
+ "gpt-5.4-mini": {"input": 0.25, "cached_input": 0.025, "output": 2.0},
35
+ }
28
36
 
29
37
 
30
38
  class AgentRunnerError(RuntimeError):
@@ -39,6 +47,192 @@ class AutomationBackendUnavailableError(AgentRunnerError):
39
47
  """Raised when the configured automation backend is unavailable."""
40
48
 
41
49
 
50
+ def _canonical_pricing_model(model: str) -> str:
51
+ lowered = str(model or "").strip().lower()
52
+ lowered = lowered.split("[", 1)[0]
53
+ aliases = {
54
+ "gpt-5": "gpt-5.4",
55
+ "gpt-5.4": "gpt-5.4",
56
+ "gpt-5-mini": "gpt-5.4-mini",
57
+ "gpt-5.4-mini": "gpt-5.4-mini",
58
+ }
59
+ return aliases.get(lowered, lowered)
60
+
61
+
62
def _estimate_openai_cost_usd(model: str, *, input_tokens: int, cached_input_tokens: int, output_tokens: int) -> tuple[float | None, str]:
    """Estimate the USD cost of a run from the local pricing snapshot.

    Args:
        model: Model name; canonicalized before the pricing lookup.
        input_tokens: Input token count reported by the backend.
        cached_input_tokens: Cached input token count reported by the backend.
        output_tokens: Output token count reported by the backend.

    Returns:
        ``(cost, source)``. ``(None, "pricing_unavailable")`` when the model
        has no snapshot entry; otherwise the cost rounded to 6 decimals and
        ``"pricing_snapshot"``. Missing or negative counts are treated as 0.
    """
    pricing = MODEL_PRICING_USD_PER_1M.get(_canonical_pricing_model(model))
    if not pricing:
        return None, "pricing_unavailable"

    total_input = max(0, int(input_tokens or 0))
    cached_input = max(0, int(cached_input_tokens or 0))
    output = max(0, int(output_tokens or 0))

    # OpenAI-style usage reports cached tokens as a subset of input_tokens, so
    # charging the full input count at the uncached rate AND the cached count
    # at the cached rate bills those tokens twice. Only the uncached remainder
    # gets the full input rate.
    # NOTE(review): if a backend ever reports the two counters disjointly
    # (cached > input), fall back to treating input_tokens as fully uncached.
    if cached_input <= total_input:
        uncached_input = total_input - cached_input
    else:
        uncached_input = total_input

    total = 0.0
    total += (uncached_input / 1_000_000.0) * pricing["input"]
    total += (cached_input / 1_000_000.0) * pricing["cached_input"]
    total += (output / 1_000_000.0) * pricing["output"]
    return round(total, 6), "pricing_snapshot"
71
+
72
+
73
+ def _safe_json_loads(raw: str) -> dict | list | None:
74
+ try:
75
+ return json.loads(raw)
76
+ except Exception:
77
+ return None
78
+
79
+
80
+ def _extract_claude_telemetry(raw_stdout: str, *, requested_output_format: str) -> tuple[str, dict]:
81
+ payload = _safe_json_loads(raw_stdout) if str(raw_stdout or "").strip().startswith("{") else None
82
+ if not isinstance(payload, dict):
83
+ return raw_stdout or "", {
84
+ "telemetry_source": "missing",
85
+ "cost_source": "missing",
86
+ "usage": {},
87
+ "warnings": ["backend did not return parseable JSON telemetry"],
88
+ }
89
+
90
+ result_payload = payload.get("result", "")
91
+ if requested_output_format and requested_output_format.lower() == "json" and not isinstance(result_payload, str):
92
+ final_stdout = json.dumps(result_payload, ensure_ascii=False)
93
+ else:
94
+ final_stdout = result_payload if isinstance(result_payload, str) else json.dumps(result_payload, ensure_ascii=False)
95
+
96
+ usage = payload.get("usage") or {}
97
+ model_usage = payload.get("modelUsage") or {}
98
+ explicit_cost = payload.get("total_cost_usd")
99
+ if explicit_cost is None and isinstance(model_usage, dict):
100
+ explicit_cost = sum(
101
+ float((item or {}).get("costUSD") or 0.0)
102
+ for item in model_usage.values()
103
+ if isinstance(item, dict)
104
+ )
105
+
106
+ return final_stdout, {
107
+ "telemetry_source": "claude_json",
108
+ "cost_source": "backend",
109
+ "usage": {
110
+ "input_tokens": int(usage.get("input_tokens") or 0),
111
+ "cached_input_tokens": int(usage.get("cache_read_input_tokens") or 0),
112
+ "output_tokens": int(usage.get("output_tokens") or 0),
113
+ },
114
+ "total_cost_usd": float(explicit_cost) if explicit_cost is not None else None,
115
+ "raw": payload,
116
+ "warnings": [],
117
+ }
118
+
119
+
120
+ def _extract_codex_telemetry(stream_stdout: str, *, final_stdout: str, model: str) -> tuple[str, dict]:
121
+ usage_payload: dict = {}
122
+ raw_events: list[dict] = []
123
+ for line in str(stream_stdout or "").splitlines():
124
+ line = line.strip()
125
+ if not line.startswith("{"):
126
+ continue
127
+ payload = _safe_json_loads(line)
128
+ if not isinstance(payload, dict):
129
+ continue
130
+ raw_events.append(payload)
131
+ if payload.get("type") == "turn.completed" and isinstance(payload.get("usage"), dict):
132
+ usage_payload = payload["usage"]
133
+
134
+ usage = {
135
+ "input_tokens": int(usage_payload.get("input_tokens") or 0),
136
+ "cached_input_tokens": int(usage_payload.get("cached_input_tokens") or 0),
137
+ "output_tokens": int(usage_payload.get("output_tokens") or 0),
138
+ }
139
+ total_cost_usd = usage_payload.get("total_cost_usd")
140
+ cost_source = "backend" if total_cost_usd is not None else "missing"
141
+ warnings: list[str] = []
142
+ if total_cost_usd is None:
143
+ estimated_cost, estimated_source = _estimate_openai_cost_usd(
144
+ model,
145
+ input_tokens=usage["input_tokens"],
146
+ cached_input_tokens=usage["cached_input_tokens"],
147
+ output_tokens=usage["output_tokens"],
148
+ )
149
+ total_cost_usd = estimated_cost
150
+ cost_source = estimated_source
151
+ if estimated_cost is None:
152
+ warnings.append(f"no pricing snapshot available for model `{model}`")
153
+
154
+ if not usage_payload:
155
+ warnings.append("backend did not return usage telemetry")
156
+
157
+ return final_stdout, {
158
+ "telemetry_source": "codex_jsonl",
159
+ "cost_source": cost_source,
160
+ "usage": usage,
161
+ "total_cost_usd": float(total_cost_usd) if total_cost_usd is not None else None,
162
+ "raw": raw_events[-8:],
163
+ "warnings": warnings,
164
+ }
165
+
166
+
167
+ def _append_stderr(stderr: str, message: str) -> str:
168
+ bits = [part for part in [str(stderr or "").rstrip(), str(message or "").strip()] if part]
169
+ if not bits:
170
+ return ""
171
+ return "\n".join(bits) + "\n"
172
+
173
+
174
+ def _record_automation_run(
175
+ *,
176
+ backend: str,
177
+ task_profile: str,
178
+ model: str,
179
+ reasoning_effort: str,
180
+ cwd: Path,
181
+ output_format: str,
182
+ prompt: str,
183
+ returncode: int,
184
+ duration_ms: int,
185
+ telemetry: dict,
186
+ ) -> tuple[bool, str]:
187
+ try:
188
+ from db._core import get_db
189
+ except Exception as exc:
190
+ return False, f"automation telemetry unavailable: {exc}"
191
+
192
+ try:
193
+ conn = get_db()
194
+ usage = telemetry.get("usage") or {}
195
+ conn.execute(
196
+ """
197
+ INSERT INTO automation_runs (
198
+ backend, task_profile, model, reasoning_effort, cwd, output_format,
199
+ prompt_chars, returncode, duration_ms,
200
+ input_tokens, cached_input_tokens, output_tokens,
201
+ total_cost_usd, telemetry_source, cost_source, status, metadata
202
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
203
+ """,
204
+ (
205
+ backend,
206
+ task_profile or "default",
207
+ model,
208
+ reasoning_effort,
209
+ str(cwd),
210
+ output_format or "text",
211
+ len(prompt or ""),
212
+ int(returncode),
213
+ int(duration_ms),
214
+ int(usage.get("input_tokens") or 0),
215
+ int(usage.get("cached_input_tokens") or 0),
216
+ int(usage.get("output_tokens") or 0),
217
+ telemetry.get("total_cost_usd"),
218
+ telemetry.get("telemetry_source", ""),
219
+ telemetry.get("cost_source", ""),
220
+ "ok" if int(returncode) == 0 else "failed",
221
+ json.dumps(
222
+ {
223
+ "warnings": telemetry.get("warnings") or [],
224
+ "raw": telemetry.get("raw") or {},
225
+ },
226
+ ensure_ascii=False,
227
+ ),
228
+ ),
229
+ )
230
+ conn.commit()
231
+ return True, ""
232
+ except Exception as exc:
233
+ return False, f"automation telemetry unavailable: {exc}"
234
+
235
+
42
236
  def _resolve_claude_cli() -> str:
43
237
  saved = NEXO_HOME / "config" / "claude-cli-path"
44
238
  if saved.exists():
@@ -245,6 +439,27 @@ def _resolve_runtime_model_and_effort(
245
439
  return requested_model, requested_effort
246
440
 
247
441
 
442
def _backend_is_available(backend: str) -> bool:
    """Report whether the CLI for *backend* resolves to a truthy value.

    Only the Claude Code and Codex backends are recognized; any other value
    is reported as unavailable.
    """
    if backend == CLIENT_CLAUDE_CODE:
        resolver = _resolve_claude_cli
    elif backend == CLIENT_CODEX:
        resolver = _resolve_codex_cli
    else:
        return False
    return bool(resolver())
448
+
449
+
450
def _resolve_available_backend(selected_backend: str, *, preferences: dict | None = None) -> str:
    """Pick a usable backend, preferring the one that was selected.

    If *selected_backend* is available it is returned as is. Otherwise the
    configured automation backend, then Claude Code, then Codex are tried in
    that order, skipping the selected backend and the disabled marker. When
    none is available the original selection is returned so the caller can
    surface its own unavailability error.
    """
    if _backend_is_available(selected_backend):
        return selected_backend

    prefs = preferences if preferences else load_client_preferences()
    fallback_order = (
        resolve_automation_backend(preferences=prefs),
        CLIENT_CLAUDE_CODE,
        CLIENT_CODEX,
    )
    # Lazy generator: availability is probed one candidate at a time and
    # probing stops at the first hit.
    usable = (
        candidate
        for candidate in fallback_order
        if candidate != selected_backend
        and candidate != BACKEND_NONE
        and _backend_is_available(candidate)
    )
    return next(usable, selected_backend)
461
+
462
+
248
463
  def _build_codex_prompt(
249
464
  prompt: str,
250
465
  *,
@@ -252,7 +467,18 @@ def _build_codex_prompt(
252
467
  append_system_prompt: str = "",
253
468
  allowed_tools: str = "",
254
469
  ) -> str:
470
+ protocol_contract = (
471
+ "NEXO PROTOCOL (MANDATORY):\n"
472
+ "- Before non-trivial analyze/edit/execute/delegate work, call `nexo_task_open(...)`. "
473
+ "If that tool is unavailable, call `nexo_guard_check(...)` and `nexo_cortex_check(...)` first.\n"
474
+ "- For long multi-step or cross-session work, call `nexo_workflow_open(...)` and keep it updated with "
475
+ "`nexo_workflow_update(...)` so resume/replay use durable state instead of guesswork.\n"
476
+ "- If a target file has conditioned learnings or blocking guard rules, review them before any read/edit/delete step, and acknowledge guard before any edit/delete step.\n"
477
+ "- Do not claim done without explicit verification evidence. Close with `nexo_task_close(...)`; if unavailable, capture the change log and state the evidence explicitly.\n"
478
+ "- When a correction changes the canonical rule, capture or supersede the learning instead of leaving contradictory active rules behind."
479
+ )
255
480
  instructions: list[str] = []
481
+ instructions.append(protocol_contract)
256
482
  if append_system_prompt:
257
483
  instructions.append(f"SYSTEM INSTRUCTIONS:\n{append_system_prompt}")
258
484
  if output_format and output_format.lower() == "text":
@@ -273,6 +499,7 @@ def run_automation_prompt(
273
499
  prompt: str,
274
500
  *,
275
501
  backend: str | None = None,
502
+ task_profile: str = "",
276
503
  cwd: str | os.PathLike[str] | None = None,
277
504
  env: dict | None = None,
278
505
  model: str = "",
@@ -288,15 +515,26 @@ def run_automation_prompt(
288
515
  if selected_backend == BACKEND_NONE:
289
516
  raise AutomationBackendUnavailableError("Automation backend is disabled in config.")
290
517
 
518
+ if task_profile:
519
+ profile = resolve_automation_task_profile(task_profile, preferences=prefs)
520
+ selected_backend = profile["backend"] or selected_backend
521
+ if not model:
522
+ model = profile["model"]
523
+ if not reasoning_effort:
524
+ reasoning_effort = profile["reasoning_effort"]
525
+ selected_backend = _resolve_available_backend(selected_backend, preferences=prefs)
526
+
291
527
  cwd_path = Path(cwd).expanduser().resolve() if cwd else Path.cwd()
292
528
  run_env = _headless_env(env)
293
529
  extra_args = list(extra_args or [])
530
+ requested_output_format = output_format or "text"
294
531
  resolved_model, resolved_effort = _resolve_runtime_model_and_effort(
295
532
  selected_backend,
296
533
  model=model,
297
534
  reasoning_effort=reasoning_effort,
298
535
  preferences=prefs,
299
536
  )
537
+ started_at = time.perf_counter()
300
538
 
301
539
  if selected_backend == CLIENT_CLAUDE_CODE:
302
540
  claude_bin = _resolve_claude_cli()
@@ -309,14 +547,13 @@ def run_automation_prompt(
309
547
  cmd.extend(["--model", resolved_model])
310
548
  if resolved_effort:
311
549
  cmd.extend(["--effort", resolved_effort])
312
- if output_format:
313
- cmd.extend(["--output-format", output_format])
550
+ cmd.extend(["--output-format", "json"])
314
551
  if append_system_prompt:
315
552
  cmd.extend(["--append-system-prompt", append_system_prompt])
316
553
  if allowed_tools:
317
554
  cmd.extend(["--allowedTools", allowed_tools])
318
555
  cmd.extend(extra_args)
319
- return subprocess.run(
556
+ result = subprocess.run(
320
557
  cmd,
321
558
  cwd=str(cwd_path),
322
559
  capture_output=True,
@@ -324,6 +561,31 @@ def run_automation_prompt(
324
561
  timeout=timeout,
325
562
  env=run_env,
326
563
  )
564
+ final_stdout, telemetry = _extract_claude_telemetry(
565
+ result.stdout or "",
566
+ requested_output_format=requested_output_format,
567
+ )
568
+ recorded, record_error = _record_automation_run(
569
+ backend=selected_backend,
570
+ task_profile=task_profile,
571
+ model=resolved_model,
572
+ reasoning_effort=resolved_effort,
573
+ cwd=cwd_path,
574
+ output_format=requested_output_format,
575
+ prompt=prompt,
576
+ returncode=result.returncode,
577
+ duration_ms=int((time.perf_counter() - started_at) * 1000),
578
+ telemetry=telemetry,
579
+ )
580
+ stderr = result.stderr or ""
581
+ if not recorded:
582
+ stderr = _append_stderr(stderr, record_error)
583
+ return subprocess.CompletedProcess(
584
+ cmd,
585
+ result.returncode,
586
+ final_stdout,
587
+ stderr,
588
+ )
327
589
 
328
590
  if selected_backend == CLIENT_CODEX:
329
591
  codex_bin = _resolve_codex_cli()
@@ -339,6 +601,7 @@ def run_automation_prompt(
339
601
  "--skip-git-repo-check",
340
602
  "--dangerously-bypass-approvals-and-sandbox",
341
603
  "--ephemeral",
604
+ "--json",
342
605
  "-C",
343
606
  str(cwd_path),
344
607
  "-o",
@@ -368,12 +631,33 @@ def run_automation_prompt(
368
631
  timeout=timeout,
369
632
  env=run_env,
370
633
  )
371
- stdout = output_path.read_text() if output_path.exists() else (result.stdout or "")
634
+ raw_stdout = result.stdout or ""
635
+ stdout = output_path.read_text() if output_path.exists() else raw_stdout
636
+ final_stdout, telemetry = _extract_codex_telemetry(
637
+ raw_stdout,
638
+ final_stdout=stdout,
639
+ model=resolved_model,
640
+ )
641
+ recorded, record_error = _record_automation_run(
642
+ backend=selected_backend,
643
+ task_profile=task_profile,
644
+ model=resolved_model,
645
+ reasoning_effort=resolved_effort,
646
+ cwd=cwd_path,
647
+ output_format=requested_output_format,
648
+ prompt=prompt,
649
+ returncode=result.returncode,
650
+ duration_ms=int((time.perf_counter() - started_at) * 1000),
651
+ telemetry=telemetry,
652
+ )
653
+ stderr = result.stderr or ""
654
+ if not recorded:
655
+ stderr = _append_stderr(stderr, record_error)
372
656
  return subprocess.CompletedProcess(
373
657
  cmd,
374
658
  result.returncode,
375
- stdout,
376
- result.stderr,
659
+ final_stdout,
660
+ stderr,
377
661
  )
378
662
 
379
663
  raise AutomationBackendUnavailableError(f"Unsupported automation backend: {selected_backend}")