nexo-brain 2.7.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +66 -12
- package/hooks/hooks.json +79 -0
- package/package.json +1 -1
- package/src/agent_runner.py +290 -6
- package/src/cli.py +111 -0
- package/src/client_preferences.py +94 -0
- package/src/client_sync.py +202 -2
- package/src/cognitive/__init__.py +1 -1
- package/src/cognitive/_search.py +39 -19
- package/src/dashboard/app.py +140 -0
- package/src/dashboard/templates/base.html +4 -0
- package/src/dashboard/templates/protocol.html +199 -0
- package/src/db/__init__.py +23 -1
- package/src/db/_learnings.py +31 -4
- package/src/db/_personal_scripts.py +12 -0
- package/src/db/_protocol.py +303 -0
- package/src/db/_schema.py +248 -0
- package/src/db/_watchers.py +173 -0
- package/src/db/_workflow.py +952 -0
- package/src/doctor/providers/runtime.py +918 -7
- package/src/evolution_cycle.py +62 -0
- package/src/hook_guardrails.py +308 -0
- package/src/hooks/protocol-guardrail.sh +10 -0
- package/src/nexo_sdk.py +103 -0
- package/src/plugins/cognitive_memory.py +18 -0
- package/src/plugins/cortex.py +55 -35
- package/src/plugins/guard.py +132 -16
- package/src/plugins/protocol.py +911 -0
- package/src/plugins/schedule.py +40 -6
- package/src/plugins/simple_api.py +103 -0
- package/src/plugins/skills.py +67 -0
- package/src/plugins/state_watchers.py +79 -0
- package/src/plugins/workflow.py +588 -0
- package/src/public_contribution.py +86 -12
- package/src/script_registry.py +142 -0
- package/src/scripts/deep-sleep/apply_findings.py +204 -0
- package/src/scripts/deep-sleep/collect.py +49 -4
- package/src/scripts/nexo-agent-run.py +2 -0
- package/src/scripts/nexo-daily-self-audit.py +843 -5
- package/src/scripts/nexo-evolution-run.py +343 -1
- package/src/server.py +92 -6
- package/src/skills_runtime.py +151 -0
- package/src/state_watchers_runtime.py +334 -0
- package/src/tools_learnings.py +345 -7
- package/src/tools_sessions.py +183 -0
- package/templates/CLAUDE.md.template +9 -1
- package/templates/CODEX.AGENTS.md.template +10 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "2.7.0",
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
[](https://github.com/wazionapps/nexo/stargazers)
|
|
7
7
|
[](https://www.gnu.org/licenses/agpl-3.0)
|
|
8
8
|
|
|
9
|
-
> Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
|
|
9
|
+
> Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, durable workflow runs, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
|
|
10
10
|
|
|
11
11
|
**NEXO Brain transforms any MCP-compatible AI agent from a stateless assistant into a cognitive partner that remembers, learns, forgets, adapts, and builds a relationship with you over time.**
|
|
12
12
|
|
|
@@ -18,6 +18,16 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview on YouTube](https://www.youtube.com/watch?v=IBs7zh7ZMG0) · [Watch the full deep-dive](https://www.youtube.com/watch?v=bKAfowyyy5M)
|
|
20
20
|
|
|
21
|
+
Start here:
|
|
22
|
+
- [5-minute quickstart](docs/quickstart-5-minutes.md)
|
|
23
|
+
- [Architecture visuals](docs/architecture-visuals.md)
|
|
24
|
+
- [Memory classes](docs/memory-classes.md)
|
|
25
|
+
- [Session portability](docs/session-portability.md)
|
|
26
|
+
- [Python SDK](docs/sdk-python.md)
|
|
27
|
+
- [Reference verticals](docs/reference-verticals.md)
|
|
28
|
+
- [Measured compare scorecard](compare/README.md)
|
|
29
|
+
- [Public contribution guide](docs/public-contribution.md)
|
|
30
|
+
|
|
21
31
|
Every time you close a session, everything is lost. Your agent doesn't remember yesterday's decisions, repeats the same mistakes, and starts from zero. NEXO Brain fixes this with a cognitive architecture modeled after how human memory actually works.
|
|
22
32
|
|
|
23
33
|
## Shared Brain Across Clients
|
|
@@ -38,15 +48,32 @@ That means NEXO now manages not only the shared runtime and MCP wiring, but also
|
|
|
38
48
|
- For Codex specifically, `nexo chat` and Codex headless automation inject the current bootstrap explicitly, so Codex starts as NEXO even when plain global Codex startup is inconsistent about global instructions.
|
|
39
49
|
- Deep Sleep now reads both Claude Code and Codex transcript stores, so overnight analysis still works even when the user spends the day in Codex.
|
|
40
50
|
|
|
41
|
-
Versions `2.6.14` through `2.
|
|
42
|
-
|
|
43
|
-
Version `
|
|
44
|
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
49
|
-
-
|
|
51
|
+
Versions `2.6.14` through `2.7.0` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first measured engineering loop.
|
|
52
|
+
|
|
53
|
+
Version `3.0.0` closes the next execution gap:
|
|
54
|
+
|
|
55
|
+
- protocol discipline is now a runtime contract, not just instructions:
|
|
56
|
+
- `nexo_task_open`
|
|
57
|
+
- `nexo_task_close`
|
|
58
|
+
- persistent `protocol_debt`
|
|
59
|
+
- enforceable `Cortex` gates
|
|
60
|
+
- durable execution is now first-class:
|
|
61
|
+
- resumable workflow runs
|
|
62
|
+
- checkpoints
|
|
63
|
+
- replay
|
|
64
|
+
- retries
|
|
65
|
+
- durable goals
|
|
66
|
+
- conditioned learnings on critical files are now real guardrails across Claude hooks, Codex transcript audits, and headless automation prompts
|
|
67
|
+
- repair/correction work now routes through canonical learning capture instead of depending on the model to remember to document after the fact
|
|
68
|
+
- runtime truth is stricter:
|
|
69
|
+
- no more healthy-looking warning storms
|
|
70
|
+
- no more silent Deep Sleep schema drift
|
|
71
|
+
- keep-alive jobs report alive/degraded/duplicated honestly
|
|
72
|
+
- public proof is stronger:
|
|
73
|
+
- measured compare scorecard
|
|
74
|
+
- external and internal ablations
|
|
75
|
+
- `cost_per_solved_task`
|
|
76
|
+
- SDK/API/quickstart surface
|
|
50
77
|
|
|
51
78
|
### Client Capability Matrix
|
|
52
79
|
|
|
@@ -227,6 +254,20 @@ User message → Fast Path check → Simple chat? → Respond directly
|
|
|
227
254
|
|
|
228
255
|
The Cortex was designed through a 3-way AI debate (Claude Opus 4.6 + GPT-5.4 + Gemini 3.1 Pro) and validated against 6 months of real production failures.
|
|
229
256
|
|
|
257
|
+
## Durable Workflow Runtime
|
|
258
|
+
|
|
259
|
+
Memory and guardrails are not enough if long work still restarts from zero.
|
|
260
|
+
|
|
261
|
+
NEXO now ships a durable workflow runtime for multi-step and cross-session execution:
|
|
262
|
+
|
|
263
|
+
- `nexo_workflow_open` creates a persistent run with step metadata, idempotency key, priority, and shared state
|
|
264
|
+
- `nexo_workflow_update` records replayable checkpoints, retry metadata, approval gates, and the current actionable state
|
|
265
|
+
- `nexo_workflow_resume` tells the agent what to do next without guessing
|
|
266
|
+
- `nexo_workflow_replay` reconstructs the recent execution history honestly instead of pretending the run is still in memory
|
|
267
|
+
- `nexo_workflow_list` keeps active and blocked work visible so it does not disappear into reminders or prose notes
|
|
268
|
+
|
|
269
|
+
This is the bridge between "good memory" and "reliable execution": tasks can now preserve state, retries, approval gates, and next action across interruptions.
|
|
270
|
+
|
|
230
271
|
## Context Continuity (Auto-Compaction)
|
|
231
272
|
|
|
232
273
|
NEXO Brain automatically preserves session context when Claude Code compacts conversations. Using PreCompact and PostCompact hooks:
|
|
@@ -642,6 +683,19 @@ nexo scripts list # See your personal scripts
|
|
|
642
683
|
|
|
643
684
|
During install, NEXO now asks which interactive clients you want to connect, which one `nexo chat` should suggest first when multiple terminal clients are available, whether to enable background automation, which backend should run that automation, and which model profile each active terminal/backend should use. Shared brain stays on in every mode.
|
|
644
685
|
|
|
686
|
+
Public entry points for the mental model now stay intentionally small:
|
|
687
|
+
- `nexo_remember`
|
|
688
|
+
- `nexo_memory_recall`
|
|
689
|
+
- `nexo_consolidate`
|
|
690
|
+
- `nexo_run_workflow`
|
|
691
|
+
|
|
692
|
+
If you want the shell or Python wrappers instead of raw MCP tools:
|
|
693
|
+
- [docs/quickstart-5-minutes.md](docs/quickstart-5-minutes.md)
|
|
694
|
+
- [docs/memory-classes.md](docs/memory-classes.md)
|
|
695
|
+
- [docs/sdk-python.md](docs/sdk-python.md)
|
|
696
|
+
- [docs/reference-verticals.md](docs/reference-verticals.md)
|
|
697
|
+
- [compare/README.md](compare/README.md)
|
|
698
|
+
|
|
645
699
|
Recommended defaults:
|
|
646
700
|
- Claude Code: `Opus 4.6 with 1M context`
|
|
647
701
|
- Codex: `gpt-5.4` with `xhigh` reasoning
|
|
@@ -714,7 +768,7 @@ nexo doctor --tier runtime --json # Machine-readable health report
|
|
|
714
768
|
nexo doctor --fix # Apply deterministic repairs
|
|
715
769
|
```
|
|
716
770
|
|
|
717
|
-
Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. See `docs/writing-scripts.md` for details.
|
|
771
|
+
Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. `nexo-agent-run.py` now also supports task profiles (`fast`, `balanced`, `deep`) plus safe backend fallback, so automations can prefer cheaper/faster Codex paths or deeper Claude paths without hardcoding one provider forever. See `docs/writing-scripts.md` for details.
|
|
718
772
|
|
|
719
773
|
Skills v2 combine procedural guides with optional executable scripts. Personal skills live in `NEXO_HOME/skills/`, packaged core skills live in `NEXO_CODE/skills/` during development and `NEXO_HOME/skills-core/` in installed environments, and staged runtime copies live in `NEXO_HOME/skills-runtime/`. Execution is fully autonomous: Deep Sleep can evolve mature guide skills into executable drafts automatically, and runtime execution no longer waits for manual approval. See `docs/skills-v2.md` for the full model.
|
|
720
774
|
|
|
@@ -840,7 +894,7 @@ When Claude Desktop is installed, `nexo-brain`, `nexo update`, and `nexo clients
|
|
|
840
894
|
|
|
841
895
|
### Codex
|
|
842
896
|
|
|
843
|
-
When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning.
|
|
897
|
+
When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning. Runtime Doctor also audits recent Codex sessions for NEXO startup markers and conditioned-file protocol discipline so parity drift does not hide behind the lack of native Claude-style hooks.
|
|
844
898
|
|
|
845
899
|
### OpenClaw
|
|
846
900
|
|
package/hooks/hooks.json
CHANGED
|
@@ -2,10 +2,89 @@
|
|
|
2
2
|
"hooks": {
|
|
3
3
|
"SessionStart": [
|
|
4
4
|
{
|
|
5
|
+
"matcher": "*",
|
|
5
6
|
"hooks": [
|
|
6
7
|
{
|
|
7
8
|
"type": "command",
|
|
8
9
|
"command": "diff -q \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\" >/dev/null 2>&1 || (python3 -m venv \"${CLAUDE_PLUGIN_DATA}/.venv\" 2>/dev/null; cp \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\"; \"${CLAUDE_PLUGIN_DATA}/.venv/bin/pip\" install --quiet -r \"${CLAUDE_PLUGIN_DATA}/requirements.txt\") || rm -f \"${CLAUDE_PLUGIN_DATA}/requirements.txt\""
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"type": "command",
|
|
13
|
+
"command": "mkdir -p \"${CLAUDE_PLUGIN_DATA}/operations\" && date +%s > \"${CLAUDE_PLUGIN_DATA}/operations/.session-start-ts\"",
|
|
14
|
+
"timeout": 2
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"type": "command",
|
|
18
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/daily-briefing-check.sh\"",
|
|
19
|
+
"timeout": 5
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"type": "command",
|
|
23
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-start.sh\"",
|
|
24
|
+
"timeout": 35
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
],
|
|
29
|
+
"Stop": [
|
|
30
|
+
{
|
|
31
|
+
"matcher": "*",
|
|
32
|
+
"hooks": [
|
|
33
|
+
{
|
|
34
|
+
"type": "command",
|
|
35
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-stop.sh\"",
|
|
36
|
+
"timeout": 10
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
],
|
|
41
|
+
"PostToolUse": [
|
|
42
|
+
{
|
|
43
|
+
"matcher": "*",
|
|
44
|
+
"hooks": [
|
|
45
|
+
{
|
|
46
|
+
"type": "command",
|
|
47
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-tool-logs.sh\"",
|
|
48
|
+
"timeout": 5
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"type": "command",
|
|
52
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-session.sh\"",
|
|
53
|
+
"timeout": 3
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"type": "command",
|
|
57
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/inbox-hook.sh\"",
|
|
58
|
+
"timeout": 5
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"type": "command",
|
|
62
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/protocol-guardrail.sh\"",
|
|
63
|
+
"timeout": 5
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|
|
67
|
+
],
|
|
68
|
+
"PreCompact": [
|
|
69
|
+
{
|
|
70
|
+
"matcher": "*",
|
|
71
|
+
"hooks": [
|
|
72
|
+
{
|
|
73
|
+
"type": "command",
|
|
74
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/pre-compact.sh\"",
|
|
75
|
+
"timeout": 10
|
|
76
|
+
}
|
|
77
|
+
]
|
|
78
|
+
}
|
|
79
|
+
],
|
|
80
|
+
"PostCompact": [
|
|
81
|
+
{
|
|
82
|
+
"matcher": "*",
|
|
83
|
+
"hooks": [
|
|
84
|
+
{
|
|
85
|
+
"type": "command",
|
|
86
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/post-compact.sh\"",
|
|
87
|
+
"timeout": 10
|
|
9
88
|
}
|
|
10
89
|
]
|
|
11
90
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "2.7.0",
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/agent_runner.py
CHANGED
|
@@ -8,6 +8,7 @@ import shlex
|
|
|
8
8
|
import shutil
|
|
9
9
|
import subprocess
|
|
10
10
|
import tempfile
|
|
11
|
+
import time
|
|
11
12
|
import tomllib
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
|
|
@@ -18,6 +19,7 @@ from client_preferences import (
|
|
|
18
19
|
TERMINAL_CLIENT_KEYS,
|
|
19
20
|
load_client_preferences,
|
|
20
21
|
resolve_automation_backend,
|
|
22
|
+
resolve_automation_task_profile,
|
|
21
23
|
resolve_client_runtime_profile,
|
|
22
24
|
resolve_terminal_client,
|
|
23
25
|
)
|
|
@@ -25,6 +27,12 @@ from client_preferences import (
|
|
|
25
27
|
|
|
26
28
|
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
|
|
27
29
|
CLAUDE_LEGACY_MODEL_HINTS = {"opus", "sonnet"}
|
|
30
|
+
MODEL_PRICING_USD_PER_1M = {
    # Fallback price table in USD per one million tokens. It is consulted only
    # when the backend does not return an explicit cost for a run.
    # Codex model names map to the current GPT-5 family pricing.
    "gpt-5.4": {
        "input": 1.25,
        "cached_input": 0.125,
        "output": 10.0,
    },
    "gpt-5.4-mini": {
        "input": 0.25,
        "cached_input": 0.025,
        "output": 2.0,
    },
}
|
|
28
36
|
|
|
29
37
|
|
|
30
38
|
class AgentRunnerError(RuntimeError):
|
|
@@ -39,6 +47,192 @@ class AutomationBackendUnavailableError(AgentRunnerError):
|
|
|
39
47
|
"""Raised when the configured automation backend is unavailable."""
|
|
40
48
|
|
|
41
49
|
|
|
50
|
+
def _canonical_pricing_model(model: str) -> str:
|
|
51
|
+
lowered = str(model or "").strip().lower()
|
|
52
|
+
lowered = lowered.split("[", 1)[0]
|
|
53
|
+
aliases = {
|
|
54
|
+
"gpt-5": "gpt-5.4",
|
|
55
|
+
"gpt-5.4": "gpt-5.4",
|
|
56
|
+
"gpt-5-mini": "gpt-5.4-mini",
|
|
57
|
+
"gpt-5.4-mini": "gpt-5.4-mini",
|
|
58
|
+
}
|
|
59
|
+
return aliases.get(lowered, lowered)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _estimate_openai_cost_usd(model: str, *, input_tokens: int, cached_input_tokens: int, output_tokens: int) -> tuple[float | None, str]:
    """Estimate the USD cost of a run from the local pricing snapshot.

    Args:
        model: Model name; normalized with ``_canonical_pricing_model``.
        input_tokens: Tokens billed at the ``input`` rate.
        cached_input_tokens: Tokens billed at the ``cached_input`` rate.
        output_tokens: Tokens billed at the ``output`` rate.

    Returns:
        ``(cost, "pricing_snapshot")`` with the cost rounded to six decimals,
        or ``(None, "pricing_unavailable")`` when the snapshot has no entry
        for the model.

    Each bucket is priced independently: falsy counts are treated as zero,
    negative counts are clamped to zero, and no deduplication between the
    input and cached-input buckets happens here.
    """
    rates = MODEL_PRICING_USD_PER_1M.get(_canonical_pricing_model(model))
    if not rates:
        return None, "pricing_unavailable"

    buckets = (
        ("input", input_tokens),
        ("cached_input", cached_input_tokens),
        ("output", output_tokens),
    )
    total = 0.0
    for rate_key, token_count in buckets:
        billable = max(0, int(token_count or 0))
        total += (billable / 1_000_000.0) * rates[rate_key]
    return round(total, 6), "pricing_snapshot"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _safe_json_loads(raw: str) -> dict | list | None:
|
|
74
|
+
try:
|
|
75
|
+
return json.loads(raw)
|
|
76
|
+
except Exception:
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _extract_claude_telemetry(raw_stdout: str, *, requested_output_format: str) -> tuple[str, dict]:
|
|
81
|
+
payload = _safe_json_loads(raw_stdout) if str(raw_stdout or "").strip().startswith("{") else None
|
|
82
|
+
if not isinstance(payload, dict):
|
|
83
|
+
return raw_stdout or "", {
|
|
84
|
+
"telemetry_source": "missing",
|
|
85
|
+
"cost_source": "missing",
|
|
86
|
+
"usage": {},
|
|
87
|
+
"warnings": ["backend did not return parseable JSON telemetry"],
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
result_payload = payload.get("result", "")
|
|
91
|
+
if requested_output_format and requested_output_format.lower() == "json" and not isinstance(result_payload, str):
|
|
92
|
+
final_stdout = json.dumps(result_payload, ensure_ascii=False)
|
|
93
|
+
else:
|
|
94
|
+
final_stdout = result_payload if isinstance(result_payload, str) else json.dumps(result_payload, ensure_ascii=False)
|
|
95
|
+
|
|
96
|
+
usage = payload.get("usage") or {}
|
|
97
|
+
model_usage = payload.get("modelUsage") or {}
|
|
98
|
+
explicit_cost = payload.get("total_cost_usd")
|
|
99
|
+
if explicit_cost is None and isinstance(model_usage, dict):
|
|
100
|
+
explicit_cost = sum(
|
|
101
|
+
float((item or {}).get("costUSD") or 0.0)
|
|
102
|
+
for item in model_usage.values()
|
|
103
|
+
if isinstance(item, dict)
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
return final_stdout, {
|
|
107
|
+
"telemetry_source": "claude_json",
|
|
108
|
+
"cost_source": "backend",
|
|
109
|
+
"usage": {
|
|
110
|
+
"input_tokens": int(usage.get("input_tokens") or 0),
|
|
111
|
+
"cached_input_tokens": int(usage.get("cache_read_input_tokens") or 0),
|
|
112
|
+
"output_tokens": int(usage.get("output_tokens") or 0),
|
|
113
|
+
},
|
|
114
|
+
"total_cost_usd": float(explicit_cost) if explicit_cost is not None else None,
|
|
115
|
+
"raw": payload,
|
|
116
|
+
"warnings": [],
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _extract_codex_telemetry(stream_stdout: str, *, final_stdout: str, model: str) -> tuple[str, dict]:
|
|
121
|
+
usage_payload: dict = {}
|
|
122
|
+
raw_events: list[dict] = []
|
|
123
|
+
for line in str(stream_stdout or "").splitlines():
|
|
124
|
+
line = line.strip()
|
|
125
|
+
if not line.startswith("{"):
|
|
126
|
+
continue
|
|
127
|
+
payload = _safe_json_loads(line)
|
|
128
|
+
if not isinstance(payload, dict):
|
|
129
|
+
continue
|
|
130
|
+
raw_events.append(payload)
|
|
131
|
+
if payload.get("type") == "turn.completed" and isinstance(payload.get("usage"), dict):
|
|
132
|
+
usage_payload = payload["usage"]
|
|
133
|
+
|
|
134
|
+
usage = {
|
|
135
|
+
"input_tokens": int(usage_payload.get("input_tokens") or 0),
|
|
136
|
+
"cached_input_tokens": int(usage_payload.get("cached_input_tokens") or 0),
|
|
137
|
+
"output_tokens": int(usage_payload.get("output_tokens") or 0),
|
|
138
|
+
}
|
|
139
|
+
total_cost_usd = usage_payload.get("total_cost_usd")
|
|
140
|
+
cost_source = "backend" if total_cost_usd is not None else "missing"
|
|
141
|
+
warnings: list[str] = []
|
|
142
|
+
if total_cost_usd is None:
|
|
143
|
+
estimated_cost, estimated_source = _estimate_openai_cost_usd(
|
|
144
|
+
model,
|
|
145
|
+
input_tokens=usage["input_tokens"],
|
|
146
|
+
cached_input_tokens=usage["cached_input_tokens"],
|
|
147
|
+
output_tokens=usage["output_tokens"],
|
|
148
|
+
)
|
|
149
|
+
total_cost_usd = estimated_cost
|
|
150
|
+
cost_source = estimated_source
|
|
151
|
+
if estimated_cost is None:
|
|
152
|
+
warnings.append(f"no pricing snapshot available for model `{model}`")
|
|
153
|
+
|
|
154
|
+
if not usage_payload:
|
|
155
|
+
warnings.append("backend did not return usage telemetry")
|
|
156
|
+
|
|
157
|
+
return final_stdout, {
|
|
158
|
+
"telemetry_source": "codex_jsonl",
|
|
159
|
+
"cost_source": cost_source,
|
|
160
|
+
"usage": usage,
|
|
161
|
+
"total_cost_usd": float(total_cost_usd) if total_cost_usd is not None else None,
|
|
162
|
+
"raw": raw_events[-8:],
|
|
163
|
+
"warnings": warnings,
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _append_stderr(stderr: str, message: str) -> str:
|
|
168
|
+
bits = [part for part in [str(stderr or "").rstrip(), str(message or "").strip()] if part]
|
|
169
|
+
if not bits:
|
|
170
|
+
return ""
|
|
171
|
+
return "\n".join(bits) + "\n"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _record_automation_run(
|
|
175
|
+
*,
|
|
176
|
+
backend: str,
|
|
177
|
+
task_profile: str,
|
|
178
|
+
model: str,
|
|
179
|
+
reasoning_effort: str,
|
|
180
|
+
cwd: Path,
|
|
181
|
+
output_format: str,
|
|
182
|
+
prompt: str,
|
|
183
|
+
returncode: int,
|
|
184
|
+
duration_ms: int,
|
|
185
|
+
telemetry: dict,
|
|
186
|
+
) -> tuple[bool, str]:
|
|
187
|
+
try:
|
|
188
|
+
from db._core import get_db
|
|
189
|
+
except Exception as exc:
|
|
190
|
+
return False, f"automation telemetry unavailable: {exc}"
|
|
191
|
+
|
|
192
|
+
try:
|
|
193
|
+
conn = get_db()
|
|
194
|
+
usage = telemetry.get("usage") or {}
|
|
195
|
+
conn.execute(
|
|
196
|
+
"""
|
|
197
|
+
INSERT INTO automation_runs (
|
|
198
|
+
backend, task_profile, model, reasoning_effort, cwd, output_format,
|
|
199
|
+
prompt_chars, returncode, duration_ms,
|
|
200
|
+
input_tokens, cached_input_tokens, output_tokens,
|
|
201
|
+
total_cost_usd, telemetry_source, cost_source, status, metadata
|
|
202
|
+
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
203
|
+
""",
|
|
204
|
+
(
|
|
205
|
+
backend,
|
|
206
|
+
task_profile or "default",
|
|
207
|
+
model,
|
|
208
|
+
reasoning_effort,
|
|
209
|
+
str(cwd),
|
|
210
|
+
output_format or "text",
|
|
211
|
+
len(prompt or ""),
|
|
212
|
+
int(returncode),
|
|
213
|
+
int(duration_ms),
|
|
214
|
+
int(usage.get("input_tokens") or 0),
|
|
215
|
+
int(usage.get("cached_input_tokens") or 0),
|
|
216
|
+
int(usage.get("output_tokens") or 0),
|
|
217
|
+
telemetry.get("total_cost_usd"),
|
|
218
|
+
telemetry.get("telemetry_source", ""),
|
|
219
|
+
telemetry.get("cost_source", ""),
|
|
220
|
+
"ok" if int(returncode) == 0 else "failed",
|
|
221
|
+
json.dumps(
|
|
222
|
+
{
|
|
223
|
+
"warnings": telemetry.get("warnings") or [],
|
|
224
|
+
"raw": telemetry.get("raw") or {},
|
|
225
|
+
},
|
|
226
|
+
ensure_ascii=False,
|
|
227
|
+
),
|
|
228
|
+
),
|
|
229
|
+
)
|
|
230
|
+
conn.commit()
|
|
231
|
+
return True, ""
|
|
232
|
+
except Exception as exc:
|
|
233
|
+
return False, f"automation telemetry unavailable: {exc}"
|
|
234
|
+
|
|
235
|
+
|
|
42
236
|
def _resolve_claude_cli() -> str:
|
|
43
237
|
saved = NEXO_HOME / "config" / "claude-cli-path"
|
|
44
238
|
if saved.exists():
|
|
@@ -245,6 +439,27 @@ def _resolve_runtime_model_and_effort(
|
|
|
245
439
|
return requested_model, requested_effort
|
|
246
440
|
|
|
247
441
|
|
|
442
|
+
def _backend_is_available(backend: str) -> bool:
    """Report whether the CLI for *backend* can be resolved.

    The Claude Code and Codex backends are considered available when their
    respective CLI resolver returns a truthy value. Any other backend value
    is reported as unavailable.
    """
    if backend == CLIENT_CLAUDE_CODE:
        resolver = _resolve_claude_cli
    elif backend == CLIENT_CODEX:
        resolver = _resolve_codex_cli
    else:
        return False
    return bool(resolver())
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _resolve_available_backend(selected_backend: str, *, preferences: dict | None = None) -> str:
    """Pick a usable automation backend, falling back when needed.

    ``selected_backend`` is returned as-is when it is available. Otherwise
    the configured automation backend, then Claude Code, then Codex are tried
    in that order, skipping the already-rejected selection and the disabled
    backend marker. If none of them is available, ``selected_backend`` is
    returned unchanged so the caller's own handling applies.

    Args:
        selected_backend: Backend the caller would like to use.
        preferences: Client preferences; loaded on demand when falsy.
    """
    if _backend_is_available(selected_backend):
        return selected_backend

    prefs = preferences or load_client_preferences()
    configured = resolve_automation_backend(preferences=prefs)
    fallback_order = (configured, CLIENT_CLAUDE_CODE, CLIENT_CODEX)
    # Lazy generator: availability is probed only until the first hit.
    usable = (
        candidate
        for candidate in fallback_order
        if candidate not in (selected_backend, BACKEND_NONE) and _backend_is_available(candidate)
    )
    return next(usable, selected_backend)
|
|
461
|
+
|
|
462
|
+
|
|
248
463
|
def _build_codex_prompt(
|
|
249
464
|
prompt: str,
|
|
250
465
|
*,
|
|
@@ -252,7 +467,18 @@ def _build_codex_prompt(
|
|
|
252
467
|
append_system_prompt: str = "",
|
|
253
468
|
allowed_tools: str = "",
|
|
254
469
|
) -> str:
|
|
470
|
+
protocol_contract = (
|
|
471
|
+
"NEXO PROTOCOL (MANDATORY):\n"
|
|
472
|
+
"- Before non-trivial analyze/edit/execute/delegate work, call `nexo_task_open(...)`. "
|
|
473
|
+
"If that tool is unavailable, call `nexo_guard_check(...)` and `nexo_cortex_check(...)` first.\n"
|
|
474
|
+
"- For long multi-step or cross-session work, call `nexo_workflow_open(...)` and keep it updated with "
|
|
475
|
+
"`nexo_workflow_update(...)` so resume/replay use durable state instead of guesswork.\n"
|
|
476
|
+
"- If a target file has conditioned learnings or blocking guard rules, review them before any read/edit/delete step, and acknowledge guard before any edit/delete step.\n"
|
|
477
|
+
"- Do not claim done without explicit verification evidence. Close with `nexo_task_close(...)`; if unavailable, capture the change log and state the evidence explicitly.\n"
|
|
478
|
+
"- When a correction changes the canonical rule, capture or supersede the learning instead of leaving contradictory active rules behind."
|
|
479
|
+
)
|
|
255
480
|
instructions: list[str] = []
|
|
481
|
+
instructions.append(protocol_contract)
|
|
256
482
|
if append_system_prompt:
|
|
257
483
|
instructions.append(f"SYSTEM INSTRUCTIONS:\n{append_system_prompt}")
|
|
258
484
|
if output_format and output_format.lower() == "text":
|
|
@@ -273,6 +499,7 @@ def run_automation_prompt(
|
|
|
273
499
|
prompt: str,
|
|
274
500
|
*,
|
|
275
501
|
backend: str | None = None,
|
|
502
|
+
task_profile: str = "",
|
|
276
503
|
cwd: str | os.PathLike[str] | None = None,
|
|
277
504
|
env: dict | None = None,
|
|
278
505
|
model: str = "",
|
|
@@ -288,15 +515,26 @@ def run_automation_prompt(
|
|
|
288
515
|
if selected_backend == BACKEND_NONE:
|
|
289
516
|
raise AutomationBackendUnavailableError("Automation backend is disabled in config.")
|
|
290
517
|
|
|
518
|
+
if task_profile:
|
|
519
|
+
profile = resolve_automation_task_profile(task_profile, preferences=prefs)
|
|
520
|
+
selected_backend = profile["backend"] or selected_backend
|
|
521
|
+
if not model:
|
|
522
|
+
model = profile["model"]
|
|
523
|
+
if not reasoning_effort:
|
|
524
|
+
reasoning_effort = profile["reasoning_effort"]
|
|
525
|
+
selected_backend = _resolve_available_backend(selected_backend, preferences=prefs)
|
|
526
|
+
|
|
291
527
|
cwd_path = Path(cwd).expanduser().resolve() if cwd else Path.cwd()
|
|
292
528
|
run_env = _headless_env(env)
|
|
293
529
|
extra_args = list(extra_args or [])
|
|
530
|
+
requested_output_format = output_format or "text"
|
|
294
531
|
resolved_model, resolved_effort = _resolve_runtime_model_and_effort(
|
|
295
532
|
selected_backend,
|
|
296
533
|
model=model,
|
|
297
534
|
reasoning_effort=reasoning_effort,
|
|
298
535
|
preferences=prefs,
|
|
299
536
|
)
|
|
537
|
+
started_at = time.perf_counter()
|
|
300
538
|
|
|
301
539
|
if selected_backend == CLIENT_CLAUDE_CODE:
|
|
302
540
|
claude_bin = _resolve_claude_cli()
|
|
@@ -309,14 +547,13 @@ def run_automation_prompt(
|
|
|
309
547
|
cmd.extend(["--model", resolved_model])
|
|
310
548
|
if resolved_effort:
|
|
311
549
|
cmd.extend(["--effort", resolved_effort])
|
|
312
|
-
|
|
313
|
-
cmd.extend(["--output-format", output_format])
|
|
550
|
+
cmd.extend(["--output-format", "json"])
|
|
314
551
|
if append_system_prompt:
|
|
315
552
|
cmd.extend(["--append-system-prompt", append_system_prompt])
|
|
316
553
|
if allowed_tools:
|
|
317
554
|
cmd.extend(["--allowedTools", allowed_tools])
|
|
318
555
|
cmd.extend(extra_args)
|
|
319
|
-
|
|
556
|
+
result = subprocess.run(
|
|
320
557
|
cmd,
|
|
321
558
|
cwd=str(cwd_path),
|
|
322
559
|
capture_output=True,
|
|
@@ -324,6 +561,31 @@ def run_automation_prompt(
|
|
|
324
561
|
timeout=timeout,
|
|
325
562
|
env=run_env,
|
|
326
563
|
)
|
|
564
|
+
final_stdout, telemetry = _extract_claude_telemetry(
|
|
565
|
+
result.stdout or "",
|
|
566
|
+
requested_output_format=requested_output_format,
|
|
567
|
+
)
|
|
568
|
+
recorded, record_error = _record_automation_run(
|
|
569
|
+
backend=selected_backend,
|
|
570
|
+
task_profile=task_profile,
|
|
571
|
+
model=resolved_model,
|
|
572
|
+
reasoning_effort=resolved_effort,
|
|
573
|
+
cwd=cwd_path,
|
|
574
|
+
output_format=requested_output_format,
|
|
575
|
+
prompt=prompt,
|
|
576
|
+
returncode=result.returncode,
|
|
577
|
+
duration_ms=int((time.perf_counter() - started_at) * 1000),
|
|
578
|
+
telemetry=telemetry,
|
|
579
|
+
)
|
|
580
|
+
stderr = result.stderr or ""
|
|
581
|
+
if not recorded:
|
|
582
|
+
stderr = _append_stderr(stderr, record_error)
|
|
583
|
+
return subprocess.CompletedProcess(
|
|
584
|
+
cmd,
|
|
585
|
+
result.returncode,
|
|
586
|
+
final_stdout,
|
|
587
|
+
stderr,
|
|
588
|
+
)
|
|
327
589
|
|
|
328
590
|
if selected_backend == CLIENT_CODEX:
|
|
329
591
|
codex_bin = _resolve_codex_cli()
|
|
@@ -339,6 +601,7 @@ def run_automation_prompt(
|
|
|
339
601
|
"--skip-git-repo-check",
|
|
340
602
|
"--dangerously-bypass-approvals-and-sandbox",
|
|
341
603
|
"--ephemeral",
|
|
604
|
+
"--json",
|
|
342
605
|
"-C",
|
|
343
606
|
str(cwd_path),
|
|
344
607
|
"-o",
|
|
@@ -368,12 +631,33 @@ def run_automation_prompt(
|
|
|
368
631
|
timeout=timeout,
|
|
369
632
|
env=run_env,
|
|
370
633
|
)
|
|
371
|
-
|
|
634
|
+
raw_stdout = result.stdout or ""
|
|
635
|
+
stdout = output_path.read_text() if output_path.exists() else raw_stdout
|
|
636
|
+
final_stdout, telemetry = _extract_codex_telemetry(
|
|
637
|
+
raw_stdout,
|
|
638
|
+
final_stdout=stdout,
|
|
639
|
+
model=resolved_model,
|
|
640
|
+
)
|
|
641
|
+
recorded, record_error = _record_automation_run(
|
|
642
|
+
backend=selected_backend,
|
|
643
|
+
task_profile=task_profile,
|
|
644
|
+
model=resolved_model,
|
|
645
|
+
reasoning_effort=resolved_effort,
|
|
646
|
+
cwd=cwd_path,
|
|
647
|
+
output_format=requested_output_format,
|
|
648
|
+
prompt=prompt,
|
|
649
|
+
returncode=result.returncode,
|
|
650
|
+
duration_ms=int((time.perf_counter() - started_at) * 1000),
|
|
651
|
+
telemetry=telemetry,
|
|
652
|
+
)
|
|
653
|
+
stderr = result.stderr or ""
|
|
654
|
+
if not recorded:
|
|
655
|
+
stderr = _append_stderr(stderr, record_error)
|
|
372
656
|
return subprocess.CompletedProcess(
|
|
373
657
|
cmd,
|
|
374
658
|
result.returncode,
|
|
375
|
-
|
|
376
|
-
|
|
659
|
+
final_stdout,
|
|
660
|
+
stderr,
|
|
377
661
|
)
|
|
378
662
|
|
|
379
663
|
raise AutomationBackendUnavailableError(f"Unsupported automation backend: {selected_backend}")
|