nexo-brain 2.7.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +66 -12
- package/hooks/hooks.json +79 -0
- package/package.json +1 -1
- package/src/agent_runner.py +295 -7
- package/src/cli.py +111 -0
- package/src/client_preferences.py +99 -1
- package/src/client_sync.py +207 -3
- package/src/cognitive/__init__.py +1 -1
- package/src/cognitive/_search.py +39 -19
- package/src/dashboard/app.py +141 -1
- package/src/dashboard/templates/base.html +4 -0
- package/src/dashboard/templates/protocol.html +199 -0
- package/src/db/__init__.py +23 -1
- package/src/db/_learnings.py +31 -4
- package/src/db/_personal_scripts.py +12 -0
- package/src/db/_protocol.py +303 -0
- package/src/db/_schema.py +248 -0
- package/src/db/_watchers.py +173 -0
- package/src/db/_workflow.py +952 -0
- package/src/doctor/providers/boot.py +45 -19
- package/src/doctor/providers/runtime.py +923 -8
- package/src/evolution_cycle.py +62 -0
- package/src/hook_guardrails.py +308 -0
- package/src/hooks/protocol-guardrail.sh +10 -0
- package/src/nexo_sdk.py +103 -0
- package/src/plugins/cognitive_memory.py +18 -0
- package/src/plugins/cortex.py +55 -35
- package/src/plugins/guard.py +132 -16
- package/src/plugins/protocol.py +911 -0
- package/src/plugins/schedule.py +40 -6
- package/src/plugins/simple_api.py +103 -0
- package/src/plugins/skills.py +67 -0
- package/src/plugins/state_watchers.py +79 -0
- package/src/plugins/workflow.py +588 -0
- package/src/public_contribution.py +86 -12
- package/src/requirements.txt +1 -0
- package/src/script_registry.py +142 -0
- package/src/scripts/deep-sleep/apply_findings.py +204 -0
- package/src/scripts/deep-sleep/collect.py +49 -4
- package/src/scripts/nexo-agent-run.py +2 -0
- package/src/scripts/nexo-daily-self-audit.py +843 -5
- package/src/scripts/nexo-evolution-run.py +343 -1
- package/src/server.py +92 -6
- package/src/skills_runtime.py +151 -0
- package/src/state_watchers_runtime.py +334 -0
- package/src/tools_learnings.py +345 -7
- package/src/tools_sessions.py +183 -0
- package/templates/CLAUDE.md.template +9 -1
- package/templates/CODEX.AGENTS.md.template +10 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "2.7.0",
|
|
3
|
+
"version": "3.0.1",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
[](https://github.com/wazionapps/nexo/stargazers)
|
|
7
7
|
[](https://www.gnu.org/licenses/agpl-3.0)
|
|
8
8
|
|
|
9
|
-
> Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
|
|
9
|
+
> Local cognitive runtime with a shared brain across Claude Code, Codex, Claude Desktop, and other MCP clients. Persistent memory, durable workflow runs, selectable terminal and automation backends, overnight learning, self-healing background jobs, startup preflight, and doctor diagnostics. 150+ MCP tools. Benchmarked on LoCoMo (F1 0.588, +55% vs GPT-4).
|
|
10
10
|
|
|
11
11
|
**NEXO Brain transforms any MCP-compatible AI agent from a stateless assistant into a cognitive partner that remembers, learns, forgets, adapts, and builds a relationship with you over time.**
|
|
12
12
|
|
|
@@ -18,6 +18,16 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview on YouTube](https://www.youtube.com/watch?v=IBs7zh7ZMG0) · [Watch the full deep-dive](https://www.youtube.com/watch?v=bKAfowyyy5M)
|
|
20
20
|
|
|
21
|
+
Start here:
|
|
22
|
+
- [5-minute quickstart](docs/quickstart-5-minutes.md)
|
|
23
|
+
- [Architecture visuals](docs/architecture-visuals.md)
|
|
24
|
+
- [Memory classes](docs/memory-classes.md)
|
|
25
|
+
- [Session portability](docs/session-portability.md)
|
|
26
|
+
- [Python SDK](docs/sdk-python.md)
|
|
27
|
+
- [Reference verticals](docs/reference-verticals.md)
|
|
28
|
+
- [Measured compare scorecard](compare/README.md)
|
|
29
|
+
- [Public contribution guide](docs/public-contribution.md)
|
|
30
|
+
|
|
21
31
|
Every time you close a session, everything is lost. Your agent doesn't remember yesterday's decisions, repeats the same mistakes, and starts from zero. NEXO Brain fixes this with a cognitive architecture modeled after how human memory actually works.
|
|
22
32
|
|
|
23
33
|
## Shared Brain Across Clients
|
|
@@ -38,15 +48,32 @@ That means NEXO now manages not only the shared runtime and MCP wiring, but also
|
|
|
38
48
|
- For Codex specifically, `nexo chat` and Codex headless automation inject the current bootstrap explicitly, so Codex starts as NEXO even when plain global Codex startup is inconsistent about global instructions.
|
|
39
49
|
- Deep Sleep now reads both Claude Code and Codex transcript stores, so overnight analysis still works even when the user spends the day in Codex.
|
|
40
50
|
|
|
41
|
-
Versions `2.6.14` through `2.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
-
|
|
49
|
-
-
|
|
51
|
+
Versions `2.6.14` through `2.7.0` established the practical shared-brain baseline: managed Claude/Codex bootstrap, Codex config sync, transcript-aware Deep Sleep, 60-day long-horizon analysis, weekly/monthly summary artifacts, retrieval auto-mode, and the first measured engineering loop.
|
|
52
|
+
|
|
53
|
+
Versions `3.0.0` and `3.0.1` close the next execution gap:
|
|
54
|
+
|
|
55
|
+
- protocol discipline is now a runtime contract, not just instructions:
|
|
56
|
+
- `nexo_task_open`
|
|
57
|
+
- `nexo_task_close`
|
|
58
|
+
- persistent `protocol_debt`
|
|
59
|
+
- enforceable `Cortex` gates
|
|
60
|
+
- durable execution is now first-class:
|
|
61
|
+
- resumable workflow runs
|
|
62
|
+
- checkpoints
|
|
63
|
+
- replay
|
|
64
|
+
- retries
|
|
65
|
+
- durable goals
|
|
66
|
+
- conditioned learnings on critical files are now real guardrails across Claude hooks, Codex transcript audits, and headless automation prompts
|
|
67
|
+
- repair/correction work now routes through canonical learning capture instead of depending on the model to remember to document after the fact
|
|
68
|
+
- runtime truth is stricter:
|
|
69
|
+
- no more healthy-looking warning storms
|
|
70
|
+
- no more silent Deep Sleep schema drift
|
|
71
|
+
- keep-alive jobs report alive/degraded/duplicated honestly
|
|
72
|
+
- public proof is stronger:
|
|
73
|
+
- measured compare scorecard
|
|
74
|
+
- external and internal ablations
|
|
75
|
+
- `cost_per_solved_task`
|
|
76
|
+
- SDK/API/quickstart surface
|
|
50
77
|
|
|
51
78
|
### Client Capability Matrix
|
|
52
79
|
|
|
@@ -227,6 +254,20 @@ User message → Fast Path check → Simple chat? → Respond directly
|
|
|
227
254
|
|
|
228
255
|
The Cortex was designed through a 3-way AI debate (Claude Opus 4.6 + GPT-5.4 + Gemini 3.1 Pro) and validated against 6 months of real production failures.
|
|
229
256
|
|
|
257
|
+
## Durable Workflow Runtime
|
|
258
|
+
|
|
259
|
+
Memory and guardrails are not enough if long work still restarts from zero.
|
|
260
|
+
|
|
261
|
+
NEXO now ships a durable workflow runtime for multi-step and cross-session execution:
|
|
262
|
+
|
|
263
|
+
- `nexo_workflow_open` creates a persistent run with step metadata, idempotency key, priority, and shared state
|
|
264
|
+
- `nexo_workflow_update` records replayable checkpoints, retry metadata, approval gates, and the current actionable state
|
|
265
|
+
- `nexo_workflow_resume` tells the agent what to do next without guessing
|
|
266
|
+
- `nexo_workflow_replay` reconstructs the recent execution history honestly instead of pretending the run is still in memory
|
|
267
|
+
- `nexo_workflow_list` keeps active and blocked work visible so it does not disappear into reminders or prose notes
|
|
268
|
+
|
|
269
|
+
This is the bridge between "good memory" and "reliable execution": tasks can now preserve state, retries, approval gates, and next action across interruptions.
|
|
270
|
+
|
|
230
271
|
## Context Continuity (Auto-Compaction)
|
|
231
272
|
|
|
232
273
|
NEXO Brain automatically preserves session context when Claude Code compacts conversations. Using PreCompact and PostCompact hooks:
|
|
@@ -642,6 +683,19 @@ nexo scripts list # See your personal scripts
|
|
|
642
683
|
|
|
643
684
|
During install, NEXO now asks which interactive clients you want to connect, which one `nexo chat` should suggest first when multiple terminal clients are available, whether to enable background automation, which backend should run that automation, and which model profile each active terminal/backend should use. Shared brain stays on in every mode.
|
|
644
685
|
|
|
686
|
+
Public entry points for the mental model now stay intentionally small:
|
|
687
|
+
- `nexo_remember`
|
|
688
|
+
- `nexo_memory_recall`
|
|
689
|
+
- `nexo_consolidate`
|
|
690
|
+
- `nexo_run_workflow`
|
|
691
|
+
|
|
692
|
+
If you want the shell or Python wrappers instead of raw MCP tools:
|
|
693
|
+
- [docs/quickstart-5-minutes.md](docs/quickstart-5-minutes.md)
|
|
694
|
+
- [docs/memory-classes.md](docs/memory-classes.md)
|
|
695
|
+
- [docs/sdk-python.md](docs/sdk-python.md)
|
|
696
|
+
- [docs/reference-verticals.md](docs/reference-verticals.md)
|
|
697
|
+
- [compare/README.md](compare/README.md)
|
|
698
|
+
|
|
645
699
|
Recommended defaults:
|
|
646
700
|
- Claude Code: `Opus 4.6 with 1M context`
|
|
647
701
|
- Codex: `gpt-5.4` with `xhigh` reasoning
|
|
@@ -714,7 +768,7 @@ nexo doctor --tier runtime --json # Machine-readable health report
|
|
|
714
768
|
nexo doctor --fix # Apply deterministic repairs
|
|
715
769
|
```
|
|
716
770
|
|
|
717
|
-
Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. See `docs/writing-scripts.md` for details.
|
|
771
|
+
Personal scripts live in `NEXO_HOME/scripts/` with inline metadata. Their Python templates now include `run_automation_text(...)`, which routes work through the configured NEXO automation backend instead of hardcoding `claude -p` or provider-specific model names. `nexo-agent-run.py` now also supports task profiles (`fast`, `balanced`, `deep`) plus safe backend fallback, so automations can prefer cheaper/faster Codex paths or deeper Claude paths without hardcoding one provider forever. See `docs/writing-scripts.md` for details.
|
|
718
772
|
|
|
719
773
|
Skills v2 combine procedural guides with optional executable scripts. Personal skills live in `NEXO_HOME/skills/`, packaged core skills live in `NEXO_CODE/skills/` during development and `NEXO_HOME/skills-core/` in installed environments, and staged runtime copies live in `NEXO_HOME/skills-runtime/`. Execution is fully autonomous: Deep Sleep can evolve mature guide skills into executable drafts automatically, and runtime execution no longer waits for manual approval. See `docs/skills-v2.md` for the full model.
|
|
720
774
|
|
|
@@ -840,7 +894,7 @@ When Claude Desktop is installed, `nexo-brain`, `nexo update`, and `nexo clients
|
|
|
840
894
|
|
|
841
895
|
### Codex
|
|
842
896
|
|
|
843
|
-
When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning.
|
|
897
|
+
When Codex CLI is available, `nexo-brain`, `nexo update`, and `nexo clients sync` register the same `nexo` MCP server via `codex mcp add`, so Codex uses the same local memory store as Claude Code and Claude Desktop. If selected during install, `nexo chat` can open Codex directly and background automation can also run through Codex. Interactive `nexo chat` launches use Codex's aggressive no-confirmation mode so the session does not stall on repetitive approval prompts. The current recommended Codex profile is `gpt-5.4` with `xhigh` reasoning. Runtime Doctor also audits recent Codex sessions for NEXO startup markers and conditioned-file protocol discipline so parity drift does not hide behind the lack of native Claude-style hooks.
|
|
844
898
|
|
|
845
899
|
### OpenClaw
|
|
846
900
|
|
package/hooks/hooks.json
CHANGED
|
@@ -2,10 +2,89 @@
|
|
|
2
2
|
"hooks": {
|
|
3
3
|
"SessionStart": [
|
|
4
4
|
{
|
|
5
|
+
"matcher": "*",
|
|
5
6
|
"hooks": [
|
|
6
7
|
{
|
|
7
8
|
"type": "command",
|
|
8
9
|
"command": "diff -q \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\" >/dev/null 2>&1 || (python3 -m venv \"${CLAUDE_PLUGIN_DATA}/.venv\" 2>/dev/null; cp \"${CLAUDE_PLUGIN_ROOT}/src/requirements.txt\" \"${CLAUDE_PLUGIN_DATA}/requirements.txt\"; \"${CLAUDE_PLUGIN_DATA}/.venv/bin/pip\" install --quiet -r \"${CLAUDE_PLUGIN_DATA}/requirements.txt\") || rm -f \"${CLAUDE_PLUGIN_DATA}/requirements.txt\""
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"type": "command",
|
|
13
|
+
"command": "mkdir -p \"${CLAUDE_PLUGIN_DATA}/operations\" && date +%s > \"${CLAUDE_PLUGIN_DATA}/operations/.session-start-ts\"",
|
|
14
|
+
"timeout": 2
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"type": "command",
|
|
18
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/daily-briefing-check.sh\"",
|
|
19
|
+
"timeout": 5
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"type": "command",
|
|
23
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-start.sh\"",
|
|
24
|
+
"timeout": 35
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
],
|
|
29
|
+
"Stop": [
|
|
30
|
+
{
|
|
31
|
+
"matcher": "*",
|
|
32
|
+
"hooks": [
|
|
33
|
+
{
|
|
34
|
+
"type": "command",
|
|
35
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/session-stop.sh\"",
|
|
36
|
+
"timeout": 10
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
],
|
|
41
|
+
"PostToolUse": [
|
|
42
|
+
{
|
|
43
|
+
"matcher": "*",
|
|
44
|
+
"hooks": [
|
|
45
|
+
{
|
|
46
|
+
"type": "command",
|
|
47
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-tool-logs.sh\"",
|
|
48
|
+
"timeout": 5
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"type": "command",
|
|
52
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/capture-session.sh\"",
|
|
53
|
+
"timeout": 3
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"type": "command",
|
|
57
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/inbox-hook.sh\"",
|
|
58
|
+
"timeout": 5
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"type": "command",
|
|
62
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/protocol-guardrail.sh\"",
|
|
63
|
+
"timeout": 5
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|
|
67
|
+
],
|
|
68
|
+
"PreCompact": [
|
|
69
|
+
{
|
|
70
|
+
"matcher": "*",
|
|
71
|
+
"hooks": [
|
|
72
|
+
{
|
|
73
|
+
"type": "command",
|
|
74
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/pre-compact.sh\"",
|
|
75
|
+
"timeout": 10
|
|
76
|
+
}
|
|
77
|
+
]
|
|
78
|
+
}
|
|
79
|
+
],
|
|
80
|
+
"PostCompact": [
|
|
81
|
+
{
|
|
82
|
+
"matcher": "*",
|
|
83
|
+
"hooks": [
|
|
84
|
+
{
|
|
85
|
+
"type": "command",
|
|
86
|
+
"command": "NEXO_HOME=\"${CLAUDE_PLUGIN_DATA}\" NEXO_CODE=\"${CLAUDE_PLUGIN_ROOT}/src\" bash \"${CLAUDE_PLUGIN_ROOT}/src/hooks/post-compact.sh\"",
|
|
87
|
+
"timeout": 10
|
|
9
88
|
}
|
|
10
89
|
]
|
|
11
90
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "2.7.0",
|
|
3
|
+
"version": "3.0.1",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/agent_runner.py
CHANGED
|
@@ -8,9 +8,14 @@ import shlex
|
|
|
8
8
|
import shutil
|
|
9
9
|
import subprocess
|
|
10
10
|
import tempfile
|
|
11
|
-
import
|
|
11
|
+
import time
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
|
|
14
|
+
try:
|
|
15
|
+
import tomllib
|
|
16
|
+
except ModuleNotFoundError: # Python < 3.11
|
|
17
|
+
import tomli as tomllib
|
|
18
|
+
|
|
14
19
|
from client_preferences import (
|
|
15
20
|
BACKEND_NONE,
|
|
16
21
|
CLIENT_CLAUDE_CODE,
|
|
@@ -18,6 +23,7 @@ from client_preferences import (
|
|
|
18
23
|
TERMINAL_CLIENT_KEYS,
|
|
19
24
|
load_client_preferences,
|
|
20
25
|
resolve_automation_backend,
|
|
26
|
+
resolve_automation_task_profile,
|
|
21
27
|
resolve_client_runtime_profile,
|
|
22
28
|
resolve_terminal_client,
|
|
23
29
|
)
|
|
@@ -25,6 +31,12 @@ from client_preferences import (
|
|
|
25
31
|
|
|
26
32
|
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
|
|
27
33
|
CLAUDE_LEGACY_MODEL_HINTS = {"opus", "sonnet"}
|
|
34
|
+
MODEL_PRICING_USD_PER_1M = {
|
|
35
|
+
# Pricing snapshot used only when the backend does not return explicit cost.
|
|
36
|
+
# Codex model names map to the current GPT-5 family pricing.
|
|
37
|
+
"gpt-5.4": {"input": 1.25, "cached_input": 0.125, "output": 10.0},
|
|
38
|
+
"gpt-5.4-mini": {"input": 0.25, "cached_input": 0.025, "output": 2.0},
|
|
39
|
+
}
|
|
28
40
|
|
|
29
41
|
|
|
30
42
|
class AgentRunnerError(RuntimeError):
|
|
@@ -39,6 +51,192 @@ class AutomationBackendUnavailableError(AgentRunnerError):
|
|
|
39
51
|
"""Raised when the configured automation backend is unavailable."""
|
|
40
52
|
|
|
41
53
|
|
|
54
|
+
def _canonical_pricing_model(model: str) -> str:
|
|
55
|
+
lowered = str(model or "").strip().lower()
|
|
56
|
+
lowered = lowered.split("[", 1)[0]
|
|
57
|
+
aliases = {
|
|
58
|
+
"gpt-5": "gpt-5.4",
|
|
59
|
+
"gpt-5.4": "gpt-5.4",
|
|
60
|
+
"gpt-5-mini": "gpt-5.4-mini",
|
|
61
|
+
"gpt-5.4-mini": "gpt-5.4-mini",
|
|
62
|
+
}
|
|
63
|
+
return aliases.get(lowered, lowered)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _estimate_openai_cost_usd(model: str, *, input_tokens: int, cached_input_tokens: int, output_tokens: int) -> tuple[float | None, str]:
    """Estimate a run's USD cost from token counts and the pricing snapshot.

    Args:
        model: Model name; canonicalized before the pricing lookup.
        input_tokens: Input token count (falsy values count as 0).
        cached_input_tokens: Cached input token count (falsy values count as 0).
        output_tokens: Output token count (falsy values count as 0).

    Returns:
        ``(cost, "pricing_snapshot")`` with the cost rounded to 6 decimals, or
        ``(None, "pricing_unavailable")`` when the model has no pricing entry.
    """
    rates = MODEL_PRICING_USD_PER_1M.get(_canonical_pricing_model(model))
    if not rates:
        return None, "pricing_unavailable"

    # NOTE(review): the three counts are priced independently; if the backend's
    # input_tokens already includes cached tokens this double-counts — confirm.
    billed = (
        ("input", input_tokens),
        ("cached_input", cached_input_tokens),
        ("output", output_tokens),
    )
    estimate = 0.0
    for rate_key, token_count in billed:
        # Negative counts are clamped to zero; rates are quoted per 1M tokens.
        estimate += (max(0, int(token_count or 0)) / 1_000_000.0) * rates[rate_key]
    return round(estimate, 6), "pricing_snapshot"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _safe_json_loads(raw: str) -> dict | list | None:
|
|
78
|
+
try:
|
|
79
|
+
return json.loads(raw)
|
|
80
|
+
except Exception:
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _extract_claude_telemetry(raw_stdout: str, *, requested_output_format: str) -> tuple[str, dict]:
|
|
85
|
+
payload = _safe_json_loads(raw_stdout) if str(raw_stdout or "").strip().startswith("{") else None
|
|
86
|
+
if not isinstance(payload, dict):
|
|
87
|
+
return raw_stdout or "", {
|
|
88
|
+
"telemetry_source": "missing",
|
|
89
|
+
"cost_source": "missing",
|
|
90
|
+
"usage": {},
|
|
91
|
+
"warnings": ["backend did not return parseable JSON telemetry"],
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
result_payload = payload.get("result", "")
|
|
95
|
+
if requested_output_format and requested_output_format.lower() == "json" and not isinstance(result_payload, str):
|
|
96
|
+
final_stdout = json.dumps(result_payload, ensure_ascii=False)
|
|
97
|
+
else:
|
|
98
|
+
final_stdout = result_payload if isinstance(result_payload, str) else json.dumps(result_payload, ensure_ascii=False)
|
|
99
|
+
|
|
100
|
+
usage = payload.get("usage") or {}
|
|
101
|
+
model_usage = payload.get("modelUsage") or {}
|
|
102
|
+
explicit_cost = payload.get("total_cost_usd")
|
|
103
|
+
if explicit_cost is None and isinstance(model_usage, dict):
|
|
104
|
+
explicit_cost = sum(
|
|
105
|
+
float((item or {}).get("costUSD") or 0.0)
|
|
106
|
+
for item in model_usage.values()
|
|
107
|
+
if isinstance(item, dict)
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
return final_stdout, {
|
|
111
|
+
"telemetry_source": "claude_json",
|
|
112
|
+
"cost_source": "backend",
|
|
113
|
+
"usage": {
|
|
114
|
+
"input_tokens": int(usage.get("input_tokens") or 0),
|
|
115
|
+
"cached_input_tokens": int(usage.get("cache_read_input_tokens") or 0),
|
|
116
|
+
"output_tokens": int(usage.get("output_tokens") or 0),
|
|
117
|
+
},
|
|
118
|
+
"total_cost_usd": float(explicit_cost) if explicit_cost is not None else None,
|
|
119
|
+
"raw": payload,
|
|
120
|
+
"warnings": [],
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _extract_codex_telemetry(stream_stdout: str, *, final_stdout: str, model: str) -> tuple[str, dict]:
|
|
125
|
+
usage_payload: dict = {}
|
|
126
|
+
raw_events: list[dict] = []
|
|
127
|
+
for line in str(stream_stdout or "").splitlines():
|
|
128
|
+
line = line.strip()
|
|
129
|
+
if not line.startswith("{"):
|
|
130
|
+
continue
|
|
131
|
+
payload = _safe_json_loads(line)
|
|
132
|
+
if not isinstance(payload, dict):
|
|
133
|
+
continue
|
|
134
|
+
raw_events.append(payload)
|
|
135
|
+
if payload.get("type") == "turn.completed" and isinstance(payload.get("usage"), dict):
|
|
136
|
+
usage_payload = payload["usage"]
|
|
137
|
+
|
|
138
|
+
usage = {
|
|
139
|
+
"input_tokens": int(usage_payload.get("input_tokens") or 0),
|
|
140
|
+
"cached_input_tokens": int(usage_payload.get("cached_input_tokens") or 0),
|
|
141
|
+
"output_tokens": int(usage_payload.get("output_tokens") or 0),
|
|
142
|
+
}
|
|
143
|
+
total_cost_usd = usage_payload.get("total_cost_usd")
|
|
144
|
+
cost_source = "backend" if total_cost_usd is not None else "missing"
|
|
145
|
+
warnings: list[str] = []
|
|
146
|
+
if total_cost_usd is None:
|
|
147
|
+
estimated_cost, estimated_source = _estimate_openai_cost_usd(
|
|
148
|
+
model,
|
|
149
|
+
input_tokens=usage["input_tokens"],
|
|
150
|
+
cached_input_tokens=usage["cached_input_tokens"],
|
|
151
|
+
output_tokens=usage["output_tokens"],
|
|
152
|
+
)
|
|
153
|
+
total_cost_usd = estimated_cost
|
|
154
|
+
cost_source = estimated_source
|
|
155
|
+
if estimated_cost is None:
|
|
156
|
+
warnings.append(f"no pricing snapshot available for model `{model}`")
|
|
157
|
+
|
|
158
|
+
if not usage_payload:
|
|
159
|
+
warnings.append("backend did not return usage telemetry")
|
|
160
|
+
|
|
161
|
+
return final_stdout, {
|
|
162
|
+
"telemetry_source": "codex_jsonl",
|
|
163
|
+
"cost_source": cost_source,
|
|
164
|
+
"usage": usage,
|
|
165
|
+
"total_cost_usd": float(total_cost_usd) if total_cost_usd is not None else None,
|
|
166
|
+
"raw": raw_events[-8:],
|
|
167
|
+
"warnings": warnings,
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _append_stderr(stderr: str, message: str) -> str:
|
|
172
|
+
bits = [part for part in [str(stderr or "").rstrip(), str(message or "").strip()] if part]
|
|
173
|
+
if not bits:
|
|
174
|
+
return ""
|
|
175
|
+
return "\n".join(bits) + "\n"
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _record_automation_run(
    *,
    backend: str,
    task_profile: str,
    model: str,
    reasoning_effort: str,
    cwd: Path,
    output_format: str,
    prompt: str,
    returncode: int,
    duration_ms: int,
    telemetry: dict,
) -> tuple[bool, str]:
    """Insert one row describing an automation run into ``automation_runs``.

    Best-effort: any failure (import, connection, insert, commit) is reported
    through the return value instead of being raised.

    Returns:
        ``(True, "")`` on success, otherwise ``(False, reason)`` where the
        reason starts with ``"automation telemetry unavailable: "``.
    """
    # Imported lazily so a missing or broken db package only disables telemetry.
    try:
        from db._core import get_db
    except Exception as exc:
        return False, f"automation telemetry unavailable: {exc}"

    insert_sql = """
        INSERT INTO automation_runs (
            backend, task_profile, model, reasoning_effort, cwd, output_format,
            prompt_chars, returncode, duration_ms,
            input_tokens, cached_input_tokens, output_tokens,
            total_cost_usd, telemetry_source, cost_source, status, metadata
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """

    try:
        conn = get_db()
        usage = telemetry.get("usage") or {}
        # Values are listed in the same order as the column list above.
        row = (
            backend,
            task_profile or "default",
            model,
            reasoning_effort,
            str(cwd),
            output_format or "text",
            len(prompt or ""),  # only the prompt length is stored
            int(returncode),
            int(duration_ms),
            int(usage.get("input_tokens") or 0),
            int(usage.get("cached_input_tokens") or 0),
            int(usage.get("output_tokens") or 0),
            telemetry.get("total_cost_usd"),
            telemetry.get("telemetry_source", ""),
            telemetry.get("cost_source", ""),
            "ok" if int(returncode) == 0 else "failed",
            json.dumps(
                {
                    "warnings": telemetry.get("warnings") or [],
                    "raw": telemetry.get("raw") or {},
                },
                ensure_ascii=False,
            ),
        )
        conn.execute(insert_sql, row)
        conn.commit()
    except Exception as exc:
        return False, f"automation telemetry unavailable: {exc}"
    return True, ""
|
|
238
|
+
|
|
239
|
+
|
|
42
240
|
def _resolve_claude_cli() -> str:
|
|
43
241
|
saved = NEXO_HOME / "config" / "claude-cli-path"
|
|
44
242
|
if saved.exists():
|
|
@@ -245,6 +443,27 @@ def _resolve_runtime_model_and_effort(
|
|
|
245
443
|
return requested_model, requested_effort
|
|
246
444
|
|
|
247
445
|
|
|
446
|
+
def _backend_is_available(backend: str) -> bool:
    """Report whether the CLI for *backend* resolves to a truthy value.

    Only the Claude Code and Codex backends are recognised; any other value
    yields ``False``.
    """
    if backend == CLIENT_CLAUDE_CODE:
        resolved = _resolve_claude_cli()
    elif backend == CLIENT_CODEX:
        resolved = _resolve_codex_cli()
    else:
        return False
    return bool(resolved)
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def _resolve_available_backend(selected_backend: str, *, preferences: dict | None = None) -> str:
    """Return *selected_backend* if usable, else the first usable fallback.

    Fallbacks are tried in order: the configured automation backend, Claude
    Code, then Codex. The selected backend itself and the "none" backend are
    skipped. When nothing is available the original selection is returned
    unchanged.

    Args:
        selected_backend: Backend requested for this run.
        preferences: Client preferences; loaded on demand when falsy.
    """
    if _backend_is_available(selected_backend):
        return selected_backend

    prefs = preferences or load_client_preferences()
    fallbacks = (
        resolve_automation_backend(preferences=prefs),
        CLIENT_CLAUDE_CODE,
        CLIENT_CODEX,
    )
    usable = next(
        (
            candidate
            for candidate in fallbacks
            if candidate not in (selected_backend, BACKEND_NONE)
            and _backend_is_available(candidate)
        ),
        None,
    )
    return selected_backend if usable is None else usable
|
|
465
|
+
|
|
466
|
+
|
|
248
467
|
def _build_codex_prompt(
|
|
249
468
|
prompt: str,
|
|
250
469
|
*,
|
|
@@ -252,7 +471,18 @@ def _build_codex_prompt(
|
|
|
252
471
|
append_system_prompt: str = "",
|
|
253
472
|
allowed_tools: str = "",
|
|
254
473
|
) -> str:
|
|
474
|
+
protocol_contract = (
|
|
475
|
+
"NEXO PROTOCOL (MANDATORY):\n"
|
|
476
|
+
"- Before non-trivial analyze/edit/execute/delegate work, call `nexo_task_open(...)`. "
|
|
477
|
+
"If that tool is unavailable, call `nexo_guard_check(...)` and `nexo_cortex_check(...)` first.\n"
|
|
478
|
+
"- For long multi-step or cross-session work, call `nexo_workflow_open(...)` and keep it updated with "
|
|
479
|
+
"`nexo_workflow_update(...)` so resume/replay use durable state instead of guesswork.\n"
|
|
480
|
+
"- If a target file has conditioned learnings or blocking guard rules, review them before any read/edit/delete step, and acknowledge guard before any edit/delete step.\n"
|
|
481
|
+
"- Do not claim done without explicit verification evidence. Close with `nexo_task_close(...)`; if unavailable, capture the change log and state the evidence explicitly.\n"
|
|
482
|
+
"- When a correction changes the canonical rule, capture or supersede the learning instead of leaving contradictory active rules behind."
|
|
483
|
+
)
|
|
255
484
|
instructions: list[str] = []
|
|
485
|
+
instructions.append(protocol_contract)
|
|
256
486
|
if append_system_prompt:
|
|
257
487
|
instructions.append(f"SYSTEM INSTRUCTIONS:\n{append_system_prompt}")
|
|
258
488
|
if output_format and output_format.lower() == "text":
|
|
@@ -273,6 +503,7 @@ def run_automation_prompt(
|
|
|
273
503
|
prompt: str,
|
|
274
504
|
*,
|
|
275
505
|
backend: str | None = None,
|
|
506
|
+
task_profile: str = "",
|
|
276
507
|
cwd: str | os.PathLike[str] | None = None,
|
|
277
508
|
env: dict | None = None,
|
|
278
509
|
model: str = "",
|
|
@@ -288,15 +519,26 @@ def run_automation_prompt(
|
|
|
288
519
|
if selected_backend == BACKEND_NONE:
|
|
289
520
|
raise AutomationBackendUnavailableError("Automation backend is disabled in config.")
|
|
290
521
|
|
|
522
|
+
if task_profile:
|
|
523
|
+
profile = resolve_automation_task_profile(task_profile, preferences=prefs)
|
|
524
|
+
selected_backend = profile["backend"] or selected_backend
|
|
525
|
+
if not model:
|
|
526
|
+
model = profile["model"]
|
|
527
|
+
if not reasoning_effort:
|
|
528
|
+
reasoning_effort = profile["reasoning_effort"]
|
|
529
|
+
selected_backend = _resolve_available_backend(selected_backend, preferences=prefs)
|
|
530
|
+
|
|
291
531
|
cwd_path = Path(cwd).expanduser().resolve() if cwd else Path.cwd()
|
|
292
532
|
run_env = _headless_env(env)
|
|
293
533
|
extra_args = list(extra_args or [])
|
|
534
|
+
requested_output_format = output_format or "text"
|
|
294
535
|
resolved_model, resolved_effort = _resolve_runtime_model_and_effort(
|
|
295
536
|
selected_backend,
|
|
296
537
|
model=model,
|
|
297
538
|
reasoning_effort=reasoning_effort,
|
|
298
539
|
preferences=prefs,
|
|
299
540
|
)
|
|
541
|
+
started_at = time.perf_counter()
|
|
300
542
|
|
|
301
543
|
if selected_backend == CLIENT_CLAUDE_CODE:
|
|
302
544
|
claude_bin = _resolve_claude_cli()
|
|
@@ -309,14 +551,13 @@ def run_automation_prompt(
|
|
|
309
551
|
cmd.extend(["--model", resolved_model])
|
|
310
552
|
if resolved_effort:
|
|
311
553
|
cmd.extend(["--effort", resolved_effort])
|
|
312
|
-
|
|
313
|
-
cmd.extend(["--output-format", output_format])
|
|
554
|
+
cmd.extend(["--output-format", "json"])
|
|
314
555
|
if append_system_prompt:
|
|
315
556
|
cmd.extend(["--append-system-prompt", append_system_prompt])
|
|
316
557
|
if allowed_tools:
|
|
317
558
|
cmd.extend(["--allowedTools", allowed_tools])
|
|
318
559
|
cmd.extend(extra_args)
|
|
319
|
-
|
|
560
|
+
result = subprocess.run(
|
|
320
561
|
cmd,
|
|
321
562
|
cwd=str(cwd_path),
|
|
322
563
|
capture_output=True,
|
|
@@ -324,6 +565,31 @@ def run_automation_prompt(
|
|
|
324
565
|
timeout=timeout,
|
|
325
566
|
env=run_env,
|
|
326
567
|
)
|
|
568
|
+
final_stdout, telemetry = _extract_claude_telemetry(
|
|
569
|
+
result.stdout or "",
|
|
570
|
+
requested_output_format=requested_output_format,
|
|
571
|
+
)
|
|
572
|
+
recorded, record_error = _record_automation_run(
|
|
573
|
+
backend=selected_backend,
|
|
574
|
+
task_profile=task_profile,
|
|
575
|
+
model=resolved_model,
|
|
576
|
+
reasoning_effort=resolved_effort,
|
|
577
|
+
cwd=cwd_path,
|
|
578
|
+
output_format=requested_output_format,
|
|
579
|
+
prompt=prompt,
|
|
580
|
+
returncode=result.returncode,
|
|
581
|
+
duration_ms=int((time.perf_counter() - started_at) * 1000),
|
|
582
|
+
telemetry=telemetry,
|
|
583
|
+
)
|
|
584
|
+
stderr = result.stderr or ""
|
|
585
|
+
if not recorded:
|
|
586
|
+
stderr = _append_stderr(stderr, record_error)
|
|
587
|
+
return subprocess.CompletedProcess(
|
|
588
|
+
cmd,
|
|
589
|
+
result.returncode,
|
|
590
|
+
final_stdout,
|
|
591
|
+
stderr,
|
|
592
|
+
)
|
|
327
593
|
|
|
328
594
|
if selected_backend == CLIENT_CODEX:
|
|
329
595
|
codex_bin = _resolve_codex_cli()
|
|
@@ -339,6 +605,7 @@ def run_automation_prompt(
|
|
|
339
605
|
"--skip-git-repo-check",
|
|
340
606
|
"--dangerously-bypass-approvals-and-sandbox",
|
|
341
607
|
"--ephemeral",
|
|
608
|
+
"--json",
|
|
342
609
|
"-C",
|
|
343
610
|
str(cwd_path),
|
|
344
611
|
"-o",
|
|
@@ -368,12 +635,33 @@ def run_automation_prompt(
|
|
|
368
635
|
timeout=timeout,
|
|
369
636
|
env=run_env,
|
|
370
637
|
)
|
|
371
|
-
|
|
638
|
+
raw_stdout = result.stdout or ""
|
|
639
|
+
stdout = output_path.read_text() if output_path.exists() else raw_stdout
|
|
640
|
+
final_stdout, telemetry = _extract_codex_telemetry(
|
|
641
|
+
raw_stdout,
|
|
642
|
+
final_stdout=stdout,
|
|
643
|
+
model=resolved_model,
|
|
644
|
+
)
|
|
645
|
+
recorded, record_error = _record_automation_run(
|
|
646
|
+
backend=selected_backend,
|
|
647
|
+
task_profile=task_profile,
|
|
648
|
+
model=resolved_model,
|
|
649
|
+
reasoning_effort=resolved_effort,
|
|
650
|
+
cwd=cwd_path,
|
|
651
|
+
output_format=requested_output_format,
|
|
652
|
+
prompt=prompt,
|
|
653
|
+
returncode=result.returncode,
|
|
654
|
+
duration_ms=int((time.perf_counter() - started_at) * 1000),
|
|
655
|
+
telemetry=telemetry,
|
|
656
|
+
)
|
|
657
|
+
stderr = result.stderr or ""
|
|
658
|
+
if not recorded:
|
|
659
|
+
stderr = _append_stderr(stderr, record_error)
|
|
372
660
|
return subprocess.CompletedProcess(
|
|
373
661
|
cmd,
|
|
374
662
|
result.returncode,
|
|
375
|
-
|
|
376
|
-
|
|
663
|
+
final_stdout,
|
|
664
|
+
stderr,
|
|
377
665
|
)
|
|
378
666
|
|
|
379
667
|
raise AutomationBackendUnavailableError(f"Unsupported automation backend: {selected_backend}")
|