axnwork-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- axnwork_cli-0.2.0.dist-info/METADATA +13 -0
- axnwork_cli-0.2.0.dist-info/RECORD +23 -0
- axnwork_cli-0.2.0.dist-info/WHEEL +5 -0
- axnwork_cli-0.2.0.dist-info/entry_points.txt +2 -0
- axnwork_cli-0.2.0.dist-info/top_level.txt +1 -0
- axon/__init__.py +0 -0
- axon/api.py +83 -0
- axon/backends/__init__.py +5 -0
- axon/backends/base.py +23 -0
- axon/backends/claude_cli.py +290 -0
- axon/backends/codex_cli.py +223 -0
- axon/backends/litellm_backend.py +51 -0
- axon/backends/registry.py +61 -0
- axon/cli.py +595 -0
- axon/config.py +55 -0
- axon/display.py +364 -0
- axon/history.py +133 -0
- axon/llm.py +214 -0
- axon/log.py +44 -0
- axon/mining.py +671 -0
- axon/providers.py +44 -0
- axon/session.py +26 -0
- axon/wallet.py +45 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: axnwork-cli
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Axon mining CLI for the Proof of Useful Work platform
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: typer>=0.15.0
|
|
7
|
+
Requires-Dist: httpx>=0.28.0
|
|
8
|
+
Requires-Dist: litellm>=1.60.0
|
|
9
|
+
Requires-Dist: rich>=13.9.0
|
|
10
|
+
Requires-Dist: eth-account>=0.13.0
|
|
11
|
+
Requires-Dist: simple-term-menu>=1.6.0
|
|
12
|
+
Provides-Extra: test
|
|
13
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
axon/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
axon/api.py,sha256=jYtTEMy41Il8KOcUDxgtdUjHx_yE77nkV8sKiImQFl0,2654
|
|
3
|
+
axon/cli.py,sha256=a4y0TzJ2XUaqK2QuMukh0aDUHPbKnm0d_PhggUPh6nI,22314
|
|
4
|
+
axon/config.py,sha256=ggyGAKgrHvWIvW4gtP9YVJtKdF2Dft6lPfWmTKu8JyE,1613
|
|
5
|
+
axon/display.py,sha256=8LYSNXA1lza8hzcCIqBxAP91ZNrzhtle_7_98drbg2I,14524
|
|
6
|
+
axon/history.py,sha256=01ci1lR3wjFKibXzrizVi9YZwazc3wputtx5XtAIyaI,4546
|
|
7
|
+
axon/llm.py,sha256=d59C_trHn8q226s6oHZluosZy67i5IDuC1-Tk5Frt7M,10757
|
|
8
|
+
axon/log.py,sha256=-d0UvJFwyrTIFrtFDsLoOB9nExlvzEfefAh3qBpJNmY,1413
|
|
9
|
+
axon/mining.py,sha256=7lTOVYCVv1kW530bpHxLDuOWukTEKqedYbIxu3zzd2M,30873
|
|
10
|
+
axon/providers.py,sha256=Un5XPzrE2E0RWOJ897ZVbS_8S5vRHf-sDPQY4Mf_12A,2130
|
|
11
|
+
axon/session.py,sha256=Jfw_1x4Gyod5XBPo0b7PCVI5QI3gEhQgjTiw7SiPsQ4,643
|
|
12
|
+
axon/wallet.py,sha256=7ycCxPynUJM81TQL6DxoKjEepxw0-0FplvzypnPpLWY,1338
|
|
13
|
+
axon/backends/__init__.py,sha256=G4O08tKQSJKQ7a2BkNwhHvqo3CmpXMHVPiXWpnPIKiE,269
|
|
14
|
+
axon/backends/base.py,sha256=dsYvf43Qmpsa-XEtohkF2b3Oscn68nS0iJc8US6p2W8,665
|
|
15
|
+
axon/backends/claude_cli.py,sha256=LuPe3yBDNAn5acNgyXnYratycWA9gvwUAYio08ziplo,10378
|
|
16
|
+
axon/backends/codex_cli.py,sha256=zFJoyk32cLNXVXF8wzEFVqLnd_17f9ZO6kDYfGe16zY,7295
|
|
17
|
+
axon/backends/litellm_backend.py,sha256=jggn6vC676UApLUV-nOQOeUS-jFz5Zz8eC3lIGJu7c0,1636
|
|
18
|
+
axon/backends/registry.py,sha256=H-eaZdVKtowktXv9AB67O9hb55mwYMsrb1YGN1ZE2oc,1648
|
|
19
|
+
axnwork_cli-0.2.0.dist-info/METADATA,sha256=jca0xsyONsiOqA-rvGdxFSlhIC-how1YoSfI43hpTIM,398
|
|
20
|
+
axnwork_cli-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
21
|
+
axnwork_cli-0.2.0.dist-info/entry_points.txt,sha256=PvVxY7m3doaRsgAFAWnCXdkZsdzvMtT9Rdx53_Mzvpg,38
|
|
22
|
+
axnwork_cli-0.2.0.dist-info/top_level.txt,sha256=vRkPFUYD0GspqpZfeuY7c2fzJmpHIpgjOY_nnOHNwVc,5
|
|
23
|
+
axnwork_cli-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
axon
|
axon/__init__.py
ADDED
|
File without changes
|
axon/api.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Backend HTTP client with automatic wallet auth."""
|
|
2
|
+
import httpx
|
|
3
|
+
from axon.config import load_config, get_token, save_config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _ensure_auth():
    """Make sure a usable auth token is stored, re-authenticating if needed.

    If a token is stored it is probed against ``/api/auth/me``; a 200 reply
    means nothing else has to happen.  Otherwise the wallet is loaded, a
    nonce message is requested for its address, the message is signed with
    the wallet's private key, and the signature is posted to
    ``/api/auth/verify``.  A successful verification stores the returned
    ``access_token`` via ``save_config``.

    The function is best-effort: it returns silently when there is no wallet
    or when the server answers any step with a non-200 status.

    Raises:
        httpx.ConnectError: if the server cannot be reached while probing the
            stored token (re-raised unchanged).
    """
    token = get_token()
    if token:
        config = load_config()
        try:
            # Each client gets its own transport: a client closes the
            # transport it was given when it exits, so sharing one instance
            # would hand an already-closed transport to the second client.
            with httpx.Client(
                base_url=config["server_url"],
                timeout=5,
                transport=httpx.HTTPTransport(proxy=None),
            ) as c:
                resp = c.get("/api/auth/me", headers={"Authorization": f"Bearer {token}"})
                if resp.status_code == 200:
                    return  # Token still valid
        except httpx.ConnectError:
            raise
        except Exception:
            # Deliberate best-effort: any other failure while probing is
            # treated as "token expired or invalid" and we re-auth below.
            pass

    # Token missing or expired — re-auth with wallet
    from axon.wallet import load_wallet, sign_message

    wallet = load_wallet()
    if not wallet:
        return

    config = load_config()
    with httpx.Client(
        base_url=config["server_url"],
        timeout=10,
        transport=httpx.HTTPTransport(proxy=None),
    ) as c:
        # Get nonce; let httpx encode the query string instead of
        # interpolating the address into the URL by hand.
        resp = c.get("/api/auth/nonce", params={"address": wallet["address"]})
        if resp.status_code != 200:
            return
        nonce_data = resp.json()

        # Sign
        signature = sign_message(nonce_data["message"], wallet["private_key"])

        # Verify
        resp = c.post("/api/auth/verify", json={
            "address": wallet["address"],
            "signature": signature,
        })
        if resp.status_code == 200:
            save_config({"auth_token": resp.json()["access_token"]})
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _client(auth: bool = True, timeout: int = 120) -> httpx.Client:
    """Create an ``httpx.Client`` pointed at the configured server.

    Args:
        auth: When true, ``_ensure_auth()`` runs first and the stored token
            (if any) is sent as a ``Bearer`` Authorization header.
        timeout: Timeout value handed to ``httpx.Client``.

    Returns:
        A client with JSON content-type headers and a transport created with
        ``proxy=None``.  The caller is responsible for closing it.
    """
    if auth:
        _ensure_auth()

    config = load_config()
    request_headers = {"Content-Type": "application/json"}

    token = get_token()
    if auth and token:
        request_headers["Authorization"] = f"Bearer {token}"

    direct_transport = httpx.HTTPTransport(proxy=None)
    return httpx.Client(
        base_url=config["server_url"],
        headers=request_headers,
        timeout=timeout,
        transport=direct_transport,
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def api_get(path: str, auth: bool = True) -> dict | list:
    """GET ``path`` from the server and return the decoded JSON body.

    Raises whatever ``raise_for_status()`` raises for an error response.
    """
    with _client(auth=auth) as http:
        response = http.get(path)
        response.raise_for_status()
        payload = response.json()
    return payload
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def api_post(path: str, body: dict, auth: bool = True) -> dict:
    """POST ``body`` as JSON to ``path`` and return the decoded JSON reply.

    Raises whatever ``raise_for_status()`` raises for an error response.
    """
    with _client(auth=auth) as http:
        response = http.post(path, json=body)
        response.raise_for_status()
        payload = response.json()
    return payload
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def api_patch(path: str, body: dict | None = None, auth: bool = True) -> dict:
    """PATCH ``path`` and return the decoded JSON reply.

    A JSON body is sent only when ``body`` is truthy; ``None`` and an empty
    dict both result in a body-less request.

    Raises whatever ``raise_for_status()`` raises for an error response.
    """
    with _client(auth=auth) as http:
        if body:
            response = http.patch(path, json=body)
        else:
            response = http.patch(path)
        response.raise_for_status()
        payload = response.json()
    return payload
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
"""Axon mining backends — litellm, claude-cli, codex-cli."""
|
|
2
|
+
from axon.backends.base import Backend, BackendResult
|
|
3
|
+
from axon.backends.registry import auto_detect_backend, create_backend
|
|
4
|
+
|
|
5
|
+
__all__ = ["Backend", "BackendResult", "auto_detect_backend", "create_backend"]
|
axon/backends/base.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Backend protocol and result type for mining LLM calls."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Protocol, TypedDict, runtime_checkable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BackendResult(TypedDict):
    """Structured value returned by ``Backend.call``."""

    # Reasoning text accompanying the answer (may be empty).
    thinking: str
    # The final answer text.
    answer: str
    # Usage record with keys:
    # {billing_mode, tokens, cost_usd, total_tokens, prompt_tokens, completion_tokens, cost}
    usage: dict
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@runtime_checkable
class Backend(Protocol):
    """Structural interface for a mining backend.

    Decorated with ``runtime_checkable`` so ``isinstance(obj, Backend)`` can
    be used to check that an object exposes these members.
    """

    # Identifier of the backend.
    name: str

    def call(self, prompt: str, task: dict) -> BackendResult:
        """Run the backend on ``prompt`` with ``task`` as context and return the structured result."""
        ...

    def display_name(self) -> str:
        """Return a human-readable label for status lines."""
        ...
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Claude Code CLI backend — runs `claude -p` as subprocess."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import shlex
|
|
9
|
+
import signal
|
|
10
|
+
import subprocess
|
|
11
|
+
import time
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
from axon.backends.base import BackendResult
|
|
15
|
+
from axon.backends.registry import register
|
|
16
|
+
from axon.config import resolve_cli_timeout
|
|
17
|
+
|
|
18
|
+
log = logging.getLogger("axon.backend.claude")
|
|
19
|
+
_STREAM_SAMPLE_LIMIT = 20
|
|
20
|
+
_STREAM_SAMPLE_BYTES = 240
|
|
21
|
+
_SUBSCRIPTION_USAGE = {
|
|
22
|
+
"billing_mode": "subscription",
|
|
23
|
+
"tokens": None,
|
|
24
|
+
"cost_usd": None,
|
|
25
|
+
"total_tokens": None,
|
|
26
|
+
"prompt_tokens": None,
|
|
27
|
+
"completion_tokens": None,
|
|
28
|
+
"cost": None,
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _now_iso() -> str:
    """Return the current local time as an ISO-8601 string with UTC offset, truncated to seconds."""
    local_now = datetime.now().astimezone()
    return local_now.isoformat(timespec="seconds")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _normalize_output(output: str | bytes | None) -> str:
|
|
37
|
+
if output is None:
|
|
38
|
+
return ""
|
|
39
|
+
if isinstance(output, bytes):
|
|
40
|
+
return output.decode("utf-8", errors="replace")
|
|
41
|
+
return output
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _log_output_sample(stream_name: str, output: str):
    """Log a bounded sample of ``output`` at INFO level, one record per line.

    At most ``_STREAM_SAMPLE_LIMIT`` lines are logged, each cut to
    ``_STREAM_SAMPLE_BYTES`` characters (the slice is by character, not by
    byte).  A final record notes when further lines were omitted.  Empty
    output logs nothing.
    """
    if not output:
        return

    all_lines = output.splitlines()
    shown = all_lines[:_STREAM_SAMPLE_LIMIT]
    for position, text in enumerate(shown, start=1):
        log.info("Claude CLI %s[%d]: %s", stream_name, position, text[:_STREAM_SAMPLE_BYTES])

    omitted = len(all_lines) - _STREAM_SAMPLE_LIMIT
    if omitted > 0:
        log.info("Claude CLI %s: further output truncated after %d lines", stream_name, _STREAM_SAMPLE_LIMIT)
|
|
52
|
+
|
|
53
|
+
# Tool sets by eval_type
|
|
54
|
+
_TOOLS_BY_EVAL_TYPE = {
|
|
55
|
+
"code_output": "Bash,Read,Write,Grep,Glob",
|
|
56
|
+
"llm_judge": "Read,WebSearch,WebFetch,Grep,Glob",
|
|
57
|
+
}
|
|
58
|
+
_DEFAULT_TOOLS = "Read,WebSearch,Grep,Glob"
|
|
59
|
+
|
|
60
|
+
# System prompts include output format instructions (no --json-schema, which
|
|
61
|
+
# conflicts with agentic multi-turn tool use and can cause infinite retries).
|
|
62
|
+
_SYSTEM_PROMPTS = {
|
|
63
|
+
"code_output": (
|
|
64
|
+
"You are solving a coding task. Write executable code that produces the correct output. "
|
|
65
|
+
"Use Bash to test your code before submitting your final answer. "
|
|
66
|
+
"Iterate until tests pass.\n\n"
|
|
67
|
+
"YOUR FINAL MESSAGE must contain ONLY raw executable Python code.\n"
|
|
68
|
+
"Do NOT wrap it in <answer> tags. Do NOT use markdown fences. Do NOT add explanation.\n"
|
|
69
|
+
"The evaluator writes your submission directly to solution.py and executes it."
|
|
70
|
+
),
|
|
71
|
+
"llm_judge": (
|
|
72
|
+
"You are solving a research/reasoning task. Use WebSearch and WebFetch to find "
|
|
73
|
+
"accurate information. Verify facts before submitting. "
|
|
74
|
+
"Provide a thorough, well-reasoned answer in your final message."
|
|
75
|
+
),
|
|
76
|
+
}
|
|
77
|
+
_DEFAULT_SYSTEM = (
|
|
78
|
+
"You are solving a task. Use available tools to research and verify your answer. "
|
|
79
|
+
"Be thorough and accurate. Put your final answer in your last message."
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@register("claude-cli")
class ClaudeCLIBackend:
    """Backend that answers a task by running ``claude -p`` as a subprocess."""

    name = "claude-cli"

    def __init__(self, config: dict):
        """Read the CLI timeout and the optional ``claude_cli_model`` override from ``config``."""
        self._timeout = resolve_cli_timeout(config)
        self._model = config.get("claude_cli_model", "")

    def call(self, prompt: str, task: dict) -> BackendResult:
        """Run the Claude CLI on ``prompt`` and parse its JSON output.

        The allowed tools and system prompt are selected by
        ``task["eval_type"]``, falling back to the module defaults.

        Raises:
            TimeoutError: the CLI did not finish within the configured
                timeout; its process group is killed first.
            RuntimeError: the CLI exited with a non-zero return code.
        """
        eval_type = task.get("eval_type", "")
        tools = _TOOLS_BY_EVAL_TYPE.get(eval_type, _DEFAULT_TOOLS)
        system_prompt = _SYSTEM_PROMPTS.get(eval_type, _DEFAULT_SYSTEM)

        # The prompt travels over stdin (no positional arg) to stay clear of
        # OS arg-length limits.  --json-schema is intentionally not used: it
        # conflicts with agentic tool-use and causes hangs.
        cmd = [
            "claude", "-p",
            "--output-format", "json",
            "--allowedTools", tools,
            "--system-prompt", system_prompt,
            "--dangerously-skip-permissions",
        ]
        if self._model:
            cmd += ["--model", self._model]

        started_at = _now_iso()
        started_mono = time.monotonic()
        cmd_text = shlex.join(cmd)
        log.info(
            "Claude CLI start started_at=%s eval_type=%s tools=%s timeout=%s prompt_chars=%d cmd=%s",
            started_at,
            eval_type,
            tools,
            "none" if self._timeout is None else f"{self._timeout}s",
            len(prompt),
            cmd_text,
        )

        # Drop the env vars that make Claude CLI refuse to run inside
        # another session.
        child_env = dict(os.environ)
        for blocked in ("CLAUDECODE", "CLAUDE_CODE_ENTRYPOINT"):
            child_env.pop(blocked, None)

        # start_new_session puts the child in its own process group so that
        # every descendant can be killed together on timeout.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            start_new_session=True,
            env=child_env,
        )
        try:
            stdout, stderr = proc.communicate(input=prompt, timeout=self._timeout)
        except subprocess.TimeoutExpired as exc:
            partial_out = _normalize_output(exc.stdout)
            partial_err = _normalize_output(exc.stderr)
            _kill_process_group(proc)
            _log_output_sample("stdout", partial_out)
            _log_output_sample("stderr", partial_err)
            log.error(
                "Claude CLI timeout started_at=%s finished_at=%s duration_s=%.2f cmd=%s",
                started_at,
                _now_iso(),
                time.monotonic() - started_mono,
                cmd_text,
            )
            raise TimeoutError(f"Claude CLI timed out after {self._timeout}s") from None

        _log_output_sample("stdout", stdout)
        _log_output_sample("stderr", stderr)

        exit_code = proc.returncode
        if exit_code != 0:
            log.error(
                "Claude CLI failed started_at=%s finished_at=%s duration_s=%.2f returncode=%s cmd=%s stderr=%s",
                started_at,
                _now_iso(),
                time.monotonic() - started_mono,
                exit_code,
                cmd_text,
                stderr[:1000],
            )
            raise RuntimeError(f"Claude CLI exited with code {exit_code}: {stderr[:500]}")

        if stderr:
            log.debug("Claude CLI stderr: %s", stderr[:500])

        log.info(
            "Claude CLI finished started_at=%s finished_at=%s duration_s=%.2f returncode=%s stdout_chars=%d stderr_chars=%d",
            started_at,
            _now_iso(),
            time.monotonic() - started_mono,
            exit_code,
            len(stdout),
            len(stderr),
        )

        return _parse_response(stdout)

    def display_name(self) -> str:
        """Return ``claude-cli``, followed by the model in parentheses when one is configured."""
        if self._model:
            return f"claude-cli ({self._model})"
        return "claude-cli"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _kill_process_group(proc: subprocess.Popen):
    """Terminate ``proc`` together with its whole process group.

    SIGTERM is sent first and the process is given 5 seconds to exit; if it
    is still running, SIGKILL follows with a further 3-second wait.  A
    warning is logged when the process survives even that.
    """

    def _deliver(sig):
        # Signal the group; if the group cannot be resolved or signalled,
        # fall back to killing just the process itself.
        try:
            os.killpg(os.getpgid(proc.pid), sig)
        except (ProcessLookupError, OSError):
            try:
                proc.kill()
            except OSError:
                pass

    _deliver(signal.SIGTERM)
    try:
        proc.wait(timeout=5)
        return
    except subprocess.TimeoutExpired:
        pass

    _deliver(signal.SIGKILL)
    try:
        proc.wait(timeout=3)
    except subprocess.TimeoutExpired:
        log.warning("Process %d did not exit after SIGKILL", proc.pid)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _parse_response(stdout: str) -> BackendResult:
    """Turn the stdout of ``claude -p --output-format json`` into a ``BackendResult``.

    Three shapes are accepted:

    * a single envelope object ``{type: "result", result: "...", ...}``;
    * the older array form ``[{type: "system", ...}, {type: "result", ...}]``,
      from which the first ``result`` block is used;
    * any other object, read directly for ``thinking`` / ``answer`` keys.

    Raises:
        RuntimeError: on empty output, on an array without a ``result``
            block, or on a JSON value that is neither object nor array.
        json.JSONDecodeError: when the output is not valid JSON.
    """
    raw = stdout.strip()
    if not raw:
        raise RuntimeError("Claude CLI returned empty output")

    data = json.loads(raw)

    if isinstance(data, list):
        # Fallback: older array format — pick the first result block.
        envelope = next(
            (item for item in data if isinstance(item, dict) and item.get("type") == "result"),
            None,
        )
        if envelope is None:
            raise RuntimeError("No result block found in Claude CLI output")
    elif isinstance(data, dict):
        if data.get("type") != "result":
            # Direct dict with thinking/answer keys
            return BackendResult(
                thinking=data.get("thinking", ""),
                answer=data.get("answer", str(data)),
                usage=dict(_SUBSCRIPTION_USAGE),
            )
        envelope = data
    else:
        raise RuntimeError(f"Unexpected Claude CLI output type: {type(data)}")

    # Pull the result text and usage out of the response envelope.
    content = envelope.get("result", "")
    usage = _extract_usage(envelope)
    return _extract_answer(content, usage)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _extract_usage(envelope: dict) -> dict:
    """Return a fresh copy of the subscription usage record.

    ``envelope`` is not inspected: Claude CLI is subscription-based, not
    metered, so every call reports the same placeholder usage.
    """
    return _SUBSCRIPTION_USAGE.copy()
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _split_thinking_answer(content) -> tuple[str, str]:
    """Return ``(thinking, answer)`` parsed from a CLI ``result`` payload.

    Parsing strategies, tried in order:

    1. ``content`` is already a dict: read its ``thinking`` / ``answer`` keys
       (the answer falls back to ``str(content)``).
    2. The text is a JSON object containing an ``answer`` key.
    3. The text contains ``<answer>...</answer>`` (and optionally
       ``<thinking>...</thinking>``) tags.
    4. Otherwise the whole text is the answer, with a leading and trailing
       markdown fence removed.
    """
    if isinstance(content, dict):
        return content.get("thinking", ""), content.get("answer", str(content))

    # A JSON ``null`` result must yield an empty answer, not the literal
    # string "None" that ``str(None)`` would produce.
    text = "" if content is None else str(content).strip()

    # Try 1: Parse as JSON object with thinking/answer
    try:
        parsed = json.loads(text)
    except (json.JSONDecodeError, TypeError):
        parsed = None
    if isinstance(parsed, dict) and "answer" in parsed:
        return parsed.get("thinking", ""), parsed["answer"]

    # Try 2: Extract <thinking> and <answer> XML tags
    think_match = re.search(r"<thinking>(.*?)</thinking>", text, re.DOTALL)
    answer_match = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    if answer_match:
        thinking = think_match.group(1).strip() if think_match else ""
        return thinking, answer_match.group(1).strip()

    # Try 3: Strip markdown fences (common in code responses)
    cleaned = re.sub(r"^```[\w]*\s*\n?", "", text)
    cleaned = re.sub(r"\n?\s*```\s*$", "", cleaned)
    return "", cleaned.strip()


def _extract_answer(content, usage: dict) -> BackendResult:
    """Parse the result field into thinking + answer and attach ``usage``.

    Args:
        content: The ``result`` value from the CLI envelope — a dict, a
            string, or ``None``.
        usage: Usage record placed unchanged into the returned result.

    Returns:
        A ``BackendResult`` whose ``thinking`` is empty when none was found.
    """
    thinking, answer = _split_thinking_answer(content)
    return BackendResult(thinking=thinking, answer=answer, usage=usage)
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""OpenAI Codex CLI backend — runs `codex exec` as subprocess."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import shlex
|
|
9
|
+
import signal
|
|
10
|
+
import subprocess
|
|
11
|
+
import time
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
from axon.backends.base import BackendResult
|
|
15
|
+
from axon.backends.registry import register
|
|
16
|
+
from axon.config import resolve_cli_timeout
|
|
17
|
+
|
|
18
|
+
log = logging.getLogger("axon.backend.codex")
|
|
19
|
+
_STREAM_SAMPLE_LIMIT = 20
|
|
20
|
+
_STREAM_SAMPLE_BYTES = 240
|
|
21
|
+
_SUBSCRIPTION_USAGE = {
|
|
22
|
+
"billing_mode": "subscription",
|
|
23
|
+
"tokens": None,
|
|
24
|
+
"cost_usd": None,
|
|
25
|
+
"total_tokens": None,
|
|
26
|
+
"prompt_tokens": None,
|
|
27
|
+
"completion_tokens": None,
|
|
28
|
+
"cost": None,
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _now_iso() -> str:
    """Return the current local time, with UTC offset, as an ISO-8601 string at second precision."""
    moment = datetime.now().astimezone()
    return moment.isoformat(timespec="seconds")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _normalize_output(output: str | bytes | None) -> str:
|
|
37
|
+
if output is None:
|
|
38
|
+
return ""
|
|
39
|
+
if isinstance(output, bytes):
|
|
40
|
+
return output.decode("utf-8", errors="replace")
|
|
41
|
+
return output
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _log_output_sample(stream_name: str, output: str):
    """Log the first lines of ``output`` at INFO level.

    Up to ``_STREAM_SAMPLE_LIMIT`` lines are emitted, each shortened to
    ``_STREAM_SAMPLE_BYTES`` characters (a character slice, not a byte
    slice); a closing record reports when more lines were left out.
    Nothing is logged for empty output.
    """
    if not output:
        return

    all_lines = output.splitlines()
    shown = all_lines[:_STREAM_SAMPLE_LIMIT]
    for position, text in enumerate(shown, start=1):
        log.info("Codex CLI %s[%d]: %s", stream_name, position, text[:_STREAM_SAMPLE_BYTES])

    omitted = len(all_lines) - _STREAM_SAMPLE_LIMIT
    if omitted > 0:
        log.info("Codex CLI %s: further output truncated after %d lines", stream_name, _STREAM_SAMPLE_LIMIT)
|
|
52
|
+
|
|
53
|
+
# Codex doesn't support --json-schema, so we embed format instructions in the prompt
|
|
54
|
+
_FORMAT_WRAPPER = """
|
|
55
|
+
{prompt}
|
|
56
|
+
|
|
57
|
+
## OUTPUT FORMAT
|
|
58
|
+
You MUST output your response as valid JSON with exactly this structure:
|
|
59
|
+
{{"thinking": "your step-by-step reasoning", "answer": "your final answer"}}
|
|
60
|
+
|
|
61
|
+
If the task expects code, the "answer" field must contain ONLY raw executable Python code.
|
|
62
|
+
Do NOT include XML tags, markdown fences, or prose in the "answer" field.
|
|
63
|
+
|
|
64
|
+
Output ONLY the JSON object, nothing else.
|
|
65
|
+
"""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@register("codex-cli")
class CodexCLIBackend:
    """Backend that answers a task by running ``codex exec`` as a subprocess."""

    name = "codex-cli"

    def __init__(self, config: dict):
        """Read the CLI timeout and the optional ``codex_cli_model`` override from ``config``."""
        self._timeout = resolve_cli_timeout(config)
        self._model = config.get("codex_cli_model", "")

    def call(self, prompt: str, task: dict) -> BackendResult:
        """Run the Codex CLI on ``prompt`` and parse its output.

        The prompt is wrapped with ``_FORMAT_WRAPPER`` so the output-format
        instructions travel with it.  ``task`` is accepted for interface
        compatibility and is not read here.

        Raises:
            TimeoutError: the CLI did not finish within the configured
                timeout; its process group is killed first.
            RuntimeError: the CLI exited with a non-zero return code.
        """
        wrapped_prompt = _FORMAT_WRAPPER.format(prompt=prompt).strip()

        # The "-" argument makes codex read the prompt from stdin, which
        # avoids OS arg-length limits.
        cmd = ["codex", "exec", "-", "--full-auto"]
        if self._model:
            cmd += ["--model", self._model]

        started_at = _now_iso()
        started_mono = time.monotonic()
        cmd_text = shlex.join(cmd)
        log.info(
            "Codex CLI start started_at=%s timeout=%s prompt_chars=%d cmd=%s",
            started_at,
            "none" if self._timeout is None else f"{self._timeout}s",
            len(wrapped_prompt),
            cmd_text,
        )

        # start_new_session puts the child in its own process group so that
        # every descendant can be killed together on timeout.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            start_new_session=True,
        )
        try:
            stdout, stderr = proc.communicate(input=wrapped_prompt, timeout=self._timeout)
        except subprocess.TimeoutExpired as exc:
            partial_out = _normalize_output(exc.stdout)
            partial_err = _normalize_output(exc.stderr)
            _kill_process_group(proc)
            _log_output_sample("stdout", partial_out)
            _log_output_sample("stderr", partial_err)
            log.error(
                "Codex CLI timeout started_at=%s finished_at=%s duration_s=%.2f cmd=%s",
                started_at,
                _now_iso(),
                time.monotonic() - started_mono,
                cmd_text,
            )
            raise TimeoutError(f"Codex CLI timed out after {self._timeout}s")

        _log_output_sample("stdout", stdout)
        _log_output_sample("stderr", stderr)

        exit_code = proc.returncode
        if exit_code != 0:
            log.error(
                "Codex CLI failed started_at=%s finished_at=%s duration_s=%.2f returncode=%s cmd=%s stderr=%s",
                started_at,
                _now_iso(),
                time.monotonic() - started_mono,
                exit_code,
                cmd_text,
                stderr[:1000],
            )
            raise RuntimeError(f"Codex CLI exited with code {exit_code}: {stderr[:500]}")

        if stderr:
            log.debug("Codex CLI stderr: %s", stderr[:500])

        log.info(
            "Codex CLI finished started_at=%s finished_at=%s duration_s=%.2f returncode=%s stdout_chars=%d stderr_chars=%d",
            started_at,
            _now_iso(),
            time.monotonic() - started_mono,
            exit_code,
            len(stdout),
            len(stderr),
        )

        return _parse_response(stdout)

    def display_name(self) -> str:
        """Return ``codex-cli``, followed by the model in parentheses when one is configured."""
        if self._model:
            return f"codex-cli ({self._model})"
        return "codex-cli"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _kill_process_group(proc: subprocess.Popen):
    """Kill the process and its entire process group.

    SIGTERM is sent first and the process gets 5 seconds to exit; if it is
    still running, SIGKILL follows with a further 3-second wait.  This is a
    cleanup helper and never raises ``subprocess.TimeoutExpired``: a process
    that survives SIGKILL is reported with a warning instead, so the
    caller's own timeout error is not masked.
    """
    try:
        pgid = os.getpgid(proc.pid)
        os.killpg(pgid, signal.SIGTERM)
    except (ProcessLookupError, OSError):
        # Group could not be resolved or signalled; fall back to the
        # process itself.
        try:
            proc.kill()
        except OSError:
            pass
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        try:
            pgid = os.getpgid(proc.pid)
            os.killpg(pgid, signal.SIGKILL)
        except (ProcessLookupError, OSError):
            try:
                proc.kill()
            except OSError:
                pass
        try:
            proc.wait(timeout=3)
        except subprocess.TimeoutExpired:
            # Previously this wait was unguarded and the exception escaped.
            # getLogger returns the same logger object the module binds to
            # ``log`` for this name.
            logging.getLogger("axon.backend.codex").warning(
                "Process %d did not exit after SIGKILL", proc.pid
            )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _find_answer_object(text: str) -> dict | None:
    """Return the first JSON object in ``text`` that has an ``answer`` key.

    Decoding is attempted at every ``{`` with a real JSON decoder, so
    surrounding log noise is tolerated and values containing braces or
    escaped quotes (typical for code answers) are handled correctly.
    Returns ``None`` when no such object exists.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "answer" in candidate:
            return candidate
        start = text.find("{", start + 1)
    return None


def _parse_response(stdout: str) -> BackendResult:
    """Parse Codex CLI output into a ``BackendResult``.

    The output is searched for a JSON object ``{thinking, answer}``; a pure
    JSON reply is simply the case where that object starts at position 0.
    When none is found, the whole stripped output becomes the answer.

    Raises:
        RuntimeError: if the output is empty or whitespace only.
    """
    stdout = stdout.strip()
    if not stdout:
        raise RuntimeError("Codex CLI returned empty output")

    data = _find_answer_object(stdout)
    if data is not None:
        return BackendResult(
            thinking=data.get("thinking", ""),
            answer=data["answer"],
            usage=dict(_SUBSCRIPTION_USAGE),
        )

    # Fallback: treat entire output as answer
    return BackendResult(
        thinking="",
        answer=stdout,
        usage=dict(_SUBSCRIPTION_USAGE),
    )
|