axnwork-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- axnwork_cli-0.2.0.dist-info/METADATA +13 -0
- axnwork_cli-0.2.0.dist-info/RECORD +23 -0
- axnwork_cli-0.2.0.dist-info/WHEEL +5 -0
- axnwork_cli-0.2.0.dist-info/entry_points.txt +2 -0
- axnwork_cli-0.2.0.dist-info/top_level.txt +1 -0
- axon/__init__.py +0 -0
- axon/api.py +83 -0
- axon/backends/__init__.py +5 -0
- axon/backends/base.py +23 -0
- axon/backends/claude_cli.py +290 -0
- axon/backends/codex_cli.py +223 -0
- axon/backends/litellm_backend.py +51 -0
- axon/backends/registry.py +61 -0
- axon/cli.py +595 -0
- axon/config.py +55 -0
- axon/display.py +364 -0
- axon/history.py +133 -0
- axon/llm.py +214 -0
- axon/log.py +44 -0
- axon/mining.py +671 -0
- axon/providers.py +44 -0
- axon/session.py +26 -0
- axon/wallet.py +45 -0
axon/llm.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""LLM integration — call Anthropic/OpenAI/Ollama via litellm."""
|
|
2
|
+
import re
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from axon.config import load_config
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_prompt(task, my_best_answer, my_best_score, platform_best_score, last_feedback=None, community_subs=None, my_past_subs=None):
    """Assemble the mining prompt for plain-completion LLM backends.

    Stitches together the task description, scoring rules, top community
    submissions, the miner's own submission history, last-round feedback,
    and the XML <thinking>/<answer> output contract.
    """
    better = "higher is better" if task.get("direction") == "maximize" else "lower is better"
    code_task = task.get("eval_type") == "code_output"

    parts = [
        f"# Task: {task['title']}\n\n## Description\n{task['description']}\n\n",
        f"## Evaluation\nScored using: {task['eval_type']}\nDirection: {task['direction']} ({better})\n",
        f"Completion threshold: {task['completion_threshold']}\n",
        f"Platform best: {platform_best_score if platform_best_score is not None else 'None (no submissions yet)'}\n",
    ]

    if community_subs:
        parts.append("\n## Community Submissions (top answers from other miners)\n")
        # Only the top three community answers are shown, each capped at 2000 chars.
        for idx, entry in enumerate(community_subs[:3], 1):
            pts = entry.get("score")
            label = "N/A" if pts is None else f"{pts:.4f}"
            parts.append(f"\n### Submission #{idx} (score: {label})\n")
            if entry.get("answer"):
                parts.append(f"```\n{entry['answer'][:2000]}\n```\n")

    if my_past_subs:
        # Show the last 10 of the miner's own submissions so the model avoids repeats.
        parts.append("\n## Your Past Submissions (DO NOT repeat these answers)\n")
        for idx, entry in enumerate(my_past_subs[-10:], 1):
            pts = entry.get("score")
            state = entry.get("eval_status", "unknown")
            err = entry.get("eval_error")
            if err:
                parts.append(f"  #{idx} score=error status={state} error={err[:120]}\n")
            elif pts is None:
                parts.append(f"  #{idx} score=N/A status={state}\n")
            else:
                parts.append(f"  #{idx} score={pts:.4f} status={state}\n")
            body = entry.get("answer")
            if body:
                clipped = body[:500]
                if len(body) > 500:
                    clipped += "... (truncated)"
                parts.append(f"     Answer: {clipped}\n")
        parts.append("\nAvoid submitting answers similar to the ones above. Try a different approach.\n")

    if my_best_answer is not None and my_best_score is not None:
        parts.append(f"\n## Your Current Best\nScore: {my_best_score}\nAnswer:\n{my_best_answer}\n")

    if last_feedback:
        parts.append("\n## Last Round Feedback\n")
        details = last_feedback.get("details", {})
        if last_feedback.get("error"):
            parts.append(f"Status: ERROR\nError: {last_feedback['error']}\n")
            if details.get("stderr"):
                parts.append(f"Stderr:\n{str(details['stderr'])[:500]}\n")
            parts.append(f"\nYour submission that caused this error:\n{last_feedback.get('answer', '')}\n")
            parts.append("\nFix the error and try again.\n")
        else:
            verdict = "improved (but not yet completed)" if last_feedback.get("improved") else "no improvement"
            parts.append(f"Score: {last_feedback.get('score')}\nStatus: {verdict}\n")
            if details.get("stdout"):
                parts.append(f"Eval output:\n{str(details['stdout'])[:300]}\n")
            parts.append("\nAnalyze the eval output and your score. Find ways to improve.\n")
    elif not my_best_answer:
        parts.append("\nThis is your first attempt. Think carefully.\n")

    if code_task:
        parts.append(
            "\n## CRITICAL RULES\n"
            "1. Do NOT hardcode test data. Input comes via function parameters.\n"
            "2. Functions must work with ANY input.\n"
            "3. The evaluator writes your submission directly to solution.py and executes it.\n"
            "4. Inside <answer>, include ONLY raw executable Python code.\n"
            "5. Do NOT include XML tags, prose, or markdown fences inside the submitted code.\n\n"
        )

    answer_hint = "RAW EXECUTABLE PYTHON CODE ONLY" if code_task else "your answer"
    parts.append(f"## OUTPUT FORMAT\n<thinking>your reasoning</thinking>\n<answer>{answer_hint}</answer>\n")
    return "".join(parts)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def build_agent_prompt(task, my_best_answer, my_best_score, platform_best_score, last_feedback=None, community_subs=None, my_past_subs=None):
    """Build prompt for CLI agent backends (claude-cli, codex-cli).

    Unlike build_prompt(), this omits the XML output-format instructions
    (CLI backends use --json-schema or an embedded JSON format) and adds
    tool-usage guidance instead.
    """
    kind = task.get("eval_type", "")
    code_task = task.get("eval_type") == "code_output"
    trend = "higher is better" if task.get("direction") == "maximize" else "lower is better"

    chunks = [
        f"# Task: {task['title']}\n\n## Description\n{task['description']}\n\n",
        f"## Evaluation\nScored using: {kind}\nDirection: {task['direction']} ({trend})\n",
        f"Completion threshold: {task['completion_threshold']}\n",
    ]
    if platform_best_score is None:
        chunks.append("Platform best: None (no submissions yet)\n")
    else:
        chunks.append(f"Platform best: {platform_best_score}\n")

    if community_subs:
        chunks.append("\n## Community Submissions (top answers from other miners)\n")
        # Top three answers only, previews capped at 2000 chars each.
        for pos, item in enumerate(community_subs[:3], 1):
            pts = item.get("score")
            label = "N/A" if pts is None else f"{pts:.4f}"
            chunks.append(f"\n### Submission #{pos} (score: {label})\n")
            if item.get("answer"):
                chunks.append(f"```\n{item['answer'][:2000]}\n```\n")

    if my_past_subs:
        # Last 10 of the miner's own submissions, so the agent avoids repeats.
        chunks.append("\n## Your Past Submissions (DO NOT repeat these answers)\n")
        for pos, item in enumerate(my_past_subs[-10:], 1):
            pts = item.get("score")
            state = item.get("eval_status", "unknown")
            failure = item.get("eval_error")
            if failure:
                chunks.append(f"  #{pos} score=error status={state} error={failure[:120]}\n")
            elif pts is not None:
                chunks.append(f"  #{pos} score={pts:.4f} status={state}\n")
            else:
                chunks.append(f"  #{pos} score=N/A status={state}\n")
            body = item.get("answer")
            if body:
                snippet = body[:500]
                if len(body) > 500:
                    snippet += "... (truncated)"
                chunks.append(f"     Answer: {snippet}\n")
        chunks.append("\nAvoid submitting answers similar to the ones above. Try a different approach.\n")

    if my_best_answer is not None and my_best_score is not None:
        chunks.append(f"\n## Your Current Best\nScore: {my_best_score}\nAnswer:\n{my_best_answer}\n")

    if last_feedback:
        chunks.append("\n## Last Round Feedback\n")
        details = last_feedback.get("details", {})
        if last_feedback.get("error"):
            chunks.append(f"Status: ERROR\nError: {last_feedback['error']}\n")
            if details.get("stderr"):
                chunks.append(f"Stderr:\n{str(details['stderr'])[:500]}\n")
            chunks.append(f"\nYour submission that caused this error:\n{last_feedback.get('answer', '')}\n")
            chunks.append("\nFix the error and try again.\n")
        else:
            verdict = "improved (but not yet completed)" if last_feedback.get("improved") else "no improvement"
            chunks.append(f"Score: {last_feedback.get('score')}\nStatus: {verdict}\n")
            if details.get("stdout"):
                chunks.append(f"Eval output:\n{str(details['stdout'])[:300]}\n")
            chunks.append("\nAnalyze the eval output and your score. Find ways to improve.\n")
    elif not my_best_answer:
        chunks.append("\nThis is your first attempt. Think carefully.\n")

    # Tool usage guidance and evaluator-aligned submission rules.
    if code_task:
        chunks.append(
            "\n## CRITICAL RULES\n"
            "1. Do NOT hardcode test data. Input comes via function parameters.\n"
            "2. Functions must work with ANY input.\n"
            "3. Use Bash to test your code before submitting your final answer.\n"
            "4. Your final answer must be raw executable Python code only.\n"
            "5. Do NOT wrap it in XML tags, markdown fences, or prose.\n"
            "6. The evaluator writes your submission directly to solution.py and executes it.\n\n"
        )
    elif kind == "llm_judge":
        chunks.append("\n## APPROACH\nUse WebSearch and WebFetch to research the topic. Verify facts before answering.\n\n")

    return "".join(chunks)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _parse_response(text):
    """Split a raw LLM reply into (thinking, answer).

    Falls back to the whole text when a <thinking> or <answer> tag is
    missing, and strips markdown code fences out of the answer body.
    """
    found_think = re.search(r"<thinking>(.*?)</thinking>", text, re.DOTALL)
    found_answer = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    thinking = text if found_think is None else found_think.group(1).strip()
    answer = text.strip() if found_answer is None else found_answer.group(1).strip()
    # Remove code fences: opening ```lang markers first, then any stragglers.
    for fence in (r"```[\w]*\s*\n?", r"\n?\s*```"):
        answer = re.sub(fence, "", answer)
    return thinking, answer.strip()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def call_llm(prompt, model, api_base=""):
    """Call LLM via litellm. Returns (thinking, answer, usage).

    Args:
        prompt: Full prompt text, sent as a single user message.
        model: litellm model identifier.
        api_base: Optional API base URL override (e.g. for Ollama).

    Returns:
        (thinking, answer, usage) where usage holds token counts and an
        estimated cost (0.0 when litellm cannot price the model); usage is
        empty when the provider returned no usage data.
    """
    # Export API keys from config into the environment for litellm,
    # without clobbering keys the user already exported themselves.
    config = load_config()
    keys = config.get("api_keys", {})
    for provider, env_var in [("anthropic", "ANTHROPIC_API_KEY"), ("openai", "OPENAI_API_KEY"), ("deepseek", "DEEPSEEK_API_KEY")]:
        if keys.get(provider) and not os.environ.get(env_var):
            os.environ[env_var] = keys[provider]

    # Guard: litellm's proxy_cli.py calls os.getcwd() at import time,
    # which crashes if the CWD no longer exists (e.g. deleted temp dir).
    try:
        os.getcwd()
    except (FileNotFoundError, OSError):
        os.chdir(os.path.expanduser("~"))

    # Imported lazily: the CWD guard above must run before litellm's import.
    import litellm
    litellm.suppress_debug_info = True
    from litellm import completion, completion_cost

    kwargs = {"model": model, "messages": [{"role": "user", "content": prompt}], "max_tokens": 4096}
    if api_base:
        kwargs["api_base"] = api_base

    response = completion(**kwargs)
    # content may be None (e.g. tool-call-only or filtered responses);
    # normalize to "" so _parse_response never receives None.
    text = response.choices[0].message.content or ""

    usage = {}
    if hasattr(response, "usage") and response.usage:
        usage["prompt_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
        usage["completion_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
        usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"]
        try:
            usage["cost"] = completion_cost(completion_response=response) or 0.0
        except Exception:
            # Unknown/custom models have no entry in litellm's pricing table.
            usage["cost"] = 0.0

    thinking, answer = _parse_response(text)
    return thinking, answer, usage
|
axon/log.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Centralized logging — all errors auto-saved to ~/.axon/logs/."""
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from axon.config import AXON_HOME
|
|
6
|
+
|
|
7
|
+
LOG_DIR = AXON_HOME / "logs"
|
|
8
|
+
LOG_FILE = LOG_DIR / "axon.log"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def setup_logging():
    """Configure logging: errors go to file, always. Call once at startup.

    Now safe to call more than once: repeated calls no longer stack
    duplicate file handlers (which doubled every log line) or re-wrap
    sys.excepthook recursively.
    """
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)

    # Idempotence guard: skip if our file handler is already installed.
    # NOTE(review): baseFilename is an absolute path; assumes str(LOG_FILE)
    # is absolute too (AXON_HOME under the user's home) — worst case the
    # comparison misses and we fall back to the original add-always behavior.
    already_installed = any(
        isinstance(h, logging.FileHandler) and getattr(h, "baseFilename", None) == str(LOG_FILE)
        for h in root.handlers
    )
    if not already_installed:
        file_handler = logging.FileHandler(LOG_FILE, encoding="utf-8")
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(logging.Formatter(
            "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        ))
        root.addHandler(file_handler)

    # Suppress noisy third-party loggers from leaking to stdout
    for name in ("LiteLLM", "litellm", "httpx", "httpcore"):
        logging.getLogger(name).setLevel(logging.WARNING)

    # Also catch unhandled exceptions — but only wrap the excepthook once,
    # otherwise each call chains another wrapper and logs duplicates.
    if getattr(sys.excepthook, "_axon_wrapped", False):
        return

    _original_excepthook = sys.excepthook

    def _excepthook(exc_type, exc_value, exc_tb):
        # Ctrl-C and explicit exits are not errors worth logging.
        if not issubclass(exc_type, (KeyboardInterrupt, SystemExit)):
            logging.getLogger("axon").critical(
                "Unhandled exception", exc_info=(exc_type, exc_value, exc_tb),
            )
        _original_excepthook(exc_type, exc_value, exc_tb)

    _excepthook._axon_wrapped = True
    sys.excepthook = _excepthook
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_logger(name: str = "axon") -> logging.Logger:
    """Return the named logger; defaults to the top-level "axon" logger."""
    logger = logging.getLogger(name)
    return logger
|