axnwork-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- axnwork_cli-0.2.0.dist-info/METADATA +13 -0
- axnwork_cli-0.2.0.dist-info/RECORD +23 -0
- axnwork_cli-0.2.0.dist-info/WHEEL +5 -0
- axnwork_cli-0.2.0.dist-info/entry_points.txt +2 -0
- axnwork_cli-0.2.0.dist-info/top_level.txt +1 -0
- axon/__init__.py +0 -0
- axon/api.py +83 -0
- axon/backends/__init__.py +5 -0
- axon/backends/base.py +23 -0
- axon/backends/claude_cli.py +290 -0
- axon/backends/codex_cli.py +223 -0
- axon/backends/litellm_backend.py +51 -0
- axon/backends/registry.py +61 -0
- axon/cli.py +595 -0
- axon/config.py +55 -0
- axon/display.py +364 -0
- axon/history.py +133 -0
- axon/llm.py +214 -0
- axon/log.py +44 -0
- axon/mining.py +671 -0
- axon/providers.py +44 -0
- axon/session.py +26 -0
- axon/wallet.py +45 -0
axon/mining.py
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
1
|
+
"""Mining loop — KeyWatcher, MiningDisplay, and run_mining (multi-task model)."""
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
import logging
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from rich.live import Live
|
|
10
|
+
|
|
11
|
+
from axon.api import api_get, api_post
|
|
12
|
+
from axon.backends import create_backend
|
|
13
|
+
from axon.config import AXON_HOME, load_config
|
|
14
|
+
from axon.display import console, build_mining_panel, fmt_round, print_mining_summary, _fmt_usdc
|
|
15
|
+
from axon.history import merge_server_history, build_local_record, build_error_record, append_record
|
|
16
|
+
from axon.llm import build_prompt, build_agent_prompt
|
|
17
|
+
from axon.session import load_session, save_session, delete_session
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Module-level logger for the mining loop.
log = logging.getLogger("axon.mine")
# Directory that receives one prompt snapshot file per mining round.
PROMPT_LOG_DIR = AXON_HOME / "logs" / "prompts"
# Sentinel default for run_mining(cli_timeout_override=...): distinguishes
# "argument not passed" from an explicit None.
_UNSET = object()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _now_iso() -> str:
    """Return the current local time as a timezone-aware ISO-8601 string.

    The value is truncated to whole seconds (no fractional part).
    """
    local_now = datetime.now().astimezone()
    return local_now.isoformat(timespec="seconds")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _write_prompt_snapshot(task_id: str, round_num: int, prompt: str) -> Path | None:
    """Persist the prompt used for one round under ``PROMPT_LOG_DIR``.

    The file is named ``<task_id>-round<NNN>-<YYYYmmdd-HHMMSS>.txt`` and is
    written as UTF-8.  This is best-effort: any failure is logged with its
    traceback and ``None`` is returned instead of raising.

    Returns:
        The path of the written snapshot, or ``None`` if writing failed.
    """
    try:
        PROMPT_LOG_DIR.mkdir(parents=True, exist_ok=True)
        taken_at = datetime.now().astimezone()
        filename = f"{task_id}-round{round_num:03d}-{taken_at.strftime('%Y%m%d-%H%M%S')}.txt"
        snapshot = PROMPT_LOG_DIR / filename
        snapshot.write_text(prompt, encoding="utf-8")
    except Exception:
        log.exception("Failed to write prompt snapshot task=%s round=%d", task_id, round_num)
        return None
    return snapshot
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _usage_tokens(usage: dict) -> int | None:
|
|
42
|
+
value = usage.get("tokens", usage.get("total_tokens"))
|
|
43
|
+
return value if value is not None else None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _usage_cost_usd(usage: dict) -> float | None:
|
|
47
|
+
value = usage.get("cost_usd", usage.get("cost"))
|
|
48
|
+
return value if value is not None else None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _usage_billing_mode(usage: dict, backend_name: str) -> str:
    """Return the billing mode for a backend call.

    A truthy ``usage["billing_mode"]`` is returned as-is.  Otherwise the mode
    is derived from the backend name: ``"subscription"`` for the ``codex-cli``
    and ``claude-cli`` backends, ``"metered"`` for everything else.
    """
    declared = usage.get("billing_mode")
    if declared:
        return declared
    if backend_name in ("codex-cli", "claude-cli"):
        return "subscription"
    return "metered"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class KeyWatcher:
    """Background thread watching for ctrl+o / arrow keypresses.

    ctrl+o toggles ``show_details``; while details are shown, the left/right
    arrow keys move ``detail_idx`` within ``[0, detail_count - 1]``.  The
    owner of the watcher is expected to keep ``detail_count`` up to date.
    """

    def __init__(self):
        self._show = False
        # threading.Event created by start(); stays None when stdin is not a TTY.
        self._stop_event = None
        self.detail_idx: int = -1  # -1 = latest
        self.detail_count: int = 0  # set by mining loop

    @property
    def show_details(self) -> bool:
        """Whether the detail view is currently toggled on."""
        return self._show

    def start(self):
        """Start the daemon watcher thread; a no-op when stdin is not a TTY."""
        try:
            if not sys.stdin.isatty():
                return
        except Exception:
            # stdin may be closed or replaced by an object without isatty().
            return
        import threading
        self._stop_event = threading.Event()
        t = threading.Thread(target=self._run, daemon=True)
        t.start()

    def _run(self):
        """Thread body: put the terminal in cbreak mode and react to keys.

        The original terminal attributes are restored when the loop ends.
        Every failure here is swallowed on purpose: key handling is a
        convenience and must never take the mining loop down.
        """
        try:
            import os
            import select
            import termios
        except ImportError:
            # termios only exists on POSIX platforms; without it there is
            # nothing to watch, so exit quietly instead of crashing the thread.
            return
        try:
            fd = sys.stdin.fileno()
            old = termios.tcgetattr(fd)
        except Exception:
            return
        try:
            # cbreak + disable IEXTEN so ctrl+o isn't swallowed as DISCARD
            new = termios.tcgetattr(fd)
            new[3] = new[3] & ~(termios.ECHO | termios.ICANON | termios.IEXTEN)
            new[6][termios.VMIN] = 1
            new[6][termios.VTIME] = 0
            termios.tcsetattr(fd, termios.TCSADRAIN, new)
            while not self._stop_event.is_set():
                # Short timeout so a stop() request is noticed promptly.
                r, _, _ = select.select([sys.stdin], [], [], 0.15)
                if not r:
                    continue
                ch = os.read(fd, 1)
                if not ch:
                    # EOF: select() would keep reporting "readable" forever,
                    # turning this loop into a busy spin — stop watching.
                    break
                if ch == b'\x0f':  # ctrl+o
                    self._show = not self._show
                    if self._show:
                        # Opening the view always jumps to the newest entry.
                        self.detail_idx = self.detail_count - 1
                elif ch == b'\x1b':  # escape sequence (arrow keys)
                    r2, _, _ = select.select([sys.stdin], [], [], 0.05)
                    if r2:
                        seq = os.read(fd, 2)
                        if seq == b'[D' and self._show:  # left
                            self.detail_idx = max(0, self.detail_idx - 1)
                        elif seq == b'[C' and self._show:  # right
                            self.detail_idx = min(self.detail_count - 1, self.detail_idx + 1)
        except Exception:
            # Deliberate best-effort: fall through to terminal restoration.
            pass
        finally:
            try:
                termios.tcsetattr(fd, termios.TCSADRAIN, old)
            except Exception:
                pass

    def stop(self):
        """Ask the watcher thread to exit; safe to call if it never started."""
        if self._stop_event:
            self._stop_event.set()
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class MiningDisplay:
    """Dynamic renderable for Rich Live — compact status panel at bottom.

    The mining loop mutates the public attributes; each render reads them and
    passes them to ``build_mining_panel``.
    """

    def __init__(self, watcher: KeyWatcher):
        self._watcher = watcher
        self.task_title: str = ""
        self.model: str = ""
        self.pool: int = 0
        self.threshold: float = 0.0
        self.best_score: float | None = None
        self.total_earned: int = 0
        self.round_count: int = 0
        self.status: str = ""
        self.total_tokens: int | None = 0
        self.total_cost: float | None = 0.0
        self.billing_mode: str = "metered"
        self.rounds: list[dict] = []
        self.all_details: list[dict] = []
        self.community_subs: list[dict] = []
        self.my_miner_id: str = ""
        # time.monotonic() value while a backend call is in flight, else None.
        self.call_started_at: float | None = None

    def __rich_console__(self, rconsole, options):
        """Yield the status panel built from the current state."""
        # Append elapsed time to status if LLM call is in progress.
        # Read call_started_at exactly once: the mining loop resets it to
        # None from another thread, and a check-then-use on the attribute
        # could otherwise subtract None and raise during a refresh.
        status = self.status
        started = self.call_started_at
        if started is not None:
            elapsed = int(time.monotonic() - started)
            status = f"{status} ({elapsed}s)"

        # Pick detail by watcher index, clamping to valid range
        idx = self._watcher.detail_idx
        total = len(self.all_details)
        if total:
            if idx < 0 or idx >= total:
                idx = total - 1
            detail = self.all_details[idx]
            detail_nav = (idx + 1, total)  # 1-based position, total count
        else:
            detail = None
            detail_nav = None
        yield build_mining_panel(
            self.task_title, self.model, self.pool,
            self.threshold, self.best_score, self.total_earned,
            self.round_count, status, self._watcher.show_details,
            detail, self.rounds, detail_nav,
            total_tokens=self.total_tokens, total_cost=self.total_cost, billing_mode=self.billing_mode,
            community_subs=self.community_subs,
            my_miner_id=self.my_miner_id,
        )

    def __rich_measure__(self, rconsole, options):
        """Report a width between 40 columns and the maximum available."""
        from rich.measure import Measurement
        return Measurement(40, options.max_width)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _response_detail(response) -> str:
    """Best-effort read of the ``detail`` field of a JSON error body; '' if unavailable."""
    try:
        detail = response.json().get("detail", "")
    except Exception:
        # Body is not JSON, or not a JSON object.
        return ""
    if detail is None:
        return ""
    return detail if isinstance(detail, str) else str(detail)


def _response_text(response, limit: int = 500) -> str:
    """Best-effort read of the first *limit* characters of a response body; '' on failure."""
    try:
        return response.text[:limit]
    except Exception:
        return ""


def run_mining(task: dict, max_rounds: int, *, cli_timeout_override: int | None | object = _UNSET):
    """Mining loop: rounds scroll above, status panel stays at bottom.

    Each round builds a prompt, calls the configured backend, submits the
    answer, waits for evaluation if it is pending, records the outcome and
    saves the session.  The loop stops on task completion, when the task is
    no longer open, after 3 consecutive duplicate answers, after 5
    consecutive errors, when ``max_rounds`` is reached, or on Ctrl+C.  A
    summary is printed at the end.

    Args:
        task: Task description; ``"id"`` and ``"title"`` are required keys.
        max_rounds: Upper bound for the round counter (which is restored
            from a saved session); ``<= 0`` means unlimited.
        cli_timeout_override: When passed, replaces ``cli_timeout`` in the
            backend configuration (``None`` is logged as "none").
    """
    import re

    config = load_config()
    backend_config = dict(config)
    if cli_timeout_override is not _UNSET:
        backend_config["cli_timeout"] = cli_timeout_override
    backend = create_backend(backend_config.get("backend", "litellm"), backend_config)

    task_id = task["id"]
    timeout_label = "none" if cli_timeout_override is None else (
        backend_config.get("cli_timeout") if cli_timeout_override is not _UNSET else config.get("cli_timeout", "")
    )
    log.info(
        "Mining config task=%s backend=%s max_rounds=%s cli_timeout=%s",
        task_id,
        backend.display_name(),
        "unlimited" if max_rounds <= 0 else max_rounds,
        timeout_label,
    )

    # Get best info
    try:
        best_info = api_get(f"/api/tasks/{task_id}/submissions/best", auth=False)
    except Exception:
        best_info = {}

    my_best_answer = None
    my_best_score = None
    round_num = 0
    total_earned = 0
    total_tokens: int | None = 0
    total_cost: float | None = 0.0
    rounds_data: list[dict] = []
    consecutive_errors = 0
    MAX_CONSECUTIVE_ERRORS = 5

    # Restore session (keyed by task_id)
    session_key = f"task-{task_id}"
    session = load_session(session_key)
    if session:
        my_best_answer = session.get("my_best_answer")
        my_best_score = session.get("my_best_score")
        round_num = session.get("round_num", 0)
        total_earned = session.get("total_earned", 0)

    # Display state
    watcher = KeyWatcher()
    state = MiningDisplay(watcher)
    state.task_title = task["title"]
    state.model = backend.display_name()
    state.pool = task.get("pool_balance", 0)
    state.threshold = task.get("completion_threshold", 0)
    state.best_score = my_best_score
    state.total_earned = total_earned
    state.round_count = round_num
    state.rounds = rounds_data
    # No usage data yet, so this resolves purely from the backend name.
    state.billing_mode = _usage_billing_mode({}, backend.name)

    # Get my miner ID for community leaderboard highlighting
    try:
        me_info = api_get("/api/auth/me")
        state.my_miner_id = str(me_info.get("id", ""))
    except Exception:
        state.my_miner_id = ""

    # Load local history + merge server submissions (dedup)
    server_subs = []
    try:
        server_subs = api_get(f"/api/tasks/{task_id}/submissions/mine")
    except Exception:
        pass
    my_past_subs = merge_server_history(task_id, server_subs)

    last_feedback = None
    consecutive_dups = 0

    def note_failure(label, error_text, *, answer=None, thinking=None,
                     tokens=None, cost=None, persist=True):
        """Record a failed round in history (optional) and in the display state."""
        if persist:
            record = build_error_record(task_id, answer, thinking, tokens, cost,
                                        round_num, state.billing_mode, label, error_text)
            append_record(task_id, record)
        rounds_data.append({"round": round_num, "score": None, "result": label, "earned": 0})
        state.all_details.append({"score": None, "result": label, "earned": 0,
                                  "error": error_text, "eval_details": None,
                                  "answer": answer, "thinking": thinking})
        watcher.detail_count = len(state.all_details)

    def error_limit_reached() -> bool:
        """Print the stop notice and return True once the error budget is spent."""
        if consecutive_errors < MAX_CONSECUTIVE_ERRORS:
            return False
        console.print(f" [yellow]{MAX_CONSECUTIVE_ERRORS} consecutive errors — stopping.[/]")
        return True

    # Save terminal settings for cleanup
    old_tty = None
    try:
        if sys.stdin.isatty():
            import termios
            old_tty = termios.tcgetattr(sys.stdin.fileno())
    except Exception:
        pass

    try:
        watcher.start()

        with Live(state, console=console, refresh_per_second=4):
            while True:
                # Check the stop conditions BEFORE bumping the counter so
                # round_num always equals the number of rounds attempted
                # (the summary would otherwise over-count by one).
                if max_rounds > 0 and round_num >= max_rounds:
                    break

                # LLM stuck
                if consecutive_dups >= 3:
                    console.print(" [yellow]3 consecutive duplicates — stopping.[/]")
                    break

                round_num += 1
                state.round_count = round_num
                state.status = f"[dim]► Round {round_num} calling {backend.display_name()}...[/]"
                round_started_at = _now_iso()
                round_started_mono = time.monotonic()
                log.info(
                    "Round %d start task=%s title=%r backend=%s started_at=%s",
                    round_num,
                    task_id,
                    task.get("title", ""),
                    backend.display_name(),
                    round_started_at,
                )

                # --- Call Backend ---
                try:
                    # Fetch community submissions for context
                    try:
                        all_subs = api_get(f"/api/tasks/{task_id}/submissions?limit=10", auth=False)
                        community_subs = [s for s in all_subs if s.get("score") is not None and s.get("is_improvement")]
                        community_subs.sort(key=lambda s: s.get("score", 0), reverse=(task.get("direction") == "maximize"))
                    except Exception:
                        community_subs = []

                    state.community_subs = community_subs

                    # Both builders take the same arguments; only the backend kind differs.
                    make_prompt = build_prompt if backend.name == "litellm" else build_agent_prompt
                    prompt = make_prompt(task, my_best_answer, my_best_score, best_info.get("score"), last_feedback, community_subs=community_subs, my_past_subs=my_past_subs)

                    prompt_path = _write_prompt_snapshot(task_id, round_num, prompt)
                    log.info(
                        "Round %d prompt task=%s chars=%d lines=%d community_subs=%d my_past_subs=%d path=%s",
                        round_num,
                        task_id,
                        len(prompt),
                        prompt.count("\n") + 1 if prompt else 0,
                        len(community_subs),
                        len(my_past_subs),
                        str(prompt_path) if prompt_path else "",
                    )
                    state.call_started_at = time.monotonic()
                    result = backend.call(prompt, task)
                    state.call_started_at = None
                    thinking, answer, usage = result["thinking"], result["answer"], result["usage"]
                    billing_mode = _usage_billing_mode(usage, backend.name)
                    state.billing_mode = billing_mode
                    tokens_used = _usage_tokens(usage)
                    cost_usd = _usage_cost_usd(usage)
                    if billing_mode == "metered":
                        total_tokens = (total_tokens or 0) + (tokens_used or 0)
                        total_cost = (total_cost or 0.0) + (cost_usd or 0.0)
                    else:
                        # Totals are not tracked for non-metered billing.
                        total_tokens = None
                        total_cost = None
                    state.total_tokens = total_tokens
                    state.total_cost = total_cost
                    log.info(
                        "Round %d backend_done task=%s finished_at=%s duration_s=%.2f answer_chars=%d thinking_chars=%d billing_mode=%s total_tokens=%s cost_usd=%s",
                        round_num,
                        task_id,
                        _now_iso(),
                        time.monotonic() - round_started_mono,
                        len(answer or ""),
                        len(thinking or ""),
                        billing_mode,
                        tokens_used,
                        cost_usd,
                    )
                except Exception as e:
                    state.call_started_at = None
                    consecutive_errors += 1
                    note_failure("crash", str(e))
                    state.status = ""
                    log.error(
                        "Round %d backend_error task=%s finished_at=%s duration_s=%.2f error=%s",
                        round_num,
                        task_id,
                        _now_iso(),
                        time.monotonic() - round_started_mono,
                        e,
                        exc_info=True,
                    )
                    if error_limit_reached():
                        break
                    # Back off longer after a timeout than after other failures.
                    time.sleep(10 if isinstance(e, TimeoutError) else 2)
                    continue

                state.status = f"[dim]► Round {round_num} submitting...[/]"

                # --- Submit to task ---
                thinking = thinking or "(no reasoning provided)"
                try:
                    sub = api_post(f"/api/tasks/{task_id}/submissions", {
                        "answer": answer, "thinking": thinking, "llm_model_used": backend.display_name(),
                    })
                except httpx.HTTPStatusError as e:
                    code = e.response.status_code
                    if code == 429:
                        # Guarded parse: an exception raised inside this
                        # handler would escape the loop entirely.
                        detail_msg = _response_detail(e.response)
                        m = re.search(r"(\d+)s", detail_msg)
                        wait = int(m.group(1)) if m else 10
                        note_failure("rate limited", detail_msg, answer=answer, thinking=thinking,
                                     tokens=tokens_used, cost=cost_usd)
                        state.status = f"[dim yellow]► rate limited, waiting {wait}s...[/]"
                        log.warning(
                            "Round %d end task=%s status=rate_limited finished_at=%s duration_s=%.2f wait_s=%d detail=%s",
                            round_num,
                            task_id,
                            _now_iso(),
                            time.monotonic() - round_started_mono,
                            wait,
                            detail_msg,
                        )
                        time.sleep(wait)
                        continue
                    if code == 409:
                        consecutive_dups += 1
                        note_failure("duplicate", "duplicate answer", answer=answer, thinking=thinking,
                                     tokens=tokens_used, cost=cost_usd)
                        state.status = ""
                        log.info(
                            "Round %d end task=%s status=duplicate finished_at=%s duration_s=%.2f consecutive_duplicates=%d",
                            round_num,
                            task_id,
                            _now_iso(),
                            time.monotonic() - round_started_mono,
                            consecutive_dups,
                        )
                        continue
                    if code == 422:
                        consecutive_errors += 1
                        detail_msg = _response_text(e.response)
                        error_msg = f"422 validation error: {detail_msg}" if detail_msg else str(e)
                        note_failure("validation error", error_msg, answer=answer, thinking=thinking,
                                     tokens=tokens_used, cost=cost_usd)
                        state.status = ""
                        log.warning(
                            "Round %d end task=%s status=validation_error finished_at=%s duration_s=%.2f error=%s",
                            round_num,
                            task_id,
                            _now_iso(),
                            time.monotonic() - round_started_mono,
                            error_msg,
                        )
                        if error_limit_reached():
                            break
                        continue
                    if code == 400:
                        detail_msg = _response_detail(e.response)
                        if "completed" in detail_msg or "closed" in detail_msg:
                            console.print(" [yellow]Task is no longer open. Stopping.[/]")
                            break
                        # Any other 400 falls through to the generic handling below.
                    if code == 404:
                        console.print(" [yellow]Task not found. Stopping.[/]")
                        break
                    # Log response body for debugging
                    consecutive_errors += 1
                    resp_detail = _response_text(e.response)
                    error_msg = f"{e} — {resp_detail}" if resp_detail else str(e)
                    note_failure("error", error_msg, answer=answer, thinking=thinking,
                                 tokens=tokens_used, cost=cost_usd)
                    state.status = ""
                    log.error(
                        "Round %d end task=%s status=submit_error finished_at=%s duration_s=%.2f error=%s body=%s",
                        round_num,
                        task_id,
                        _now_iso(),
                        time.monotonic() - round_started_mono,
                        e,
                        resp_detail,
                    )
                    if error_limit_reached():
                        break
                    time.sleep(2)
                    continue
                except Exception as e:
                    consecutive_errors += 1
                    note_failure("crash", str(e), answer=answer, thinking=thinking,
                                 tokens=tokens_used, cost=cost_usd)
                    state.status = ""
                    log.error(
                        "Round %d end task=%s status=submit_crash finished_at=%s duration_s=%.2f error=%s",
                        round_num,
                        task_id,
                        _now_iso(),
                        time.monotonic() - round_started_mono,
                        e,
                        exc_info=True,
                    )
                    if error_limit_reached():
                        break
                    time.sleep(2)
                    continue

                # --- Poll for async eval completion ---
                if sub.get("eval_status") == "pending":
                    state.status = f"[dim]► Round {round_num} evaluating...[/]"
                    poll_interval = 1
                    max_wait = 120
                    waited = 0
                    while sub.get("eval_status") == "pending" and waited < max_wait:
                        time.sleep(poll_interval)
                        waited += poll_interval
                        try:
                            sub = api_get(f"/api/tasks/{task_id}/submissions/{sub['id']}")
                        except Exception:
                            pass  # network hiccup, keep polling

                    if sub.get("eval_status") == "pending":
                        # Timed out waiting for eval; as before, this outcome
                        # is shown in the display but not written to history.
                        note_failure("eval timeout", "eval timed out", answer=answer,
                                     thinking=thinking, persist=False)
                        consecutive_errors += 1
                        state.status = ""
                        log.warning("Round %d eval timeout task=%s", round_num, task_id)
                        if error_limit_reached():
                            break
                        continue

                # --- Process result ---
                consecutive_dups = 0
                consecutive_errors = 0
                score = sub.get("score")
                # "or 0" also covers an explicit null from the server, which
                # would otherwise make the += below raise TypeError.
                earned = sub.get("reward_earned") or 0
                total_earned += earned
                is_completion = sub.get("is_completion", False)
                is_improvement = sub.get("is_improvement", False)
                error = sub.get("eval_error")

                if error:
                    result_label = "eval error"
                elif is_completion:
                    result_label = "COMPLETE"
                elif is_improvement:
                    result_label = "improved"
                else:
                    result_label = "no change"

                record = build_local_record(sub, answer, thinking,
                                            tokens_used, cost_usd,
                                            round_num, state.billing_mode, result_label)
                append_record(task_id, record)
                my_past_subs.append(record)

                rounds_data.append({"round": round_num, "score": score, "result": result_label, "earned": earned})
                state.all_details.append({
                    "score": score, "result": result_label, "earned": earned,
                    "error": error, "eval_details": sub.get("eval_details"),
                    "answer": answer, "thinking": thinking,
                })
                watcher.detail_count = len(state.all_details)
                state.total_earned = total_earned

                # --- Update state ---
                last_feedback = {
                    "score": score,
                    "error": error,
                    "details": sub.get("eval_details"),
                    "improved": is_improvement,
                    "answer": answer,
                }
                if is_improvement:
                    my_best_answer = answer
                    my_best_score = score
                    state.best_score = my_best_score

                log.info(
                    "Round %d end task=%s status=%s finished_at=%s duration_s=%.2f score=%s earned=%s improvement=%s completion=%s eval_error=%s",
                    round_num,
                    task_id,
                    result_label,
                    _now_iso(),
                    time.monotonic() - round_started_mono,
                    score,
                    earned,
                    is_improvement,
                    is_completion,
                    error,
                )

                save_session(session_key, {
                    "my_best_answer": my_best_answer,
                    "my_best_score": my_best_score,
                    "round_num": round_num,
                    "total_earned": total_earned,
                    "model": backend.display_name(),
                })

                # Refresh pool from server
                try:
                    task_check = api_get(f"/api/tasks/{task_id}", auth=False)
                    state.pool = task_check.get("pool_balance", state.pool)
                    task_status = task_check.get("status", "open")
                except Exception:
                    task_status = "open"

                if is_completion:
                    delete_session(session_key)
                    state.status = f"[bold green]✓ Task completed! Earned {_fmt_usdc(total_earned)}[/]"
                    time.sleep(1)
                    break

                # Task closed by pool exhaustion or admin
                if task_status != "open":
                    delete_session(session_key)
                    console.print(" [yellow]Task ended. Stopping.[/]")
                    break

                state.status = ""

    except KeyboardInterrupt:
        console.print("\n [yellow]Mining stopped. Session saved — run again to resume.[/]")
    finally:
        watcher.stop()
        if old_tty is not None:
            try:
                import termios
                termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN, old_tty)
            except Exception:
                pass

    # --- Summary ---
    notable = [r for r in rounds_data if r["result"] != "no change"]
    if notable:
        print_mining_summary(notable, my_best_score, total_earned, round_num,
                             total_tokens=total_tokens, total_cost=total_cost, billing_mode=state.billing_mode)
    else:
        best = f"{my_best_score:.6f}" if my_best_score is not None else "N/A"
        if state.billing_mode == "subscription":
            token_str = "unknown" if total_tokens is None else f"{total_tokens:,}"
            cost_str = "subscription"
        else:
            token_str = f"{(total_tokens or 0):,}"
            cost_str = f"${total_cost:.4f}" if total_cost else "$0"
        console.print("\n [bold gold1]ψ Mining Summary[/]")
        console.print(f" Best: {best}")
        console.print(f" Earned: [green]{_fmt_usdc(total_earned)}[/]")
        console.print(f" Tokens: {token_str} Cost: [yellow]{cost_str}[/]")
        console.print(f" Rounds: {round_num}\n")
|