axnwork-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- axnwork_cli-0.2.0.dist-info/METADATA +13 -0
- axnwork_cli-0.2.0.dist-info/RECORD +23 -0
- axnwork_cli-0.2.0.dist-info/WHEEL +5 -0
- axnwork_cli-0.2.0.dist-info/entry_points.txt +2 -0
- axnwork_cli-0.2.0.dist-info/top_level.txt +1 -0
- axon/__init__.py +0 -0
- axon/api.py +83 -0
- axon/backends/__init__.py +5 -0
- axon/backends/base.py +23 -0
- axon/backends/claude_cli.py +290 -0
- axon/backends/codex_cli.py +223 -0
- axon/backends/litellm_backend.py +51 -0
- axon/backends/registry.py +61 -0
- axon/cli.py +595 -0
- axon/config.py +55 -0
- axon/display.py +364 -0
- axon/history.py +133 -0
- axon/llm.py +214 -0
- axon/log.py +44 -0
- axon/mining.py +671 -0
- axon/providers.py +44 -0
- axon/session.py +26 -0
- axon/wallet.py +45 -0
axon/display.py
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
"""Rich display helpers — all CLI formatting lives here."""
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.markup import escape
|
|
6
|
+
from rich.panel import Panel
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
# Module-wide Rich console; the print_* helpers in this module write through it.
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def print_banner():
    """Print the AXON banner, framed by one blank line above and below."""
    banner_rows = (
        " [bold gold1]ψ A X O N[/]",
        " [dim]─────────────────[/]",
        " [dim]World Intelligence[/]",
        " [dim]Proof of Useful Work[/]",
    )
    console.print()
    for row in banner_rows:
        console.print(row)
    console.print()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _fmt_usdc(cents: int) -> str:
|
|
22
|
+
"""Format USDC cents as dollar string."""
|
|
23
|
+
return f"${cents / 100:.2f}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _format_usage_summary(
|
|
27
|
+
total_tokens: int | None = 0,
|
|
28
|
+
total_cost: float | None = 0.0,
|
|
29
|
+
billing_mode: str = "metered",
|
|
30
|
+
) -> tuple[str, str]:
|
|
31
|
+
"""Render token/cost labels across metered and subscription backends."""
|
|
32
|
+
if billing_mode == "subscription":
|
|
33
|
+
token_str = f"{total_tokens:,}" if total_tokens is not None else "unknown"
|
|
34
|
+
return token_str, "subscription"
|
|
35
|
+
token_str = f"{(total_tokens or 0):,}"
|
|
36
|
+
cost_str = f"${total_cost:.4f}" if total_cost else "$0"
|
|
37
|
+
return token_str, cost_str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _time_ago(iso_str: str) -> str:
|
|
41
|
+
"""Convert ISO timestamp to 'Nm ago' / 'Nh ago'."""
|
|
42
|
+
if not iso_str:
|
|
43
|
+
return ""
|
|
44
|
+
try:
|
|
45
|
+
from datetime import datetime, timezone
|
|
46
|
+
dt = datetime.fromisoformat(iso_str.replace("Z", "+00:00"))
|
|
47
|
+
delta = datetime.now(timezone.utc) - dt
|
|
48
|
+
mins = int(delta.total_seconds() / 60)
|
|
49
|
+
if mins < 1:
|
|
50
|
+
return "just now"
|
|
51
|
+
if mins < 60:
|
|
52
|
+
return f"{mins}m ago"
|
|
53
|
+
return f"{mins // 60}h ago"
|
|
54
|
+
except Exception:
|
|
55
|
+
return ""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _truncate_answer(answer: str | None, max_len: int = 70) -> str:
|
|
59
|
+
"""Collapse whitespace and truncate an answer for single-line preview."""
|
|
60
|
+
if not answer:
|
|
61
|
+
return ""
|
|
62
|
+
collapsed = re.sub(r"\s+", " ", answer).strip()
|
|
63
|
+
if not collapsed:
|
|
64
|
+
return ""
|
|
65
|
+
if len(collapsed) <= max_len:
|
|
66
|
+
return collapsed
|
|
67
|
+
return collapsed[:max_len - 1] + "\u2026"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _fmt_community(subs: list[dict], my_miner_id: str = "") -> list[str]:
    """Format community leaderboard lines for the mining panel.

    Returns a list of Rich-markup lines: a header carrying the number of
    distinct miner ids in ``subs``, then one row for each of the first five
    submissions.  The row whose miner id equals ``my_miner_id`` is rendered in
    cyan and tagged "← you"; every other row may be followed by a one-line
    preview of its answer.  An empty ``subs`` gives an empty list.

    All text taken from the submissions is escaped so that square brackets in
    a miner id, model name or answer are shown literally rather than being
    interpreted as markup.
    """
    if not subs:
        return []
    unique = len({str(s.get("miner_id", "")) for s in subs})
    lines = [f"[bold dim]── Leaderboard ({unique} miners) ──[/]"]
    for i, s in enumerate(subs[:5], 1):
        mid = str(s.get("miner_id", "?"))
        # Long ids are abbreviated to their first and last four characters.
        short_id = f"{mid[:4]}..{mid[-4:]}" if len(mid) > 8 else mid
        score = s.get("score")
        score_str = f"{score:.4f}" if score is not None else " -"
        # Keep only the part after the last "/" and cap it at the column width.
        model = (s.get("llm_model_used") or "?").split("/")[-1][:14]
        ago = _time_ago(s.get("created_at", ""))
        is_me = mid == my_miner_id
        tag = " [cyan]← you[/]" if is_me else ""
        style = "cyan" if is_me else "dim"
        # Pad first, escape afterwards, so escaping cannot disturb the padding.
        row = escape(f"#{i} {short_id} {score_str} {model:<14s} {ago}")
        lines.append(f" [{style}]{row}[/]{tag}")
        # Answer preview for non-self entries
        if not is_me:
            preview = _truncate_answer(s.get("answer"))
            if preview:
                lines.append(f" [dim italic]\u201c{escape(preview)}\u201d[/]")
    return lines
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def print_task_list(tasks: list[dict]):
    """Display task list as a table.

    Prints a "No tasks found." notice when ``tasks`` is empty.  Otherwise one
    row is printed per task, numbered from 1, with its status, pool, best
    score, eval type and title.  String values taken from the task dicts are
    escaped so square brackets in them are printed literally instead of being
    parsed as Rich markup.
    """
    if not tasks:
        console.print(" [dim]No tasks found.[/]")
        return

    def _plain(value):
        # Escape strings only; other values reach the table unchanged.
        return escape(value) if isinstance(value, str) else value

    table = Table(title="ψ Available Tasks", title_style="bold gold1", border_style="dim")
    table.add_column("#", justify="right", style="dim")
    table.add_column("Status", justify="center")
    table.add_column("Pool (USDC)", justify="right")
    table.add_column("Best", justify="right")
    table.add_column("Eval", style="dim")
    table.add_column("Title")

    status_styles = {"open": "green", "completed": "blue", "closed": "red"}
    for i, t in enumerate(tasks, 1):
        status_style = status_styles.get(t.get("status", ""), "dim")
        best = f"{t['best_score']:.4f}" if t.get("best_score") is not None else "-"
        pool = _fmt_usdc(t.get("pool_balance", 0))
        table.add_row(
            str(i),
            f"[{status_style}]{escape(str(t.get('status', '?')))}[/]",
            f"[green]{pool}[/]",
            best,
            _plain(t.get("eval_type", "?")),
            _plain(t.get("title", "?")),
        )

    console.print()
    console.print(table)
    console.print()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def print_task_detail(t: dict):
    """Display a single task's full details.

    Prints the title, then id, status, eval type (with the scoring direction),
    threshold, pool, best and baseline scores, and finally the description
    line by line when one is present.  Missing values are shown as "?" or "-".

    Text taken from ``t`` (title, id, status, eval type, description) is
    escaped before printing so square brackets in it are shown literally
    instead of being parsed as Rich markup.
    """
    def _shown(value) -> str:
        # Values are interpolated into markup, so stringify and escape them.
        return escape(str(value))

    status = t.get("status", "?")
    status_style = {"open": "green", "completed": "blue", "closed": "red"}.get(status, "dim")
    direction = t.get("direction", "maximize")
    # Anything other than "minimize" is presented as higher-is-better.
    arrow = "↓ lower is better" if direction == "minimize" else "↑ higher is better"
    best = f"{t['best_score']:.6f}" if t.get("best_score") is not None else "-"
    baseline = f"{t['baseline_score']:.6f}" if t.get("baseline_score") is not None else "-"
    threshold = t.get("completion_threshold", "?")

    console.print(f"\n[bold gold1]ψ {_shown(t.get('title', '?'))}[/]\n")
    console.print(f" ID [dim]{_shown(t.get('id', '?'))}[/]")
    console.print(f" Status [{status_style}]{_shown(status)}[/]")
    console.print(f" Eval {_shown(t.get('eval_type', '?'))} [dim]({arrow})[/]")
    console.print(f" Threshold {threshold}")
    console.print(f" Pool [green]{_fmt_usdc(t.get('pool_balance', 0))}[/]")
    console.print(f" Best Score {best}")
    console.print(f" Baseline {baseline}")

    desc = t.get("description", "")
    if desc:
        console.print("\n[bold]Description[/]\n")
        for line in desc.strip().splitlines():
            console.print(f" {escape(line)}")

    console.print()
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def fmt_rounds_header() -> str:
    """Return the column-heading line shown above the per-round rows."""
    columns = "{:<9} {:>10} {:<14} {}".format("Round", "Score", "Result", "Earned")
    return " [bold dim]" + columns + "[/]"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def fmt_round(round_num: int, score: float | None, result: str, earned: int) -> str:
|
|
161
|
+
"""Format one round line as Rich markup."""
|
|
162
|
+
label = f"Round {round_num}"
|
|
163
|
+
score_str = f"{score:.6f}" if score is not None else " -"
|
|
164
|
+
earned_tag = f" +{_fmt_usdc(earned)}" if earned else ""
|
|
165
|
+
if result == "crash":
|
|
166
|
+
return f" [red]{label:<9} {score_str} crash[/]"
|
|
167
|
+
if result in ("error", "eval error"):
|
|
168
|
+
return f" [red]{label:<9} {score_str} {result}[/]"
|
|
169
|
+
if result == "rate limited":
|
|
170
|
+
return f" [yellow]{label:<9} {score_str} rate limited[/]"
|
|
171
|
+
if result == "duplicate":
|
|
172
|
+
return f" [yellow]{label:<9} {score_str} duplicate[/]"
|
|
173
|
+
if result == "COMPLETE":
|
|
174
|
+
return f" [bold green]{label:<9} {score_str} COMPLETE{earned_tag}[/]"
|
|
175
|
+
if result == "improved":
|
|
176
|
+
return f" [green]{label:<9} {score_str} improved{earned_tag}[/]"
|
|
177
|
+
return f" [dim]{label:<9} {score_str} no change[/]"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def print_mining_summary(rounds_data: list[dict], best_score: float | None,
                         total_earned: int, round_count: int,
                         total_tokens: int | None = 0, total_cost: float | None = 0.0,
                         billing_mode: str = "metered"):
    """Print the end-of-run summary: a per-round table followed by totals.

    Each entry of ``rounds_data`` must provide "round" and "result"; "score"
    and "earned" are optional and shown as "error" and "-" when absent.
    Below the table the best score, total earned, token/cost usage and the
    round count are printed.
    """
    summary = Table(title="ψ Mining Summary", title_style="bold gold1", border_style="dim")
    column_specs = (
        ("Round", {"justify": "right"}),
        ("Score", {"justify": "right"}),
        ("Result", {}),
        ("Earned", {"justify": "right"}),
    )
    for heading, options in column_specs:
        summary.add_column(heading, **options)

    for entry in rounds_data:
        if entry.get("score") is None:
            score_cell = "error"
        else:
            score_cell = f"{entry['score']:.6f}"
        if entry.get("earned"):
            earned_cell = f"+{_fmt_usdc(entry['earned'])}"
        else:
            earned_cell = "-"
        summary.add_row(str(entry["round"]), score_cell, entry["result"], earned_cell)

    console.print()
    console.print(summary)
    best_label = "N/A" if best_score is None else f"{best_score:.6f}"
    tokens_label, cost_label = _format_usage_summary(total_tokens, total_cost, billing_mode)
    console.print(f" Best: {best_label}")
    console.print(f" Earned: [green]{_fmt_usdc(total_earned)}[/]")
    console.print(f" Tokens: {tokens_label} Cost: [yellow]{cost_label}[/]")
    console.print(f" Rounds: {round_count}")
    console.print()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def build_mining_panel(
    task_title: str,
    model: str,
    pool: int,
    threshold: float,
    best_score: float | None,
    total_earned: int,
    round_count: int,
    status: str,
    show_details: bool,
    last_detail: dict | None,
    rounds: list[dict] | None = None,
    detail_nav: tuple[int, int] | None = None,
    total_tokens: int | None = 0,
    total_cost: float | None = 0.0,
    billing_mode: str = "metered",
    community_subs: list[dict] | None = None,
    community_total: int = 0,
    my_miner_id: str = "",
) -> Panel:
    """Compact mining status panel. All info lives inside the panel.

    The panel body is built as a list of Rich-markup lines, in this order:
    a three-line header (model/pool/threshold, best/earned/rounds, usage),
    the last five entries of ``rounds``, the community leaderboard, the
    ``status`` line, the expanded details of ``last_detail`` (only when
    ``show_details`` is true) and a key-binding hint.

    ``status`` is inserted verbatim.
    NOTE(review): presumably callers pass ready-made markup there -- confirm.
    Free text (task title, model name, result label, error, stdout, stderr,
    thinking, answer) is escaped so square brackets in it are displayed
    literally instead of being parsed as markup.

    ``community_total`` is accepted but not used by this function.
    """
    # str.split returns the whole string when there is no "/", so no check is needed.
    model_short = escape(model.split("/")[-1])
    best_str = f"{best_score:.6f}" if best_score is not None else "-"
    token_str, cost_str = _format_usage_summary(total_tokens, total_cost, billing_mode)

    lines = [
        f"Model: [cyan]{model_short}[/] Pool: [green]{_fmt_usdc(pool)}[/] Threshold: {threshold}",
        f"Best: {best_str} Earned: [green]{_fmt_usdc(total_earned)}[/] Rounds: {round_count}",
        f"Tokens: {token_str} Cost: [yellow]{cost_str}[/]",
    ]

    # Recent rounds history (last 5)
    if rounds:
        lines.append("")
        lines.append(fmt_rounds_header())
        for r in rounds[-5:]:
            lines.append(fmt_round(r["round"], r["score"], r["result"], r["earned"]))

    # Community leaderboard
    if community_subs:
        lines.append("")
        lines.extend(_fmt_community(community_subs, my_miner_id))

    if status:
        lines.append(status)

    # ctrl+o expands full details; left/right navigates rounds
    if show_details and last_detail:
        nav_str = ""
        if detail_nav:
            nav_str = f" [bold cyan]Round {detail_nav[0]}/{detail_nav[1]}[/]"
        lines.append("")
        lines.append(f"[bold]── Details{nav_str} ──[/]")
        result = last_detail.get("result", "")
        result_styles = {"COMPLETE": "bold green", "improved": "green",
                         "eval error": "red", "crash": "red", "error": "red",
                         "rate limited": "yellow", "duplicate": "yellow", "no change": "dim"}
        rstyle = result_styles.get(result, "dim")
        score_str = f"{last_detail['score']:.6f}" if last_detail.get("score") is not None else "-"
        earned = last_detail.get("earned", 0)
        earned_tag = f" [green]+{_fmt_usdc(earned)}[/]" if earned else ""
        lines.append(f"Result: [{rstyle}]{escape(str(result))}[/] Score: {score_str}{earned_tag}")
        # Truncate first, escape second, so a cut never lands inside an escape.
        if last_detail.get("error"):
            lines.append(f"Error: [red]{escape(last_detail['error'][:200])}[/]")
        details = last_detail.get("eval_details") or {}
        if details.get("stdout"):
            stdout_text = str(details["stdout"])[:300].replace("\n", "\n ")
            lines.append(f"Output: [dim]{escape(stdout_text)}[/]")
        if details.get("stderr"):
            stderr_text = str(details["stderr"])[:200].replace("\n", "\n ")
            lines.append(f"Stderr: [red]{escape(stderr_text)}[/]")
        if last_detail.get("thinking"):
            # Newlines are shown as a literal "\n" to keep the preview on one line.
            thinking_preview = last_detail["thinking"][:200].replace("\n", "\\n")
            lines.append(f"Think: [dim]{escape(thinking_preview)}[/]")
        if last_detail.get("answer"):
            preview = last_detail["answer"][:200].replace("\n", "\\n")
            lines.append(f"Answer: [dim]{escape(preview)}[/]")

    if show_details:
        hint = "ctrl+c stop · ← → browse · ctrl+o close"
    else:
        hint = "ctrl+c stop · ctrl+o details"
    lines.append(f"\n[dim]{hint}[/]")

    return Panel(
        "\n".join(lines),
        title=f"[bold gold1]ψ {escape(task_title)}[/]",
        border_style="gold1",
        padding=(1, 2),
    )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _progress_bar(progress: float, width: int = 8) -> str:
|
|
300
|
+
"""Render a Unicode progress bar like ███░░░░░."""
|
|
301
|
+
filled = round(progress * width)
|
|
302
|
+
return "█" * filled + "░" * (width - filled)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def print_network(data: dict):
    """Display global network overview + per-task competition table.

    Prints four network-wide figures read from ``data`` (missing ones are
    shown as 0), then either a "No open tasks." notice or one table row per
    entry of ``data["tasks"]``.  Task titles are escaped so square brackets
    in them are shown literally instead of being parsed as Rich markup.
    """
    console.print("\n[bold gold1]ψ Network Overview[/]\n")
    console.print(f" Active miners (24h) {data.get('active_miners_24h', 0)}")
    console.print(f" Submissions/hr {data.get('submissions_1h', 0)}")
    console.print(f" Open reward pool [green]{_fmt_usdc(data.get('total_open_pool', 0))}[/]")
    console.print(f" Total rewards paid [green]{_fmt_usdc(data.get('total_rewards_paid', 0))}[/]")

    tasks = data.get("tasks", [])
    if not tasks:
        console.print("\n [dim]No open tasks.[/]\n")
        return

    table = Table(title="Open Task Competition", title_style="bold", border_style="dim")
    table.add_column("Title", max_width=26)
    table.add_column("Pool", justify="right")
    table.add_column("Thrs", justify="right")
    table.add_column("Best", justify="right")
    table.add_column("Progress")
    table.add_column("Miners", justify="right")
    table.add_column("Subs/hr", justify="right")

    for t in tasks:
        title = t.get("title", "?")
        if isinstance(title, str):
            # Escape strings only; other values reach the table unchanged.
            title = escape(title)
        pool = _fmt_usdc(t.get("pool_balance", 0))
        threshold = f"{t.get('completion_threshold', 0):.2f}"
        best = f"{t['best_score']:.4f}" if t.get("best_score") is not None else "-"
        progress = t.get("progress", 0.0)
        bar = _progress_bar(progress)
        pct = f"{progress * 100:.0f}%"
        table.add_row(
            title,
            f"[green]{pool}[/]",
            threshold,
            best,
            f"{bar} {pct:>4s}",
            str(t.get("active_miners_24h", 0)),
            str(t.get("submissions_1h", 0)),
        )

    console.print()
    console.print(table)
    console.print()
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def print_stats(user: dict, breakdown: dict, improvements: int):
    """Print the wallet, balance, income breakdown and improvement count.

    Pool rewards are always listed; the completion bonus line appears only
    when ``breakdown`` holds a non-zero "completion_reward".
    """
    pool_total = breakdown.get("pool_reward", 0)
    completion_total = breakdown.get("completion_reward", 0)
    wallet = user.get('address', '?')
    balance = _fmt_usdc(user.get('balance', 0))

    console.print("\n[bold gold1]ψ Mining Stats[/]\n")
    console.print(f" Wallet [cyan]{wallet}[/]")
    console.print(f" Balance [bold green]{balance}[/]")
    console.print()
    console.print(" [bold]Income[/]")
    console.print(f" Pool rewards [green]+{_fmt_usdc(pool_total)}[/]")
    if completion_total:
        console.print(f" Completion bonus [green]+{_fmt_usdc(completion_total)}[/]")
    console.print()
    console.print(f" Improvements {improvements}")
    console.print()
|
axon/history.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Mining history persistence — JSONL-based per-task history."""
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
|
|
6
|
+
from axon.config import AXON_HOME
|
|
7
|
+
|
|
8
|
+
# Directory holding the per-task history files (one "<task_id>.jsonl" each).
HISTORY_DIR = AXON_HOME / "history"
|
|
9
|
+
|
|
10
|
+
# Logger used by this module, e.g. to report corrupt history lines.
log = logging.getLogger("axon.history")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def load_history(task_id: str) -> list[dict]:
    """Load every parseable record from the task's JSONL history file.

    Returns an empty list when the file does not exist.  Blank lines are
    ignored; lines that are not valid JSON are logged with their line number
    and skipped.
    """
    history_file = HISTORY_DIR / f"{task_id}.jsonl"
    if not history_file.exists():
        return []

    parsed = []
    numbered_lines = enumerate(history_file.read_text().splitlines(), 1)
    for lineno, raw in numbered_lines:
        payload = raw.strip()
        if payload:
            try:
                parsed.append(json.loads(payload))
            except json.JSONDecodeError:
                log.warning("Corrupt line %d in %s, skipping", lineno, history_file)
    return parsed
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def append_record(task_id: str, record: dict):
    """Write ``record`` as one JSON line at the end of the task's history file.

    The history directory is created on demand.  Values that JSON cannot
    encode natively are stored via ``str()``.
    """
    HISTORY_DIR.mkdir(parents=True, exist_ok=True)
    target = HISTORY_DIR / f"{task_id}.jsonl"
    with open(target, "a") as sink:
        encoded = json.dumps(record, default=str)
        sink.write(encoded + "\n")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def merge_server_history(task_id: str, server_subs: list[dict]) -> list[dict]:
    """Load local history, merge in server submissions (dedup by id), return combined list.

    Every server submission whose id is not yet known is converted into a
    history record (fields only known locally, such as answer and token
    usage, are left as None), appended to the task's history file and added
    to the returned list.  Submissions without an id are ignored.

    Ids are tracked while merging, so an id that occurs more than once within
    ``server_subs`` is stored only once.
    """
    local = load_history(task_id)
    local_ids = {r["id"] for r in local if r.get("id")}

    for sub in server_subs:
        sub_id = sub.get("id")
        if not sub_id or sub_id in local_ids:
            continue
        record = {
            "id": sub_id,
            "score": sub.get("score"),
            "eval_status": sub.get("eval_status"),
            "eval_error": sub.get("eval_error"),
            "eval_details": sub.get("eval_details"),
            "is_improvement": sub.get("is_improvement"),
            "is_completion": sub.get("is_completion", False),
            "reward_earned": sub.get("reward_earned", 0),
            "llm_model_used": sub.get("llm_model_used"),
            "created_at": sub.get("created_at", ""),
            "answer": None,
            "thinking": None,
            "billing_mode": "unknown",
            "tokens": None,
            "cost_usd": None,
            "cost": None,
            "round_num": None,
            "result_label": None,
            "source": "server",
        }
        append_record(task_id, record)
        local.append(record)
        # Remember the id so a repeat later in server_subs is not stored again.
        local_ids.add(sub_id)

    return local
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def build_local_record(sub: dict, answer: str, thinking: str,
|
|
75
|
+
tokens: int | None, cost: float | None, round_num: int,
|
|
76
|
+
billing_mode: str,
|
|
77
|
+
result_label: str) -> dict:
|
|
78
|
+
"""Build a complete history record from a successful submission."""
|
|
79
|
+
return {
|
|
80
|
+
"id": sub.get("id"),
|
|
81
|
+
"score": sub.get("score"),
|
|
82
|
+
"eval_status": sub.get("eval_status"),
|
|
83
|
+
"eval_error": sub.get("eval_error"),
|
|
84
|
+
"eval_details": sub.get("eval_details"),
|
|
85
|
+
"is_improvement": sub.get("is_improvement"),
|
|
86
|
+
"is_completion": sub.get("is_completion", False),
|
|
87
|
+
"reward_earned": sub.get("reward_earned", 0),
|
|
88
|
+
"llm_model_used": sub.get("llm_model_used"),
|
|
89
|
+
"created_at": sub.get("created_at", ""),
|
|
90
|
+
"answer": answer,
|
|
91
|
+
"thinking": thinking,
|
|
92
|
+
"billing_mode": billing_mode,
|
|
93
|
+
"tokens": tokens,
|
|
94
|
+
"cost_usd": cost,
|
|
95
|
+
"cost": cost,
|
|
96
|
+
"round_num": round_num,
|
|
97
|
+
"result_label": result_label,
|
|
98
|
+
"source": "local",
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def build_error_record(task_id: str, answer: str | None, thinking: str | None,
|
|
103
|
+
tokens: int | None, cost: float | None, round_num: int,
|
|
104
|
+
billing_mode: str,
|
|
105
|
+
result_label: str, error: str) -> dict:
|
|
106
|
+
"""Build a history record for a failed round (no server response)."""
|
|
107
|
+
return {
|
|
108
|
+
"id": None,
|
|
109
|
+
"score": None,
|
|
110
|
+
"eval_status": "error",
|
|
111
|
+
"eval_error": error,
|
|
112
|
+
"eval_details": None,
|
|
113
|
+
"is_improvement": None,
|
|
114
|
+
"is_completion": False,
|
|
115
|
+
"reward_earned": 0,
|
|
116
|
+
"llm_model_used": None,
|
|
117
|
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
|
118
|
+
"answer": answer,
|
|
119
|
+
"thinking": thinking,
|
|
120
|
+
"billing_mode": billing_mode,
|
|
121
|
+
"tokens": tokens,
|
|
122
|
+
"cost_usd": cost,
|
|
123
|
+
"cost": cost,
|
|
124
|
+
"round_num": round_num,
|
|
125
|
+
"result_label": result_label,
|
|
126
|
+
"source": "local",
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def delete_history(task_id: str):
    """Remove the task's history file; a file that does not exist is ignored."""
    HISTORY_DIR.joinpath(f"{task_id}.jsonl").unlink(missing_ok=True)
|