hte-cli 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hte_cli/cli.py +48 -481
- {hte_cli-0.2.21.dist-info → hte_cli-0.2.23.dist-info}/METADATA +1 -1
- {hte_cli-0.2.21.dist-info → hte_cli-0.2.23.dist-info}/RECORD +5 -5
- {hte_cli-0.2.21.dist-info → hte_cli-0.2.23.dist-info}/WHEEL +0 -0
- {hte_cli-0.2.21.dist-info → hte_cli-0.2.23.dist-info}/entry_points.txt +0 -0
hte_cli/cli.py
CHANGED
|
@@ -194,7 +194,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
194
194
|
elif e.status_code == 404:
|
|
195
195
|
console.print("[red]Session not found. Check the session ID and try again.[/red]")
|
|
196
196
|
elif e.status_code == 400 and "paused" in str(e).lower():
|
|
197
|
-
console.print("[yellow]Session is paused. Please resume from the web UI first.[/yellow]")
|
|
197
|
+
console.print(
|
|
198
|
+
"[yellow]Session is paused. Please resume from the web UI first.[/yellow]"
|
|
199
|
+
)
|
|
198
200
|
else:
|
|
199
201
|
console.print(f"[red]Error: {e}[/red]")
|
|
200
202
|
sys.exit(1)
|
|
@@ -236,16 +238,16 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
236
238
|
try:
|
|
237
239
|
files_zip = api.get_session_files(session_id)
|
|
238
240
|
console.print(" [green]✓[/green] Task files downloaded")
|
|
239
|
-
except APIError
|
|
240
|
-
console.print(
|
|
241
|
+
except APIError:
|
|
242
|
+
console.print(" [dim]○ No task files (optional)[/dim]")
|
|
241
243
|
files_zip = None
|
|
242
244
|
|
|
243
245
|
with console.status("[dim]Fetching compose configuration...[/dim]"):
|
|
244
246
|
try:
|
|
245
247
|
compose_yaml = api.get_session_compose(session_id)
|
|
246
248
|
console.print(" [green]✓[/green] Docker compose downloaded")
|
|
247
|
-
except APIError
|
|
248
|
-
console.print(
|
|
249
|
+
except APIError:
|
|
250
|
+
console.print(" [dim]○ No compose file (optional)[/dim]")
|
|
249
251
|
compose_yaml = None
|
|
250
252
|
|
|
251
253
|
console.print()
|
|
@@ -258,9 +260,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
258
260
|
f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
|
|
259
261
|
)
|
|
260
262
|
console.print()
|
|
261
|
-
console.print(
|
|
262
|
-
f"Please contact support: {SUPPORT_EMAIL}"
|
|
263
|
-
)
|
|
263
|
+
console.print(f"Please contact support: {SUPPORT_EMAIL}")
|
|
264
264
|
sys.exit(1)
|
|
265
265
|
|
|
266
266
|
# Build assignment dict for runner compatibility
|
|
@@ -280,6 +280,14 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
280
280
|
},
|
|
281
281
|
}
|
|
282
282
|
|
|
283
|
+
# Send session_started event (records CLI version for debugging)
|
|
284
|
+
events.session_started(
|
|
285
|
+
{
|
|
286
|
+
"cli_version": __version__,
|
|
287
|
+
"task_id": session_info["task_id"],
|
|
288
|
+
}
|
|
289
|
+
)
|
|
290
|
+
|
|
283
291
|
# Step 3: Run setup (skip if reconnecting without force)
|
|
284
292
|
setup_start_time = time.monotonic()
|
|
285
293
|
images = []
|
|
@@ -313,7 +321,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
313
321
|
|
|
314
322
|
# Need to pull - show progress
|
|
315
323
|
last_status = ["connecting..."]
|
|
316
|
-
with console.status(f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]") as status:
|
|
324
|
+
with console.status(
|
|
325
|
+
f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
|
|
326
|
+
) as status:
|
|
327
|
+
|
|
317
328
|
def show_progress(image: str, line: str):
|
|
318
329
|
# Show docker output directly - includes MB progress from PTY
|
|
319
330
|
# Lines look like: "abc123: Downloading 360.9MB/4.075GB"
|
|
@@ -325,7 +336,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
325
336
|
display = f"{layer_id}: {layer_status}"
|
|
326
337
|
if display != last_status[0]:
|
|
327
338
|
last_status[0] = display
|
|
328
|
-
status.update(f"[yellow]↓[/yellow] {short_name} [dim]{display}[/dim]")
|
|
339
|
+
status.update(
|
|
340
|
+
f"[yellow]↓[/yellow] {short_name} [dim]{display}[/dim]"
|
|
341
|
+
)
|
|
329
342
|
|
|
330
343
|
success = pull_image_with_progress(img, on_progress=show_progress)
|
|
331
344
|
|
|
@@ -370,7 +383,13 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
370
383
|
console.print()
|
|
371
384
|
|
|
372
385
|
# Step 3: Run the task using TaskRunner
|
|
373
|
-
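step_num = "3" if (not is_reconnect or force_setup) and images else "2" if (not is_reconnect or force_setup) else "1"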
|
|
386
|
+
step_num = (
|
|
387
|
+
"3"
|
|
388
|
+
if (not is_reconnect or force_setup) and images
|
|
389
|
+
else "2"
|
|
390
|
+
if (not is_reconnect or force_setup)
|
|
391
|
+
else "1"
|
|
392
|
+
)
|
|
374
393
|
console.print(f"[bold]Step {step_num}:[/bold] Starting task environment...")
|
|
375
394
|
console.print("[dim]Launching Docker containers...[/dim]")
|
|
376
395
|
console.print()
|
|
@@ -391,7 +410,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
391
410
|
except KeyboardInterrupt:
|
|
392
411
|
events.docker_stopped(exit_code=130)
|
|
393
412
|
console.print()
|
|
394
|
-
console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
|
|
413
|
+
console.print(
|
|
414
|
+
"[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
|
|
415
|
+
)
|
|
395
416
|
sys.exit(0)
|
|
396
417
|
except Exception as e:
|
|
397
418
|
events.docker_stopped(exit_code=1)
|
|
@@ -415,10 +436,12 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
415
436
|
try:
|
|
416
437
|
from io import BytesIO
|
|
417
438
|
from zipfile import ZipFile
|
|
439
|
+
|
|
418
440
|
with ZipFile(BytesIO(files_zip)) as zf:
|
|
419
441
|
if "difficulty_levels.json" in zf.namelist():
|
|
420
442
|
with zf.open("difficulty_levels.json") as f:
|
|
421
443
|
import json
|
|
444
|
+
|
|
422
445
|
difficulty_info = json.load(f)
|
|
423
446
|
agent_id = difficulty_info.get("agent_id")
|
|
424
447
|
except Exception:
|
|
@@ -429,13 +452,21 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
429
452
|
console.print(f"Answer: {result.answer}")
|
|
430
453
|
console.print(f"Time: {result.time_seconds:.1f}s")
|
|
431
454
|
|
|
455
|
+
# Track upload size and timing
|
|
456
|
+
upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
|
|
457
|
+
upload_size_kb = upload_size_bytes / 1024
|
|
458
|
+
|
|
459
|
+
events.upload_started(size_bytes=upload_size_bytes)
|
|
460
|
+
upload_start_time = time.monotonic()
|
|
461
|
+
|
|
432
462
|
# Upload to server
|
|
433
463
|
with Progress(
|
|
434
464
|
SpinnerColumn(),
|
|
435
465
|
TextColumn("[progress.description]{task.description}"),
|
|
436
466
|
console=console,
|
|
437
467
|
) as progress:
|
|
438
|
-
|
|
468
|
+
size_str = f" ({upload_size_kb:.0f} KB)" if upload_size_kb > 0 else ""
|
|
469
|
+
progress.add_task(f"Uploading result{size_str}...", total=None)
|
|
439
470
|
try:
|
|
440
471
|
upload_result = api.upload_result(
|
|
441
472
|
session_id=session_id,
|
|
@@ -450,6 +481,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
450
481
|
console.print(f"[red]Failed to upload result: {e}[/red]")
|
|
451
482
|
sys.exit(1)
|
|
452
483
|
|
|
484
|
+
# Record upload completion
|
|
485
|
+
upload_duration = time.monotonic() - upload_start_time
|
|
486
|
+
events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
|
|
487
|
+
|
|
453
488
|
if upload_result.get("score") is not None:
|
|
454
489
|
console.print(f"Score: {upload_result['score']}")
|
|
455
490
|
|
|
@@ -548,474 +583,6 @@ def tasks_run(ctx, task_id: str | None):
|
|
|
548
583
|
sys.exit(1)
|
|
549
584
|
|
|
550
585
|
|
|
551
|
-
# Keep the old implementation as _tasks_run_legacy for testing if needed
|
|
552
|
-
def _tasks_run_legacy(ctx, task_id: str | None):
|
|
553
|
-
"""Legacy implementation of tasks run (for testing only)."""
|
|
554
|
-
config: Config = ctx.obj["config"]
|
|
555
|
-
|
|
556
|
-
if not config.is_authenticated():
|
|
557
|
-
console.print("[red]Not logged in. Run: hte-cli auth login[/red]")
|
|
558
|
-
sys.exit(1)
|
|
559
|
-
|
|
560
|
-
# Check Docker and Compose version
|
|
561
|
-
docker_ok, docker_error = _check_docker()
|
|
562
|
-
if not docker_ok:
|
|
563
|
-
console.print(f"[red]{docker_error}[/red]")
|
|
564
|
-
sys.exit(1)
|
|
565
|
-
|
|
566
|
-
api = APIClient(config)
|
|
567
|
-
|
|
568
|
-
# Get assignments
|
|
569
|
-
with Progress(
|
|
570
|
-
SpinnerColumn(),
|
|
571
|
-
TextColumn("[progress.description]{task.description}"),
|
|
572
|
-
console=console,
|
|
573
|
-
) as progress:
|
|
574
|
-
progress.add_task("Fetching assignments...", total=None)
|
|
575
|
-
try:
|
|
576
|
-
assignments = api.get_assignments()
|
|
577
|
-
except APIError as e:
|
|
578
|
-
console.print(f"[red]Error: {e}[/red]")
|
|
579
|
-
sys.exit(1)
|
|
580
|
-
|
|
581
|
-
if not assignments:
|
|
582
|
-
console.print("[yellow]No pending assignments[/yellow]")
|
|
583
|
-
return
|
|
584
|
-
|
|
585
|
-
# Find the assignment to run
|
|
586
|
-
assignment = None
|
|
587
|
-
if task_id:
|
|
588
|
-
for a in assignments:
|
|
589
|
-
if a["task_id"] == task_id:
|
|
590
|
-
assignment = a
|
|
591
|
-
break
|
|
592
|
-
if not assignment:
|
|
593
|
-
console.print(f"[red]Task not found in your assignments: {task_id}[/red]")
|
|
594
|
-
sys.exit(1)
|
|
595
|
-
else:
|
|
596
|
-
# Take highest priority (first in list, already sorted by server)
|
|
597
|
-
assignment = assignments[0]
|
|
598
|
-
|
|
599
|
-
console.print()
|
|
600
|
-
console.print(
|
|
601
|
-
Panel(
|
|
602
|
-
f"[bold]Task:[/bold] {assignment['task_id']}\n"
|
|
603
|
-
f"[bold]Benchmark:[/bold] {assignment['benchmark']}\n"
|
|
604
|
-
f"[bold]Mode:[/bold] {assignment['mode']}",
|
|
605
|
-
title="Starting Task",
|
|
606
|
-
)
|
|
607
|
-
)
|
|
608
|
-
console.print()
|
|
609
|
-
|
|
610
|
-
# Import runner and events
|
|
611
|
-
from hte_cli.runner import TaskRunner
|
|
612
|
-
from hte_cli.events import EventStreamer
|
|
613
|
-
|
|
614
|
-
# Step 1: Start session (or resume existing)
|
|
615
|
-
session_id = assignment.get("session_id")
|
|
616
|
-
if not session_id:
|
|
617
|
-
with Progress(
|
|
618
|
-
SpinnerColumn(),
|
|
619
|
-
TextColumn("[progress.description]{task.description}"),
|
|
620
|
-
console=console,
|
|
621
|
-
) as progress:
|
|
622
|
-
progress.add_task("Starting session...", total=None)
|
|
623
|
-
try:
|
|
624
|
-
session_info = api.start_session(assignment["assignment_id"])
|
|
625
|
-
session_id = session_info["session_id"]
|
|
626
|
-
except APIError as e:
|
|
627
|
-
console.print(f"[red]Failed to start session: {e}[/red]")
|
|
628
|
-
sys.exit(1)
|
|
629
|
-
else:
|
|
630
|
-
console.print(f"[yellow]Resuming session: {session_id}[/yellow]")
|
|
631
|
-
|
|
632
|
-
# Create event streamer
|
|
633
|
-
events = EventStreamer(api, session_id)
|
|
634
|
-
|
|
635
|
-
# Step 2: Download task files and compose
|
|
636
|
-
files_zip = None
|
|
637
|
-
compose_yaml = None
|
|
638
|
-
|
|
639
|
-
with Progress(
|
|
640
|
-
SpinnerColumn(),
|
|
641
|
-
TextColumn("[progress.description]{task.description}"),
|
|
642
|
-
console=console,
|
|
643
|
-
) as progress:
|
|
644
|
-
# Download files
|
|
645
|
-
task_id_display = progress.add_task("Downloading task files...", total=None)
|
|
646
|
-
try:
|
|
647
|
-
files_zip = api.get_assignment_files(assignment["assignment_id"])
|
|
648
|
-
except APIError as e:
|
|
649
|
-
# Files are optional for some benchmarks
|
|
650
|
-
console.print(f"[dim]Note: {e}[/dim]")
|
|
651
|
-
|
|
652
|
-
# Download compose
|
|
653
|
-
progress.update(task_id_display, description="Downloading compose file...")
|
|
654
|
-
try:
|
|
655
|
-
compose_yaml = api.get_assignment_compose(assignment["assignment_id"])
|
|
656
|
-
except APIError as e:
|
|
657
|
-
# Compose is optional for simple benchmarks
|
|
658
|
-
console.print(f"[dim]Note: {e}[/dim]")
|
|
659
|
-
|
|
660
|
-
# Validate compose for sandbox-required benchmarks
|
|
661
|
-
benchmark = assignment.get("benchmark", "").lower()
|
|
662
|
-
SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
|
|
663
|
-
if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml:
|
|
664
|
-
console.print(
|
|
665
|
-
f"[red]Error: This {benchmark} task requires a Docker sandbox but none is available.[/red]"
|
|
666
|
-
)
|
|
667
|
-
if benchmark == "cvebench":
|
|
668
|
-
console.print(
|
|
669
|
-
"[yellow]This CVE may have infrastructure issues preventing Docker support.[/yellow]"
|
|
670
|
-
)
|
|
671
|
-
console.print("[dim]Contact support if you believe this is an error.[/dim]")
|
|
672
|
-
sys.exit(1)
|
|
673
|
-
|
|
674
|
-
# Step 3: Send session_started event
|
|
675
|
-
events.session_started(
|
|
676
|
-
{
|
|
677
|
-
"cli_version": __version__,
|
|
678
|
-
"task_id": assignment["task_id"],
|
|
679
|
-
}
|
|
680
|
-
)
|
|
681
|
-
|
|
682
|
-
# Step 4: Show pre-task message (benchmark-specific)
|
|
683
|
-
single_shot_benchmarks = {"nl2bash", "cybashbench"}
|
|
684
|
-
|
|
685
|
-
console.print()
|
|
686
|
-
if benchmark in single_shot_benchmarks:
|
|
687
|
-
# Single-shot benchmarks - no task score available
|
|
688
|
-
console.print(
|
|
689
|
-
Panel(
|
|
690
|
-
"[bold]Instructions[/bold]\n\n"
|
|
691
|
-
"You are about to enter the task environment.\n\n"
|
|
692
|
-
"[yellow]IMPORTANT: This is a SINGLE-SHOT task.[/yellow]\n"
|
|
693
|
-
"[yellow]You get ONE submission attempt - no retries![/yellow]\n\n"
|
|
694
|
-
"Commands available:\n"
|
|
695
|
-
" [cyan]task status[/cyan] - Show elapsed time\n"
|
|
696
|
-
' [cyan]task submit "answer"[/cyan] - Submit your FINAL answer (ends task)\n'
|
|
697
|
-
" [cyan]task quit[/cyan] - Quit without submitting\n"
|
|
698
|
-
' [cyan]task note "text"[/cyan] - Record observations\n',
|
|
699
|
-
title="Task Environment",
|
|
700
|
-
)
|
|
701
|
-
)
|
|
702
|
-
else:
|
|
703
|
-
# CTF/sandbox benchmarks - task score available
|
|
704
|
-
console.print(
|
|
705
|
-
Panel(
|
|
706
|
-
"[bold]Instructions[/bold]\n\n"
|
|
707
|
-
"You are about to enter the task environment.\n\n"
|
|
708
|
-
"Commands available:\n"
|
|
709
|
-
" [cyan]task status[/cyan] - Show elapsed time\n"
|
|
710
|
-
' [cyan]task score "answer"[/cyan] - CHECK if correct (does NOT end task)\n'
|
|
711
|
-
' [cyan]task submit "answer"[/cyan] - Submit FINAL answer (ends task)\n'
|
|
712
|
-
" [cyan]task quit[/cyan] - Quit without submitting\n"
|
|
713
|
-
' [cyan]task note "text"[/cyan] - Record observations\n\n'
|
|
714
|
-
"[green]TIP: Use 'task score' to verify before submitting![/green]\n",
|
|
715
|
-
title="Task Environment",
|
|
716
|
-
)
|
|
717
|
-
)
|
|
718
|
-
console.print()
|
|
719
|
-
|
|
720
|
-
if not click.confirm("Ready to start?"):
|
|
721
|
-
console.print("[yellow]Cancelled[/yellow]")
|
|
722
|
-
return
|
|
723
|
-
|
|
724
|
-
# Step 5: Pre-pull Docker images with progress
|
|
725
|
-
from hte_cli.image_utils import extract_images_from_compose
|
|
726
|
-
import re
|
|
727
|
-
import time
|
|
728
|
-
|
|
729
|
-
setup_start_time = time.monotonic()
|
|
730
|
-
images: list[str] = []
|
|
731
|
-
results: list[tuple[str, bool, str]] = []
|
|
732
|
-
|
|
733
|
-
if compose_yaml:
|
|
734
|
-
images = extract_images_from_compose(compose_yaml)
|
|
735
|
-
if images:
|
|
736
|
-
events.setup_started(images)
|
|
737
|
-
console.print()
|
|
738
|
-
console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")
|
|
739
|
-
|
|
740
|
-
# Track layer progress per image: {layer_id: (status, downloaded_mb, total_mb)}
|
|
741
|
-
image_layers: dict[str, dict[str, tuple[str, float, float]]] = {}
|
|
742
|
-
|
|
743
|
-
def parse_size(size_str: str) -> float:
|
|
744
|
-
"""Parse size string like '1.2MB' or '500kB' to MB."""
|
|
745
|
-
size_str = size_str.strip().upper()
|
|
746
|
-
if "GB" in size_str:
|
|
747
|
-
return float(size_str.replace("GB", "").strip()) * 1024
|
|
748
|
-
elif "MB" in size_str:
|
|
749
|
-
return float(size_str.replace("MB", "").strip())
|
|
750
|
-
elif "KB" in size_str:
|
|
751
|
-
return float(size_str.replace("KB", "").strip()) / 1024
|
|
752
|
-
elif "B" in size_str:
|
|
753
|
-
return float(size_str.replace("B", "").strip()) / (1024 * 1024)
|
|
754
|
-
return 0
|
|
755
|
-
|
|
756
|
-
def parse_docker_line(line: str) -> tuple[str | None, str, float, float]:
|
|
757
|
-
"""Parse Docker pull output to extract layer ID, status, and sizes.
|
|
758
|
-
|
|
759
|
-
Returns: (layer_id, status, downloaded_mb, total_mb)
|
|
760
|
-
"""
|
|
761
|
-
# Format: "79f742de2855: Downloading [==>] 1.2MB/50MB"
|
|
762
|
-
# Or: "79f742de2855: Pull complete"
|
|
763
|
-
match = re.match(r"([a-f0-9]+): (.+)", line)
|
|
764
|
-
if not match:
|
|
765
|
-
return None, "", 0, 0
|
|
766
|
-
|
|
767
|
-
layer_id = match.group(1)
|
|
768
|
-
status_part = match.group(2)
|
|
769
|
-
|
|
770
|
-
# Try to extract size info from "Downloading [==>] 1.2MB/50MB"
|
|
771
|
-
size_match = re.search(r"([\d.]+[kKmMgG]?[bB]?)/([\d.]+[kKmMgG]?[bB])", status_part)
|
|
772
|
-
if size_match:
|
|
773
|
-
downloaded = parse_size(size_match.group(1))
|
|
774
|
-
total = parse_size(size_match.group(2))
|
|
775
|
-
return layer_id, status_part, downloaded, total
|
|
776
|
-
|
|
777
|
-
return layer_id, status_part, 0, 0
|
|
778
|
-
|
|
779
|
-
def get_progress_summary(image: str) -> str:
|
|
780
|
-
"""Get a human-readable progress summary for an image with MB counts."""
|
|
781
|
-
if image not in image_layers or not image_layers[image]:
|
|
782
|
-
return "connecting..."
|
|
783
|
-
|
|
784
|
-
layers = image_layers[image]
|
|
785
|
-
total_layers = len(layers)
|
|
786
|
-
|
|
787
|
-
# Count layers in different states
|
|
788
|
-
complete = 0
|
|
789
|
-
downloading = 0
|
|
790
|
-
waiting = 0
|
|
791
|
-
total_downloaded_mb = 0
|
|
792
|
-
total_size_mb = 0
|
|
793
|
-
|
|
794
|
-
for status, downloaded, total in layers.values():
|
|
795
|
-
status_lower = status.lower()
|
|
796
|
-
if "complete" in status_lower:
|
|
797
|
-
complete += 1
|
|
798
|
-
total_downloaded_mb += total
|
|
799
|
-
total_size_mb += total
|
|
800
|
-
elif "downloading" in status_lower:
|
|
801
|
-
downloading += 1
|
|
802
|
-
total_downloaded_mb += downloaded
|
|
803
|
-
total_size_mb += total
|
|
804
|
-
elif "waiting" in status_lower:
|
|
805
|
-
waiting += 1
|
|
806
|
-
|
|
807
|
-
# Choose the most informative display
|
|
808
|
-
if complete == total_layers and total_layers > 0:
|
|
809
|
-
if total_size_mb > 0:
|
|
810
|
-
return f"done ({total_size_mb:.0f}MB)"
|
|
811
|
-
return f"done ({total_layers} layers)"
|
|
812
|
-
elif total_size_mb > 0:
|
|
813
|
-
# Show MB progress when available
|
|
814
|
-
pct = int(100 * total_downloaded_mb / total_size_mb) if total_size_mb > 0 else 0
|
|
815
|
-
return f"{total_downloaded_mb:.0f}/{total_size_mb:.0f}MB ({pct}%)"
|
|
816
|
-
elif downloading > 0:
|
|
817
|
-
return f"downloading ({complete}/{total_layers} done)"
|
|
818
|
-
elif complete > 0:
|
|
819
|
-
return f"extracting ({complete}/{total_layers} done)"
|
|
820
|
-
elif waiting > 0:
|
|
821
|
-
return f"queued ({total_layers} layers)"
|
|
822
|
-
else:
|
|
823
|
-
return f"preparing ({total_layers} layers)"
|
|
824
|
-
|
|
825
|
-
def on_image_progress(image: str, line: str):
|
|
826
|
-
"""Track layer-level progress with size info."""
|
|
827
|
-
if image not in image_layers:
|
|
828
|
-
image_layers[image] = {}
|
|
829
|
-
|
|
830
|
-
layer_id, status, downloaded, total = parse_docker_line(line)
|
|
831
|
-
if layer_id:
|
|
832
|
-
image_layers[image][layer_id] = (status, downloaded, total)
|
|
833
|
-
|
|
834
|
-
# Process images sequentially with clear output
|
|
835
|
-
results = []
|
|
836
|
-
for idx, img in enumerate(images, 1):
|
|
837
|
-
short_name = img.split("/")[-1] if "/" in img else img
|
|
838
|
-
|
|
839
|
-
# Check if cached first
|
|
840
|
-
from hte_cli.image_utils import check_image_exists_locally, pull_image_with_progress
|
|
841
|
-
|
|
842
|
-
if check_image_exists_locally(img):
|
|
843
|
-
console.print(f" [green]✓[/green] {short_name} [dim](cached)[/dim]")
|
|
844
|
-
results.append((img, True, "cached"))
|
|
845
|
-
continue
|
|
846
|
-
|
|
847
|
-
# Need to pull - use Rich Status for live updates
|
|
848
|
-
image_layers[img] = {}
|
|
849
|
-
|
|
850
|
-
with console.status(
|
|
851
|
-
f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
|
|
852
|
-
) as status:
|
|
853
|
-
|
|
854
|
-
def show_progress(image: str, line: str):
|
|
855
|
-
on_image_progress(image, line)
|
|
856
|
-
summary = get_progress_summary(image)
|
|
857
|
-
status.update(f"[yellow]↓[/yellow] {short_name} [dim]{summary}[/dim]")
|
|
858
|
-
|
|
859
|
-
success = pull_image_with_progress(img, on_progress=show_progress)
|
|
860
|
-
|
|
861
|
-
# Final status (printed after status context exits)
|
|
862
|
-
if success:
|
|
863
|
-
console.print(f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]")
|
|
864
|
-
results.append((img, True, "pulled"))
|
|
865
|
-
else:
|
|
866
|
-
console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
|
|
867
|
-
results.append((img, False, "failed"))
|
|
868
|
-
|
|
869
|
-
failed_count = sum(1 for _, ok, _ in results if not ok)
|
|
870
|
-
if failed_count > 0:
|
|
871
|
-
console.print(
|
|
872
|
-
f"[yellow]Warning: {failed_count} image(s) failed to pull. "
|
|
873
|
-
"Task may fail to start.[/yellow]"
|
|
874
|
-
)
|
|
875
|
-
console.print()
|
|
876
|
-
|
|
877
|
-
# Record image pull timing
|
|
878
|
-
if images:
|
|
879
|
-
pull_duration = time.monotonic() - setup_start_time
|
|
880
|
-
pulled = [img for img, ok, status in results if ok and status == "pulled"]
|
|
881
|
-
cached = [img for img, ok, status in results if ok and status == "cached"]
|
|
882
|
-
failed = [img for img, ok, status in results if not ok]
|
|
883
|
-
events.image_pull_completed(
|
|
884
|
-
duration_seconds=pull_duration,
|
|
885
|
-
pulled=pulled,
|
|
886
|
-
cached=cached,
|
|
887
|
-
failed=failed,
|
|
888
|
-
)
|
|
889
|
-
|
|
890
|
-
# Step 6: Run Inspect's human_cli
|
|
891
|
-
runner = TaskRunner()
|
|
892
|
-
console.print("[bold]Starting task environment...[/bold]")
|
|
893
|
-
console.print("[dim]Launching Docker containers...[/dim]")
|
|
894
|
-
console.print()
|
|
895
|
-
|
|
896
|
-
events.docker_started()
|
|
897
|
-
|
|
898
|
-
# Record total setup time (image pulls + compose up)
|
|
899
|
-
total_setup = time.monotonic() - setup_start_time
|
|
900
|
-
events.setup_completed(total_seconds=total_setup)
|
|
901
|
-
|
|
902
|
-
eval_log_bytes = None
|
|
903
|
-
local_eval_path = None
|
|
904
|
-
try:
|
|
905
|
-
result = runner.run_from_assignment(
|
|
906
|
-
assignment=assignment,
|
|
907
|
-
compose_yaml=compose_yaml,
|
|
908
|
-
files_zip=files_zip,
|
|
909
|
-
)
|
|
910
|
-
# Read eval log BEFORE cleanup (cleanup deletes the temp directory)
|
|
911
|
-
if result.eval_log_path and result.eval_log_path.exists():
|
|
912
|
-
eval_log_bytes = result.eval_log_path.read_bytes()
|
|
913
|
-
|
|
914
|
-
# Save local copy for safety
|
|
915
|
-
eval_logs_dir = get_eval_logs_dir()
|
|
916
|
-
eval_logs_dir.mkdir(parents=True, exist_ok=True)
|
|
917
|
-
local_eval_path = eval_logs_dir / result.eval_log_path.name
|
|
918
|
-
local_eval_path.write_bytes(eval_log_bytes)
|
|
919
|
-
except Exception as e:
|
|
920
|
-
events.docker_stopped(exit_code=1)
|
|
921
|
-
console.print(f"[red]Task execution failed: {e}[/red]")
|
|
922
|
-
sys.exit(1)
|
|
923
|
-
finally:
|
|
924
|
-
runner.cleanup()
|
|
925
|
-
|
|
926
|
-
events.docker_stopped(exit_code=0)
|
|
927
|
-
|
|
928
|
-
# Step 6: Show post-task summary
|
|
929
|
-
console.print()
|
|
930
|
-
console.print(
|
|
931
|
-
Panel(
|
|
932
|
-
f"[bold]Time spent:[/bold] {result.time_seconds / 60:.1f} minutes\n"
|
|
933
|
-
f"[bold]Answer:[/bold] {result.answer or '(none)'}\n"
|
|
934
|
-
f"[bold]Score:[/bold] {result.score if result.score is not None else 'pending'}",
|
|
935
|
-
title="Task Complete",
|
|
936
|
-
)
|
|
937
|
-
)
|
|
938
|
-
|
|
939
|
-
# Defensive check: don't upload if task didn't actually run
|
|
940
|
-
# (catches edge cases where runner returned without proper error)
|
|
941
|
-
if result.time_seconds == 0.0 and result.answer is None:
|
|
942
|
-
console.print()
|
|
943
|
-
console.print("[red]Task did not complete successfully (0 time, no answer).[/red]")
|
|
944
|
-
console.print("[yellow]Session preserved - run 'hte-cli tasks run' to retry.[/yellow]")
|
|
945
|
-
sys.exit(1)
|
|
946
|
-
|
|
947
|
-
# Step 7: Upload result
|
|
948
|
-
events.session_completed(
|
|
949
|
-
elapsed_seconds=result.time_seconds,
|
|
950
|
-
answer=result.answer,
|
|
951
|
-
)
|
|
952
|
-
|
|
953
|
-
# Extract agent_id from task files for CyberGym post-hoc verification
|
|
954
|
-
agent_id = None
|
|
955
|
-
if files_zip:
|
|
956
|
-
try:
|
|
957
|
-
with ZipFile(BytesIO(files_zip)) as zf:
|
|
958
|
-
if "difficulty_levels.json" in zf.namelist():
|
|
959
|
-
with zf.open("difficulty_levels.json") as f:
|
|
960
|
-
difficulty_info = json.load(f)
|
|
961
|
-
agent_id = difficulty_info.get("agent_id")
|
|
962
|
-
except Exception:
|
|
963
|
-
pass # Not a CyberGym task or malformed zip
|
|
964
|
-
|
|
965
|
-
# Show upload size info and track timing
|
|
966
|
-
upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
|
|
967
|
-
upload_size_kb = upload_size_bytes / 1024
|
|
968
|
-
if upload_size_kb / 1024 > 50:
|
|
969
|
-
console.print(f"[yellow]Warning: Large eval log ({upload_size_kb / 1024:.1f} MB)[/yellow]")
|
|
970
|
-
|
|
971
|
-
events.upload_started(size_bytes=upload_size_bytes)
|
|
972
|
-
upload_start_time = time.monotonic()
|
|
973
|
-
|
|
974
|
-
with Progress(
|
|
975
|
-
SpinnerColumn(),
|
|
976
|
-
TextColumn("[progress.description]{task.description}"),
|
|
977
|
-
console=console,
|
|
978
|
-
) as progress:
|
|
979
|
-
size_str = f" ({upload_size_kb:.0f} KB)" if upload_size_kb > 0 else ""
|
|
980
|
-
progress.add_task(f"Uploading result{size_str}...", total=None)
|
|
981
|
-
|
|
982
|
-
try:
|
|
983
|
-
upload_result = api.upload_result(
|
|
984
|
-
session_id=session_id,
|
|
985
|
-
answer=result.answer or "",
|
|
986
|
-
client_active_seconds=result.time_seconds,
|
|
987
|
-
eval_log_bytes=eval_log_bytes,
|
|
988
|
-
score=result.score,
|
|
989
|
-
score_binarized=result.score_binarized,
|
|
990
|
-
agent_id=agent_id,
|
|
991
|
-
)
|
|
992
|
-
except APIError as e:
|
|
993
|
-
console.print(f"[red]Failed to upload result: {e}[/red]")
|
|
994
|
-
if local_eval_path:
|
|
995
|
-
console.print(f"[yellow]Eval log saved locally: {local_eval_path}[/yellow]")
|
|
996
|
-
console.print("[yellow]Your result was saved locally but not uploaded.[/yellow]")
|
|
997
|
-
sys.exit(1)
|
|
998
|
-
|
|
999
|
-
# Record upload completion
|
|
1000
|
-
upload_duration = time.monotonic() - upload_start_time
|
|
1001
|
-
events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
|
|
1002
|
-
|
|
1003
|
-
console.print()
|
|
1004
|
-
console.print("[green]Result uploaded successfully![/green]")
|
|
1005
|
-
|
|
1006
|
-
# Show local eval log path (quote paths with spaces for easy copy-paste)
|
|
1007
|
-
if local_eval_path:
|
|
1008
|
-
path_str = str(local_eval_path)
|
|
1009
|
-
if " " in path_str:
|
|
1010
|
-
path_str = f'"{path_str}"'
|
|
1011
|
-
console.print(f"[dim]Eval log: {path_str}[/dim]")
|
|
1012
|
-
|
|
1013
|
-
# Show next task if available
|
|
1014
|
-
if upload_result.get("next_assignment_id"):
|
|
1015
|
-
console.print()
|
|
1016
|
-
console.print("Run [bold]hte-cli tasks run[/bold] for the next task.")
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
586
|
@tasks.command("pull-images")
|
|
1020
587
|
@click.option("--count", "-n", default=5, help="Number of upcoming tasks to pull images for")
|
|
1021
588
|
@click.pass_context
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
|
|
2
2
|
hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
|
|
3
3
|
hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
|
|
4
|
-
hte_cli/cli.py,sha256=
|
|
4
|
+
hte_cli/cli.py,sha256=YCsaW1rAzOAusgi1qN9YWJWr68jpctTNG22JluEcCsQ,24416
|
|
5
5
|
hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
|
|
6
6
|
hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
|
|
7
7
|
hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
|
|
@@ -9,7 +9,7 @@ hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
|
|
|
9
9
|
hte_cli/runner.py,sha256=SWl9FF4X3e9eBbZyL0ujhmmSL5OK8J6st-Ty0jD5AWM,14550
|
|
10
10
|
hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
|
|
11
11
|
hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
|
|
12
|
-
hte_cli-0.2.
|
|
13
|
-
hte_cli-0.2.21.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
-
hte_cli-0.2.21.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
|
|
15
|
-
hte_cli-0.2.21.dist-info/RECORD,,
|
|
12
|
+
hte_cli-0.2.23.dist-info/METADATA,sha256=cNU9v5zaqLtSnSsgHC7SxiYOysMg00exWz2iSHp2n6w,3820
|
|
13
|
+
hte_cli-0.2.23.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
+
hte_cli-0.2.23.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
|
|
15
|
+
hte_cli-0.2.23.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|