hte-cli 0.2.29__tar.gz → 0.2.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hte_cli-0.2.29 → hte_cli-0.2.31}/PKG-INFO +1 -1
- {hte_cli-0.2.29 → hte_cli-0.2.31}/pyproject.toml +1 -1
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/api_client.py +24 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/cli.py +114 -20
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/image_utils.py +1 -4
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/unit/test_image_utils.py +3 -1
- {hte_cli-0.2.29 → hte_cli-0.2.31}/uv.lock +1 -1
- {hte_cli-0.2.29 → hte_cli-0.2.31}/.gitignore +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/README.md +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/__init__.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/__main__.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/config.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/errors.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/events.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/runner.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/scorers.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/version_check.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/__init__.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/__init__.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/automated_runner.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/conftest.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/e2e_test.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/test_benchmark_flows.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/test_eval_logs.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/test_infrastructure.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/test_runtime_imports.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/test_session_lifecycle.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/e2e/verify_docker_deps.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/unit/__init__.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/unit/conftest.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/unit/test_runner.py +0 -0
- {hte_cli-0.2.29 → hte_cli-0.2.31}/tests/unit/test_scorers.py +0 -0
|
@@ -265,3 +265,27 @@ class APIClient:
|
|
|
265
265
|
json=payload,
|
|
266
266
|
timeout=UPLOAD_TIMEOUT,
|
|
267
267
|
)
|
|
268
|
+
|
|
269
|
+
def upload_partial_log(
    self,
    session_id: str,
    eval_log_bytes: bytes,
) -> dict:
    """Upload a partial eval log for an interrupted session.

    The raw log bytes are base64-encoded (ASCII) and sent as the
    ``eval_log_base64`` field of a JSON body, POSTed to
    ``/sessions/{session_id}/partial-log`` with the upload timeout.

    Args:
        session_id: The session ID
        eval_log_bytes: Partial eval log content

    Returns:
        The value returned by ``self.post`` (documented upstream as a
        response dict with status and log_path).
    """
    # JSON cannot carry raw bytes, so ship the log as base64 text.
    encoded_log = base64.b64encode(eval_log_bytes).decode("ascii")
    endpoint = f"/sessions/{session_id}/partial-log"

    return self.post(
        endpoint,
        json={"eval_log_base64": encoded_log},
        timeout=UPLOAD_TIMEOUT,
    )
|
|
@@ -23,6 +23,42 @@ console = Console()
|
|
|
23
23
|
SUPPORT_EMAIL = "jacktpayne51@gmail.com"
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
def _find_eval_log_bytes(runner) -> bytes | None:
|
|
27
|
+
"""Find and read eval log bytes from runner's work directory.
|
|
28
|
+
|
|
29
|
+
Used for interrupted sessions to upload partial logs.
|
|
30
|
+
"""
|
|
31
|
+
try:
|
|
32
|
+
# Look for eval logs in the work directory
|
|
33
|
+
if not runner.work_dir.exists():
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
# Find any .eval files in the work directory tree
|
|
37
|
+
eval_files = list(runner.work_dir.rglob("*.eval"))
|
|
38
|
+
if not eval_files:
|
|
39
|
+
return None
|
|
40
|
+
|
|
41
|
+
# Get the most recent one
|
|
42
|
+
eval_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
|
|
43
|
+
return eval_files[0].read_bytes()
|
|
44
|
+
except Exception:
|
|
45
|
+
return None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _upload_partial_log(api: APIClient, session_id: str, eval_log_bytes: bytes, console: Console) -> None:
    """Best-effort upload of a partial eval log for an interrupted session.

    Prints a dim progress line showing the payload size in KB, calls
    ``api.upload_partial_log`` and then prints a confirmation. Any exception
    raised along the way is reported as a dim console message instead of
    propagating, so a failed upload does not block the caller's exit path.
    """
    try:
        size_kb = len(eval_log_bytes) / 1024
        progress_note = f"[dim]Uploading partial eval log ({size_kb:.0f} KB)...[/dim]"
        console.print(progress_note)

        api.upload_partial_log(session_id, eval_log_bytes)

        console.print("[dim]Partial eval log uploaded.[/dim]")
    except Exception as e:
        failure_note = f"[dim]Could not upload partial log: {e}[/dim]"
        console.print(failure_note)
|
|
60
|
+
|
|
61
|
+
|
|
26
62
|
@click.group()
|
|
27
63
|
@click.version_option(__version__, prog_name="hte-cli")
|
|
28
64
|
@click.pass_context
|
|
@@ -282,6 +318,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
282
318
|
# Validate compose for sandbox-required benchmarks
|
|
283
319
|
benchmark = session_info.get("benchmark", "").lower()
|
|
284
320
|
SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
|
|
321
|
+
|
|
322
|
+
# NYU CTF requires a shared Docker network for agent/game-server communication
|
|
323
|
+
if benchmark == "nyuctf":
|
|
324
|
+
_ensure_nyuctf_network()
|
|
325
|
+
|
|
285
326
|
if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
|
|
286
327
|
console.print(
|
|
287
328
|
f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
|
|
@@ -333,15 +374,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
333
374
|
host_platform = get_host_docker_platform()
|
|
334
375
|
|
|
335
376
|
if is_linux_arm:
|
|
336
|
-
console.print(
|
|
337
|
-
f"[yellow]![/yellow] Detected [bold]Linux ARM64[/bold] environment"
|
|
338
|
-
)
|
|
377
|
+
console.print("[yellow]![/yellow] Detected [bold]Linux ARM64[/bold] environment")
|
|
339
378
|
console.print(
|
|
340
379
|
f" [dim]Will verify cached images match host architecture ({host_platform})[/dim]"
|
|
341
380
|
)
|
|
342
|
-
console.print(
|
|
343
|
-
f" [dim]Mismatched images will be automatically re-pulled[/dim]"
|
|
344
|
-
)
|
|
381
|
+
console.print(" [dim]Mismatched images will be automatically re-pulled[/dim]")
|
|
345
382
|
console.print()
|
|
346
383
|
|
|
347
384
|
console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
|
|
@@ -377,7 +414,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
377
414
|
f" [dim]Cached image: {image_arch} | Host: {host_arch}[/dim]"
|
|
378
415
|
)
|
|
379
416
|
console.print(
|
|
380
|
-
|
|
417
|
+
" [dim]Removing cached image and re-pulling correct architecture...[/dim]"
|
|
381
418
|
)
|
|
382
419
|
|
|
383
420
|
needed_fix, fix_msg = fix_image_architecture(img)
|
|
@@ -391,7 +428,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
391
428
|
# No ARM variant available - this is an x86-only image
|
|
392
429
|
# Re-pull the amd64 version and warn about QEMU
|
|
393
430
|
console.print(
|
|
394
|
-
|
|
431
|
+
" [dim]No ARM variant available - re-pulling x86 version...[/dim]"
|
|
395
432
|
)
|
|
396
433
|
success = pull_image_with_progress(img)
|
|
397
434
|
if success:
|
|
@@ -402,7 +439,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
402
439
|
pulled_images.append(img)
|
|
403
440
|
continue
|
|
404
441
|
else:
|
|
405
|
-
console.print(
|
|
442
|
+
console.print(
|
|
443
|
+
f" [red]✗[/red] {short_name} [dim](failed to pull)[/dim]"
|
|
444
|
+
)
|
|
406
445
|
failed_images.append(img)
|
|
407
446
|
pull_errors[img] = "failed to pull x86 fallback"
|
|
408
447
|
continue
|
|
@@ -426,12 +465,12 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
426
465
|
|
|
427
466
|
def show_progress(image: str, line: str):
|
|
428
467
|
# Show docker output directly - includes MB progress from PTY
|
|
429
|
-
# Lines look like: "abc123: Downloading 360.9MB/4.075GB"
|
|
468
|
+
# Lines look like: "abc123: Downloading [======> ] 360.9MB/4.075GB"
|
|
430
469
|
if ": " in line:
|
|
431
470
|
parts = line.split(": ", 1)
|
|
432
471
|
if len(parts) == 2:
|
|
433
472
|
layer_id = parts[0][-8:]
|
|
434
|
-
layer_status = parts[1][:
|
|
473
|
+
layer_status = parts[1][:70] # Increased to include size info
|
|
435
474
|
display = f"{layer_id}: {layer_status}"
|
|
436
475
|
if display != last_status[0]:
|
|
437
476
|
last_status[0] = display
|
|
@@ -450,6 +489,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
450
489
|
# On Linux ARM64, verify pulled image architecture
|
|
451
490
|
if is_linux_arm:
|
|
452
491
|
from hte_cli.image_utils import get_image_architecture
|
|
492
|
+
|
|
453
493
|
pulled_arch = get_image_architecture(img)
|
|
454
494
|
|
|
455
495
|
if pulled_arch == "arm64":
|
|
@@ -462,7 +502,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
462
502
|
f" [yellow]![/yellow] {short_name} [dim](downloaded, arch: amd64)[/dim]"
|
|
463
503
|
)
|
|
464
504
|
console.print(
|
|
465
|
-
|
|
505
|
+
" [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
|
|
466
506
|
)
|
|
467
507
|
x86_images_on_arm.append(img)
|
|
468
508
|
else:
|
|
@@ -494,9 +534,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
494
534
|
console.print(
|
|
495
535
|
f"[yellow]⚠ Warning:[/yellow] {len(x86_images_on_arm)} x86 image(s) detected on ARM host"
|
|
496
536
|
)
|
|
497
|
-
console.print(
|
|
498
|
-
" These require QEMU emulation. If container fails to start, run:"
|
|
499
|
-
)
|
|
537
|
+
console.print(" These require QEMU emulation. If container fails to start, run:")
|
|
500
538
|
console.print(
|
|
501
539
|
" [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
|
|
502
540
|
)
|
|
@@ -513,14 +551,21 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
513
551
|
|
|
514
552
|
# Architecture-specific advice
|
|
515
553
|
if is_linux_arm:
|
|
516
|
-
console.print(
|
|
517
|
-
|
|
554
|
+
console.print(
|
|
555
|
+
" 2. You're on Linux ARM64 - try: docker pull <image> --platform linux/arm64"
|
|
556
|
+
)
|
|
557
|
+
console.print(
|
|
558
|
+
" 3. For x86-only images, enable QEMU: docker run --privileged --rm tonistiigi/binfmt --install all"
|
|
559
|
+
)
|
|
518
560
|
else:
|
|
519
561
|
console.print(" 2. Try manual pull: docker pull <image>")
|
|
520
562
|
|
|
521
563
|
console.print(" 4. Check network connectivity")
|
|
522
564
|
console.print()
|
|
523
|
-
console.print(
|
|
565
|
+
console.print(
|
|
566
|
+
"Session remains active - you can retry with: hte-cli session join "
|
|
567
|
+
+ session_id
|
|
568
|
+
)
|
|
524
569
|
sys.exit(1)
|
|
525
570
|
|
|
526
571
|
# Send setup_completed - THIS STARTS THE TIMER ON SERVER
|
|
@@ -574,6 +619,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
574
619
|
eval_log_bytes = result.eval_log_path.read_bytes()
|
|
575
620
|
except KeyboardInterrupt:
|
|
576
621
|
events.docker_stopped(exit_code=130)
|
|
622
|
+
# Try to find and upload any partial eval log before exiting
|
|
623
|
+
eval_log_bytes = _find_eval_log_bytes(runner)
|
|
624
|
+
if eval_log_bytes:
|
|
625
|
+
_upload_partial_log(api, session_id, eval_log_bytes, console)
|
|
577
626
|
console.print()
|
|
578
627
|
console.print(
|
|
579
628
|
"[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
|
|
@@ -581,6 +630,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
581
630
|
sys.exit(0)
|
|
582
631
|
except Exception as e:
|
|
583
632
|
events.docker_stopped(exit_code=1)
|
|
633
|
+
# Try to upload partial log on failure too
|
|
634
|
+
eval_log_bytes = _find_eval_log_bytes(runner)
|
|
635
|
+
if eval_log_bytes:
|
|
636
|
+
_upload_partial_log(api, session_id, eval_log_bytes, console)
|
|
584
637
|
console.print(f"[red]Task execution failed: {e}[/red]")
|
|
585
638
|
sys.exit(1)
|
|
586
639
|
finally:
|
|
@@ -930,7 +983,9 @@ def diagnose_cmd():
|
|
|
930
983
|
console.print(" [red]✗[/red] QEMU x86 emulation NOT working")
|
|
931
984
|
console.print()
|
|
932
985
|
console.print(" [yellow]To enable QEMU emulation, run:[/yellow]")
|
|
933
|
-
console.print(
|
|
986
|
+
console.print(
|
|
987
|
+
" [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
|
|
988
|
+
)
|
|
934
989
|
except subprocess.TimeoutExpired:
|
|
935
990
|
console.print(" [yellow]![/yellow] QEMU test timed out")
|
|
936
991
|
except Exception as e:
|
|
@@ -974,7 +1029,10 @@ def _check_docker() -> tuple[bool, str | None]:
|
|
|
974
1029
|
timeout=10,
|
|
975
1030
|
)
|
|
976
1031
|
if result.returncode != 0:
|
|
977
|
-
return
|
|
1032
|
+
return (
|
|
1033
|
+
False,
|
|
1034
|
+
"Docker is not running. Start Docker (Docker Desktop, colima, or dockerd).",
|
|
1035
|
+
)
|
|
978
1036
|
except FileNotFoundError:
|
|
979
1037
|
return False, "Docker is not installed. Install from https://docs.docker.com/get-docker/"
|
|
980
1038
|
except Exception as e:
|
|
@@ -1023,5 +1081,41 @@ def _check_docker() -> tuple[bool, str | None]:
|
|
|
1023
1081
|
return True, None
|
|
1024
1082
|
|
|
1025
1083
|
|
|
1084
|
+
def _ensure_nyuctf_network() -> None:
    """Make sure the 'ctfnet' Docker network exists for NYU CTF challenges.

    NYU CTF tasks use a shared Docker network ('ctfnet') for communication
    between the agent container and game-server container. Because the
    network is declared as external, it has to exist before docker compose
    up is called.

    The network is probed with ``docker network inspect`` and created with
    ``docker network create`` only when the probe exits non-zero. Failures
    (creation error, timeout, docker binary not found) are ignored here on
    purpose; a later step is expected to surface a clearer error.
    """
    import subprocess

    network = "ctfnet"
    # Options shared by both docker invocations.
    run_opts = {"capture_output": True, "text": True, "timeout": 10}

    try:
        probe = subprocess.run(["docker", "network", "inspect", network], **run_opts)
        if probe.returncode != 0:
            # Probe failed, so the network is assumed missing: create it.
            subprocess.run(
                ["docker", "network", "create", network],
                check=True,
                **run_opts,
            )
    except (
        subprocess.CalledProcessError,  # creation failed
        subprocess.TimeoutExpired,  # docker did not answer in time
        FileNotFoundError,  # docker binary not available
    ):
        pass  # Will error later with a clearer message
|
|
1118
|
+
|
|
1119
|
+
|
|
1026
1120
|
if __name__ == "__main__":
|
|
1027
1121
|
cli()
|
|
@@ -124,6 +124,7 @@ def is_running_in_linux_vm_on_arm() -> bool:
|
|
|
124
124
|
True if running Linux on ARM64
|
|
125
125
|
"""
|
|
126
126
|
import sys
|
|
127
|
+
|
|
127
128
|
return sys.platform == "linux" and get_host_architecture() in ("aarch64", "arm64")
|
|
128
129
|
|
|
129
130
|
|
|
@@ -313,10 +314,6 @@ def pull_image_with_progress(
|
|
|
313
314
|
|
|
314
315
|
# Read output from master with timeout
|
|
315
316
|
output_buffer = ""
|
|
316
|
-
# Regex to parse docker progress: "abc123: Downloading [===> ] 10.5MB/50MB"
|
|
317
|
-
progress_pattern = re.compile(
|
|
318
|
-
r"([a-f0-9]+):\s*(Downloading|Extracting|Verifying Checksum|Download complete|Pull complete|Already exists|Waiting)(?:\s+\[.*?\]\s+)?(\d+\.?\d*\s*[kMG]?B)?(?:/(\d+\.?\d*\s*[kMG]?B))?"
|
|
319
|
-
)
|
|
320
317
|
|
|
321
318
|
while True:
|
|
322
319
|
# Check if process is done
|
|
@@ -610,7 +610,9 @@ class TestFixImageArchitecture:
|
|
|
610
610
|
@patch("hte_cli.image_utils.remove_image")
|
|
611
611
|
@patch("hte_cli.image_utils.check_image_architecture_matches_host")
|
|
612
612
|
@patch("hte_cli.image_utils.platform.machine")
|
|
613
|
-
def test_returns_false_when_repull_fails(
|
|
613
|
+
def test_returns_false_when_repull_fails(
|
|
614
|
+
self, mock_machine, mock_check, mock_remove, mock_pull
|
|
615
|
+
):
|
|
614
616
|
"""Returns (False, message) when re-pull fails."""
|
|
615
617
|
mock_machine.return_value = "aarch64"
|
|
616
618
|
mock_check.return_value = (False, "amd64", "aarch64")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|