hte-cli 0.2.28__tar.gz → 0.2.30__tar.gz
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {hte_cli-0.2.28 → hte_cli-0.2.30}/PKG-INFO +1 -1
- {hte_cli-0.2.28 → hte_cli-0.2.30}/pyproject.toml +1 -1
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/cli.py +252 -13
- {hte_cli-0.2.28 → hte_cli-0.2.30}/uv.lock +1 -1
- {hte_cli-0.2.28 → hte_cli-0.2.30}/.gitignore +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/README.md +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/__init__.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/__main__.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/api_client.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/config.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/errors.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/events.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/image_utils.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/runner.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/scorers.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/src/hte_cli/version_check.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/__init__.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/__init__.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/automated_runner.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/conftest.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/e2e_test.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/test_benchmark_flows.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/test_eval_logs.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/test_infrastructure.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/test_runtime_imports.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/test_session_lifecycle.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/e2e/verify_docker_deps.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/unit/__init__.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/unit/conftest.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/unit/test_image_utils.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/unit/test_runner.py +0 -0
- {hte_cli-0.2.28 → hte_cli-0.2.30}/tests/unit/test_scorers.py +0 -0
|
@@ -282,6 +282,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
282
282
|
# Validate compose for sandbox-required benchmarks
|
|
283
283
|
benchmark = session_info.get("benchmark", "").lower()
|
|
284
284
|
SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
|
|
285
|
+
|
|
286
|
+
# NYU CTF requires a shared Docker network for agent/game-server communication
|
|
287
|
+
if benchmark == "nyuctf":
|
|
288
|
+
_ensure_nyuctf_network()
|
|
289
|
+
|
|
285
290
|
if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
|
|
286
291
|
console.print(
|
|
287
292
|
f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
|
|
@@ -347,6 +352,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
347
352
|
console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
|
|
348
353
|
pull_start = time.monotonic()
|
|
349
354
|
pull_errors = {}
|
|
355
|
+
x86_images_on_arm = [] # Track x86 images that need QEMU
|
|
350
356
|
|
|
351
357
|
for img in images:
|
|
352
358
|
short_name = img.split("/")[-1][:40]
|
|
@@ -386,6 +392,25 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
386
392
|
)
|
|
387
393
|
pulled_images.append(img)
|
|
388
394
|
continue
|
|
395
|
+
elif "failed to re-pull" in fix_msg:
|
|
396
|
+
# No ARM variant available - this is an x86-only image
|
|
397
|
+
# Re-pull the amd64 version and warn about QEMU
|
|
398
|
+
console.print(
|
|
399
|
+
f" [dim]No ARM variant available - re-pulling x86 version...[/dim]"
|
|
400
|
+
)
|
|
401
|
+
success = pull_image_with_progress(img)
|
|
402
|
+
if success:
|
|
403
|
+
console.print(
|
|
404
|
+
f" [yellow]![/yellow] {short_name} [dim](x86-only image, needs QEMU)[/dim]"
|
|
405
|
+
)
|
|
406
|
+
x86_images_on_arm.append(img)
|
|
407
|
+
pulled_images.append(img)
|
|
408
|
+
continue
|
|
409
|
+
else:
|
|
410
|
+
console.print(f" [red]✗[/red] {short_name} [dim](failed to pull)[/dim]")
|
|
411
|
+
failed_images.append(img)
|
|
412
|
+
pull_errors[img] = "failed to pull x86 fallback"
|
|
413
|
+
continue
|
|
389
414
|
else:
|
|
390
415
|
console.print(f" [red]✗[/red] {short_name} [dim]({fix_msg})[/dim]")
|
|
391
416
|
failed_images.append(img)
|
|
@@ -396,14 +421,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
396
421
|
last_status = ["connecting..."]
|
|
397
422
|
last_error = [""]
|
|
398
423
|
|
|
399
|
-
#
|
|
400
|
-
#
|
|
424
|
+
# Use platform from compose if specified, otherwise let Docker decide
|
|
425
|
+
# (Docker will prefer native arch for multi-arch images, or pull what's available)
|
|
401
426
|
pull_platform = platform
|
|
402
|
-
if not pull_platform and is_linux_arm and host_platform:
|
|
403
|
-
pull_platform = host_platform
|
|
404
|
-
console.print(
|
|
405
|
-
f" [dim]Pulling {short_name} with platform {host_platform}...[/dim]"
|
|
406
|
-
)
|
|
407
427
|
|
|
408
428
|
with console.status(
|
|
409
429
|
f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
|
|
@@ -432,12 +452,28 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
432
452
|
)
|
|
433
453
|
|
|
434
454
|
if success:
|
|
435
|
-
#
|
|
436
|
-
if is_linux_arm
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
455
|
+
# On Linux ARM64, verify pulled image architecture
|
|
456
|
+
if is_linux_arm:
|
|
457
|
+
from hte_cli.image_utils import get_image_architecture
|
|
458
|
+
pulled_arch = get_image_architecture(img)
|
|
459
|
+
|
|
460
|
+
if pulled_arch == "arm64":
|
|
461
|
+
console.print(
|
|
462
|
+
f" [green]✓[/green] {short_name} [dim](downloaded, arch: arm64)[/dim]"
|
|
463
|
+
)
|
|
464
|
+
elif pulled_arch == "amd64":
|
|
465
|
+
# x86 image on ARM host - needs QEMU emulation
|
|
466
|
+
console.print(
|
|
467
|
+
f" [yellow]![/yellow] {short_name} [dim](downloaded, arch: amd64)[/dim]"
|
|
468
|
+
)
|
|
469
|
+
console.print(
|
|
470
|
+
f" [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
|
|
471
|
+
)
|
|
472
|
+
x86_images_on_arm.append(img)
|
|
473
|
+
else:
|
|
474
|
+
console.print(
|
|
475
|
+
f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]"
|
|
476
|
+
)
|
|
441
477
|
else:
|
|
442
478
|
console.print(f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]")
|
|
443
479
|
pulled_images.append(img)
|
|
@@ -458,6 +494,19 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
458
494
|
)
|
|
459
495
|
console.print()
|
|
460
496
|
|
|
497
|
+
# Warn about x86 images on ARM that need QEMU
|
|
498
|
+
if x86_images_on_arm:
|
|
499
|
+
console.print(
|
|
500
|
+
f"[yellow]⚠ Warning:[/yellow] {len(x86_images_on_arm)} x86 image(s) detected on ARM host"
|
|
501
|
+
)
|
|
502
|
+
console.print(
|
|
503
|
+
" These require QEMU emulation. If container fails to start, run:"
|
|
504
|
+
)
|
|
505
|
+
console.print(
|
|
506
|
+
" [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
|
|
507
|
+
)
|
|
508
|
+
console.print()
|
|
509
|
+
|
|
461
510
|
# Fail fast if any required image couldn't be pulled
|
|
462
511
|
if failed_images:
|
|
463
512
|
console.print(
|
|
@@ -750,6 +799,160 @@ def tasks_pull_images(ctx, count: int):
|
|
|
750
799
|
console.print("[yellow]Image pulling not yet implemented.[/yellow]")
|
|
751
800
|
|
|
752
801
|
|
|
802
|
+
@cli.command("diagnose")
def diagnose_cmd():
    """
    Diagnose Docker and architecture setup.

    Checks Docker installation, architecture detection, and image compatibility.
    Useful for troubleshooting before running tasks.
    """
    # NOTE: the docstring above is kept verbatim because a CLI command's
    # docstring is normally surfaced as its help text.
    #
    # The report is printed in this order: CLI version, host platform,
    # Docker / daemon / Compose probes, architecture of a couple of
    # representative images, a QEMU smoke test (Linux ARM64 only), and a
    # short recommendation.
    import subprocess
    import sys as system_module
    from hte_cli.image_utils import (
        get_host_architecture,
        get_host_docker_platform,
        is_running_in_linux_vm_on_arm,
        get_image_architecture,
        check_image_exists_locally,
    )

    def _probe(argv, limit):
        # Run one external command, capturing its output as text.
        return subprocess.run(argv, capture_output=True, text=True, timeout=limit)

    console.print("[bold]HTE-CLI Diagnostics[/bold]")
    console.print("=" * 50)
    console.print()

    # --- CLI version -------------------------------------------------------
    console.print(f"[bold]CLI Version:[/bold] {__version__}")
    console.print()

    # --- Platform ----------------------------------------------------------
    console.print("[bold]Platform:[/bold]")
    host_arch = get_host_architecture()
    host_platform = get_host_docker_platform()
    is_linux_arm = is_running_in_linux_vm_on_arm()

    console.print(f" OS: {system_module.platform}")
    console.print(f" Architecture: {host_arch}")
    console.print(f" Docker platform: {host_platform or 'unknown'}")

    if is_linux_arm:
        console.print()
        console.print("[yellow]⚠ Linux ARM64 detected![/yellow]")
        console.print(" This environment may have architecture compatibility issues.")
        console.print(" The CLI will automatically handle multi-arch images.")
        console.print(" For x86-only images (CTF challenges), QEMU emulation is required.")
    console.print()

    # --- Docker ------------------------------------------------------------
    console.print("[bold]Docker:[/bold]")

    # Is the docker binary installed and runnable?
    try:
        version_proc = _probe(["docker", "--version"], 10)
        if version_proc.returncode != 0:
            console.print(" [red]✗[/red] Docker not working")
        else:
            console.print(f" [green]✓[/green] Docker: {version_proc.stdout.strip()}")
    except Exception as exc:
        console.print(f" [red]✗[/red] Docker not found: {exc}")

    # Is the daemon reachable?
    try:
        info_proc = _probe(["docker", "info"], 10)
        if info_proc.returncode != 0:
            console.print(" [red]✗[/red] Docker daemon not running")
        else:
            console.print(" [green]✓[/green] Docker daemon running")
    except Exception:
        console.print(" [red]✗[/red] Cannot connect to Docker daemon")

    # Is the Compose plugin available?
    try:
        compose_proc = _probe(["docker", "compose", "version", "--short"], 10)
        if compose_proc.returncode != 0:
            console.print(" [yellow]![/yellow] Docker Compose not available")
        else:
            compose_version = compose_proc.stdout.strip()
            console.print(f" [green]✓[/green] Docker Compose: {compose_version}")
    except Exception:
        console.print(" [yellow]![/yellow] Docker Compose not found")

    console.print()

    # --- Image architecture --------------------------------------------------
    console.print("[bold]Image Architecture Test:[/bold]")
    sample_images = (
        ("python:3.12-slim", "multi-arch (nl2bash/cybashbench)"),
        ("cybench/cybench:latest", "x86-only (cybench/cybergym)"),
    )

    for image_ref, note in sample_images:
        # Only locally cached images are inspected; nothing is pulled here.
        if not check_image_exists_locally(image_ref):
            console.print(f" [dim]-[/dim] {image_ref}: not cached - {note}")
            continue

        image_arch = get_image_architecture(image_ref)
        if not image_arch:
            console.print(f" [dim]?[/dim] {image_ref}: cached (unknown arch)")
        elif is_linux_arm and image_arch == "amd64":
            console.print(
                f" [yellow]![/yellow] {image_ref}: {image_arch} [dim](x86 on ARM - needs QEMU)[/dim]"
            )
        else:
            console.print(f" [green]✓[/green] {image_ref}: {image_arch}")

    console.print()

    # --- QEMU (Linux ARM64 only) ---------------------------------------------
    if is_linux_arm:
        console.print("[bold]QEMU Emulation:[/bold]")
        try:
            # Running an amd64 container only succeeds when binfmt/QEMU is set up.
            qemu_proc = _probe(
                ["docker", "run", "--rm", "--platform", "linux/amd64", "alpine", "uname", "-m"],
                30,
            )
            emulation_ok = qemu_proc.returncode == 0 and "x86_64" in qemu_proc.stdout
            if emulation_ok:
                console.print(" [green]✓[/green] QEMU x86 emulation working")
            else:
                console.print(" [red]✗[/red] QEMU x86 emulation NOT working")
                console.print()
                console.print(" [yellow]To enable QEMU emulation, run:[/yellow]")
                console.print(" [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]")
        except subprocess.TimeoutExpired:
            console.print(" [yellow]![/yellow] QEMU test timed out")
        except Exception as exc:
            console.print(f" [red]✗[/red] QEMU test failed: {exc}")
        # NOTE(review): assumed to close the QEMU section (inside the ARM
        # branch, after the try/except) - confirm against the real file.
        console.print()

    # --- Recommendation ------------------------------------------------------
    console.print("[bold]Recommendation:[/bold]")
    if is_linux_arm:
        console.print(" For nl2bash/cybashbench: Should work with native ARM images")
        console.print(" For CTF challenges (cybench, nyuctf, etc.): Requires QEMU emulation")
    else:
        console.print(" [green]✓[/green] Standard platform - all benchmarks should work")

    console.print()
    console.print("[dim]Run 'hte-cli session join <id>' to test with a real task[/dim]")
|
|
954
|
+
|
|
955
|
+
|
|
753
956
|
# =============================================================================
|
|
754
957
|
# Helper Functions
|
|
755
958
|
# =============================================================================
|
|
@@ -825,5 +1028,41 @@ def _check_docker() -> tuple[bool, str | None]:
|
|
|
825
1028
|
return True, None
|
|
826
1029
|
|
|
827
1030
|
|
|
1031
|
+
def _ensure_nyuctf_network() -> None:
    """Ensure the ``ctfnet`` Docker network exists for NYU CTF challenges.

    NYU CTF tasks use a shared Docker network ('ctfnet') for communication
    between the agent container and game-server container. This network must
    exist before docker compose up is called, since it's declared as external.

    This is a best-effort helper: it never raises. If Docker cannot be
    launched, times out, or refuses to create the network, the function
    returns silently and the failure is left to surface in a later step.

    Returns:
        None, always.
    """
    import subprocess

    NETWORK_NAME = "ctfnet"

    def _docker_network(action: str, *, check: bool = False):
        # Run `docker network <action> ctfnet`, capturing output as text.
        return subprocess.run(
            ["docker", "network", action, NETWORK_NAME],
            capture_output=True,
            text=True,
            check=check,
            timeout=10,
        )

    try:
        # `inspect` exits with status 0 only when the network already exists.
        if _docker_network("inspect").returncode == 0:
            return

        # check=True turns a failed create into CalledProcessError (below).
        _docker_network("create", check=True)
    except subprocess.CalledProcessError:
        # Network creation failed; a later step will report the error.
        pass
    except (subprocess.TimeoutExpired, OSError):
        # Docker not available. OSError (rather than only FileNotFoundError)
        # also covers a `docker` entry on PATH that cannot be executed, e.g.
        # PermissionError, so this helper stays best-effort and never crashes
        # its caller.
        pass
|
|
1065
|
+
|
|
1066
|
+
|
|
828
1067
|
if __name__ == "__main__":
|
|
829
1068
|
cli()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|