hte-cli 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hte_cli/cli.py CHANGED
@@ -282,6 +282,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
282
282
  # Validate compose for sandbox-required benchmarks
283
283
  benchmark = session_info.get("benchmark", "").lower()
284
284
  SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
285
+
286
+ # NYU CTF requires a shared Docker network for agent/game-server communication
287
+ if benchmark == "nyuctf":
288
+ _ensure_nyuctf_network()
289
+
285
290
  if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
286
291
  console.print(
287
292
  f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
@@ -347,6 +352,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
347
352
  console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
348
353
  pull_start = time.monotonic()
349
354
  pull_errors = {}
355
+ x86_images_on_arm = [] # Track x86 images that need QEMU
350
356
 
351
357
  for img in images:
352
358
  short_name = img.split("/")[-1][:40]
@@ -386,6 +392,25 @@ def session_join(ctx, session_id: str, force_setup: bool):
386
392
  )
387
393
  pulled_images.append(img)
388
394
  continue
395
+ elif "failed to re-pull" in fix_msg:
396
+ # No ARM variant available - this is an x86-only image
397
+ # Re-pull the amd64 version and warn about QEMU
398
+ console.print(
399
+ f" [dim]No ARM variant available - re-pulling x86 version...[/dim]"
400
+ )
401
+ success = pull_image_with_progress(img)
402
+ if success:
403
+ console.print(
404
+ f" [yellow]![/yellow] {short_name} [dim](x86-only image, needs QEMU)[/dim]"
405
+ )
406
+ x86_images_on_arm.append(img)
407
+ pulled_images.append(img)
408
+ continue
409
+ else:
410
+ console.print(f" [red]✗[/red] {short_name} [dim](failed to pull)[/dim]")
411
+ failed_images.append(img)
412
+ pull_errors[img] = "failed to pull x86 fallback"
413
+ continue
389
414
  else:
390
415
  console.print(f" [red]✗[/red] {short_name} [dim]({fix_msg})[/dim]")
391
416
  failed_images.append(img)
@@ -396,14 +421,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
396
421
  last_status = ["connecting..."]
397
422
  last_error = [""]
398
423
 
399
- # On Linux ARM64, use host platform if no explicit platform in compose
400
- # This prevents pulling amd64 images that won't run
424
+ # Use platform from compose if specified, otherwise let Docker decide
425
+ # (Docker will prefer native arch for multi-arch images, or pull what's available)
401
426
  pull_platform = platform
402
- if not pull_platform and is_linux_arm and host_platform:
403
- pull_platform = host_platform
404
- console.print(
405
- f" [dim]Pulling {short_name} with platform {host_platform}...[/dim]"
406
- )
407
427
 
408
428
  with console.status(
409
429
  f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
@@ -432,12 +452,28 @@ def session_join(ctx, session_id: str, force_setup: bool):
432
452
  )
433
453
 
434
454
  if success:
435
- # Show platform info on Linux ARM64 for confirmation
436
- if is_linux_arm and pull_platform:
437
- arch_short = pull_platform.split("/")[-1] # e.g., "arm64"
438
- console.print(
439
- f" [green]✓[/green] {short_name} [dim](downloaded, arch: {arch_short})[/dim]"
440
- )
455
+ # On Linux ARM64, verify pulled image architecture
456
+ if is_linux_arm:
457
+ from hte_cli.image_utils import get_image_architecture
458
+ pulled_arch = get_image_architecture(img)
459
+
460
+ if pulled_arch == "arm64":
461
+ console.print(
462
+ f" [green]✓[/green] {short_name} [dim](downloaded, arch: arm64)[/dim]"
463
+ )
464
+ elif pulled_arch == "amd64":
465
+ # x86 image on ARM host - needs QEMU emulation
466
+ console.print(
467
+ f" [yellow]![/yellow] {short_name} [dim](downloaded, arch: amd64)[/dim]"
468
+ )
469
+ console.print(
470
+ f" [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
471
+ )
472
+ x86_images_on_arm.append(img)
473
+ else:
474
+ console.print(
475
+ f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]"
476
+ )
441
477
  else:
442
478
  console.print(f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]")
443
479
  pulled_images.append(img)
@@ -458,6 +494,19 @@ def session_join(ctx, session_id: str, force_setup: bool):
458
494
  )
459
495
  console.print()
460
496
 
497
+ # Warn about x86 images on ARM that need QEMU
498
+ if x86_images_on_arm:
499
+ console.print(
500
+ f"[yellow]⚠ Warning:[/yellow] {len(x86_images_on_arm)} x86 image(s) detected on ARM host"
501
+ )
502
+ console.print(
503
+ " These require QEMU emulation. If container fails to start, run:"
504
+ )
505
+ console.print(
506
+ " [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
507
+ )
508
+ console.print()
509
+
461
510
  # Fail fast if any required image couldn't be pulled
462
511
  if failed_images:
463
512
  console.print(
@@ -750,6 +799,160 @@ def tasks_pull_images(ctx, count: int):
750
799
  console.print("[yellow]Image pulling not yet implemented.[/yellow]")
751
800
 
752
801
 
802
+ @cli.command("diagnose")
803
+ def diagnose_cmd():
804
+ """
805
+ Diagnose Docker and architecture setup.
806
+
807
+ Checks Docker installation, architecture detection, and image compatibility.
808
+ Useful for troubleshooting before running tasks.
809
+ """
810
+ import subprocess
811
+ import sys as system_module
812
+ from hte_cli.image_utils import (
813
+ get_host_architecture,
814
+ get_host_docker_platform,
815
+ is_running_in_linux_vm_on_arm,
816
+ get_image_architecture,
817
+ check_image_exists_locally,
818
+ )
819
+
820
+ console.print("[bold]HTE-CLI Diagnostics[/bold]")
821
+ console.print("=" * 50)
822
+ console.print()
823
+
824
+ # CLI version
825
+ console.print(f"[bold]CLI Version:[/bold] {__version__}")
826
+ console.print()
827
+
828
+ # Platform info
829
+ console.print("[bold]Platform:[/bold]")
830
+ host_arch = get_host_architecture()
831
+ host_platform = get_host_docker_platform()
832
+ is_linux_arm = is_running_in_linux_vm_on_arm()
833
+
834
+ console.print(f" OS: {system_module.platform}")
835
+ console.print(f" Architecture: {host_arch}")
836
+ console.print(f" Docker platform: {host_platform or 'unknown'}")
837
+
838
+ if is_linux_arm:
839
+ console.print()
840
+ console.print("[yellow]⚠ Linux ARM64 detected![/yellow]")
841
+ console.print(" This environment may have architecture compatibility issues.")
842
+ console.print(" The CLI will automatically handle multi-arch images.")
843
+ console.print(" For x86-only images (CTF challenges), QEMU emulation is required.")
844
+ console.print()
845
+
846
+ # Docker checks
847
+ console.print("[bold]Docker:[/bold]")
848
+
849
+ # Check Docker installed
850
+ try:
851
+ result = subprocess.run(
852
+ ["docker", "--version"],
853
+ capture_output=True,
854
+ text=True,
855
+ timeout=10,
856
+ )
857
+ if result.returncode == 0:
858
+ console.print(f" [green]✓[/green] Docker: {result.stdout.strip()}")
859
+ else:
860
+ console.print(" [red]✗[/red] Docker not working")
861
+ except Exception as e:
862
+ console.print(f" [red]✗[/red] Docker not found: {e}")
863
+
864
+ # Check Docker daemon
865
+ try:
866
+ result = subprocess.run(
867
+ ["docker", "info"],
868
+ capture_output=True,
869
+ text=True,
870
+ timeout=10,
871
+ )
872
+ if result.returncode == 0:
873
+ console.print(" [green]✓[/green] Docker daemon running")
874
+ else:
875
+ console.print(" [red]✗[/red] Docker daemon not running")
876
+ except Exception:
877
+ console.print(" [red]✗[/red] Cannot connect to Docker daemon")
878
+
879
+ # Check Compose version
880
+ try:
881
+ result = subprocess.run(
882
+ ["docker", "compose", "version", "--short"],
883
+ capture_output=True,
884
+ text=True,
885
+ timeout=10,
886
+ )
887
+ if result.returncode == 0:
888
+ version = result.stdout.strip()
889
+ console.print(f" [green]✓[/green] Docker Compose: {version}")
890
+ else:
891
+ console.print(" [yellow]![/yellow] Docker Compose not available")
892
+ except Exception:
893
+ console.print(" [yellow]![/yellow] Docker Compose not found")
894
+
895
+ console.print()
896
+
897
+ # Test images
898
+ console.print("[bold]Image Architecture Test:[/bold]")
899
+ test_images = [
900
+ ("python:3.12-slim", "multi-arch (nl2bash/cybashbench)"),
901
+ ("cybench/cybench:latest", "x86-only (cybench/cybergym)"),
902
+ ]
903
+
904
+ for image, description in test_images:
905
+ if check_image_exists_locally(image):
906
+ arch = get_image_architecture(image)
907
+ if arch:
908
+ if is_linux_arm and arch == "amd64":
909
+ console.print(
910
+ f" [yellow]![/yellow] {image}: {arch} [dim](x86 on ARM - needs QEMU)[/dim]"
911
+ )
912
+ else:
913
+ console.print(f" [green]✓[/green] {image}: {arch}")
914
+ else:
915
+ console.print(f" [dim]?[/dim] {image}: cached (unknown arch)")
916
+ else:
917
+ console.print(f" [dim]-[/dim] {image}: not cached - {description}")
918
+
919
+ console.print()
920
+
921
+ # QEMU check (for Linux ARM64)
922
+ if is_linux_arm:
923
+ console.print("[bold]QEMU Emulation:[/bold]")
924
+ try:
925
+ # Check if binfmt is set up for x86
926
+ result = subprocess.run(
927
+ ["docker", "run", "--rm", "--platform", "linux/amd64", "alpine", "uname", "-m"],
928
+ capture_output=True,
929
+ text=True,
930
+ timeout=30,
931
+ )
932
+ if result.returncode == 0 and "x86_64" in result.stdout:
933
+ console.print(" [green]✓[/green] QEMU x86 emulation working")
934
+ else:
935
+ console.print(" [red]✗[/red] QEMU x86 emulation NOT working")
936
+ console.print()
937
+ console.print(" [yellow]To enable QEMU emulation, run:[/yellow]")
938
+ console.print(" [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]")
939
+ except subprocess.TimeoutExpired:
940
+ console.print(" [yellow]![/yellow] QEMU test timed out")
941
+ except Exception as e:
942
+ console.print(f" [red]✗[/red] QEMU test failed: {e}")
943
+ console.print()
944
+
945
+ console.print("[bold]Recommendation:[/bold]")
946
+ if is_linux_arm:
947
+ console.print(" For nl2bash/cybashbench: Should work with native ARM images")
948
+ console.print(" For CTF challenges (cybench, nyuctf, etc.): Requires QEMU emulation")
949
+ else:
950
+ console.print(" [green]✓[/green] Standard platform - all benchmarks should work")
951
+
952
+ console.print()
953
+ console.print("[dim]Run 'hte-cli session join <id>' to test with a real task[/dim]")
954
+
955
+
753
956
  # =============================================================================
754
957
  # Helper Functions
755
958
  # =============================================================================
@@ -825,5 +1028,41 @@ def _check_docker() -> tuple[bool, str | None]:
825
1028
  return True, None
826
1029
 
827
1030
 
1031
+ def _ensure_nyuctf_network() -> None:
1032
+ """Ensure the ctfnet Docker network exists for NYU CTF challenges.
1033
+
1034
+ NYU CTF tasks use a shared Docker network ('ctfnet') for communication
1035
+ between the agent container and game-server container. This network must
1036
+ exist before docker compose up is called, since it's declared as external.
1037
+ """
1038
+ import subprocess
1039
+
1040
+ NETWORK_NAME = "ctfnet"
1041
+
1042
+ try:
1043
+ # Check if network exists
1044
+ result = subprocess.run(
1045
+ ["docker", "network", "inspect", NETWORK_NAME],
1046
+ capture_output=True,
1047
+ text=True,
1048
+ timeout=10,
1049
+ )
1050
+ if result.returncode == 0:
1051
+ return # Network exists
1052
+
1053
+ # Create the network
1054
+ subprocess.run(
1055
+ ["docker", "network", "create", NETWORK_NAME],
1056
+ capture_output=True,
1057
+ text=True,
1058
+ check=True,
1059
+ timeout=10,
1060
+ )
1061
+ except subprocess.CalledProcessError:
1062
+ pass # Network creation failed, will error later with clearer message
1063
+ except (subprocess.TimeoutExpired, FileNotFoundError):
1064
+ pass # Docker not available, will error later
1065
+
1066
+
828
1067
  if __name__ == "__main__":
829
1068
  cli()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.28
3
+ Version: 0.2.30
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,7 +1,7 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
3
  hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
- hte_cli/cli.py,sha256=EgL9nlQ2R0TSp8qUtPe5YwTN3KlrNCQ8tRQnUhnFrP4,30647
4
+ hte_cli/cli.py,sha256=W8MUjc10ouzqiZOdoJLixWQRBGV2ED9m4Vorc96gbto,39989
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
7
  hte_cli/events.py,sha256=oDKCS-a0IZ7bz7xkwQj5eM4DoDCYvnclAGohrMTWf8s,5644
@@ -9,7 +9,7 @@ hte_cli/image_utils.py,sha256=n4AmbaR9tbH0ahXbTOn7Rr_VeRbhg1RgWknsWwI_83c,13249
9
9
  hte_cli/runner.py,sha256=SWl9FF4X3e9eBbZyL0ujhmmSL5OK8J6st-Ty0jD5AWM,14550
10
10
  hte_cli/scorers.py,sha256=B0ZjQ3Fh-VDkc_8CDc86yW7vpdimbV3RSqs7l-VeUIg,6629
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.2.28.dist-info/METADATA,sha256=cBb8EnxzFdZrsodJ3icqvprVywpeYJDb1__AI6Lm99A,3820
13
- hte_cli-0.2.28.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.2.28.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.2.28.dist-info/RECORD,,
12
+ hte_cli-0.2.30.dist-info/METADATA,sha256=GGAZFAGh4SeQTC6e8Ey4lmL1qN624ajAuKQ_GICQq7A,3820
13
+ hte_cli-0.2.30.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.30.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.30.dist-info/RECORD,,