PyPI - hte-cli - Versions diffs - 0.2.29__tar.gz → 0.2.31__tar.gz - Mend

hte-cli 0.2.29__tar.gz → 0.2.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{hte_cli-0.2.29 → hte_cli-0.2.31}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hte-cli
-Version: 0.2.29
+Version: 0.2.31
 Summary: Human Time-to-Completion Evaluation CLI
 Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
 Author: Lyptus Research

{hte_cli-0.2.29 → hte_cli-0.2.31}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "hte-cli"
-version = "0.2.29"
+version = "0.2.31"
 description = "Human Time-to-Completion Evaluation CLI"
 readme = "README.md"
 requires-python = ">=3.11"

{hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/api_client.py RENAMED Viewed

@@ -265,3 +265,27 @@ class APIClient:
             json=payload,
             timeout=UPLOAD_TIMEOUT,
         )
+    def upload_partial_log(
+        self,
+        session_id: str,
+        eval_log_bytes: bytes,
+    ) -> dict:
+        """Upload partial eval log for interrupted sessions.
+        Args:
+            session_id: The session ID
+            eval_log_bytes: Partial eval log content
+        Returns:
+            Response dict with status and log_path
+        """
+        payload = {
+            "eval_log_base64": base64.b64encode(eval_log_bytes).decode("ascii"),
+        }
+        return self.post(
+            f"/sessions/{session_id}/partial-log",
+            json=payload,
+            timeout=UPLOAD_TIMEOUT,
+        )

{hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/cli.py RENAMED Viewed

@@ -23,6 +23,42 @@ console = Console()
 SUPPORT_EMAIL = "jacktpayne51@gmail.com"
+def _find_eval_log_bytes(runner) -> bytes | None:
+    """Find and read eval log bytes from runner's work directory.
+    Used for interrupted sessions to upload partial logs.
+    """
+    try:
+        # Look for eval logs in the work directory
+        if not runner.work_dir.exists():
+            return None
+        # Find any .eval files in the work directory tree
+        eval_files = list(runner.work_dir.rglob("*.eval"))
+        if not eval_files:
+            return None
+        # Get the most recent one
+        eval_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
+        return eval_files[0].read_bytes()
+    except Exception:
+        return None
+def _upload_partial_log(api: APIClient, session_id: str, eval_log_bytes: bytes, console: Console) -> None:
+    """Upload partial eval log for interrupted session.
+    Best-effort: silently handles failures to not block exit.
+    """
+    try:
+        size_kb = len(eval_log_bytes) / 1024
+        console.print(f"[dim]Uploading partial eval log ({size_kb:.0f} KB)...[/dim]")
+        api.upload_partial_log(session_id, eval_log_bytes)
+        console.print("[dim]Partial eval log uploaded.[/dim]")
+    except Exception as e:
+        console.print(f"[dim]Could not upload partial log: {e}[/dim]")
 @click.group()
 @click.version_option(__version__, prog_name="hte-cli")
 @click.pass_context
@@ -282,6 +318,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
     # Validate compose for sandbox-required benchmarks
     benchmark = session_info.get("benchmark", "").lower()
     SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
+    # NYU CTF requires a shared Docker network for agent/game-server communication
+    if benchmark == "nyuctf":
+        _ensure_nyuctf_network()
     if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
         console.print(
             f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
@@ -333,15 +374,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
             host_platform = get_host_docker_platform()
             if is_linux_arm:
-                console.print(
-                    f"[yellow]![/yellow] Detected [bold]Linux ARM64[/bold] environment"
-                )
+                console.print("[yellow]![/yellow] Detected [bold]Linux ARM64[/bold] environment")
                 console.print(
                     f"  [dim]Will verify cached images match host architecture ({host_platform})[/dim]"
                 )
-                console.print(
-                    f"  [dim]Mismatched images will be automatically re-pulled[/dim]"
-                )
+                console.print("  [dim]Mismatched images will be automatically re-pulled[/dim]")
                 console.print()
             console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
@@ -377,7 +414,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
                             f"      [dim]Cached image: {image_arch} | Host: {host_arch}[/dim]"
                         )
                         console.print(
-                            f"      [dim]Removing cached image and re-pulling correct architecture...[/dim]"
+                            "      [dim]Removing cached image and re-pulling correct architecture...[/dim]"
                         )
                         needed_fix, fix_msg = fix_image_architecture(img)
@@ -391,7 +428,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
                             # No ARM variant available - this is an x86-only image
                             # Re-pull the amd64 version and warn about QEMU
                             console.print(
-                                f"      [dim]No ARM variant available - re-pulling x86 version...[/dim]"
+                                "      [dim]No ARM variant available - re-pulling x86 version...[/dim]"
                             )
                             success = pull_image_with_progress(img)
                             if success:
@@ -402,7 +439,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
                                 pulled_images.append(img)
                                 continue
                             else:
-                                console.print(f"  [red]✗[/red] {short_name} [dim](failed to pull)[/dim]")
+                                console.print(
+                                    f"  [red]✗[/red] {short_name} [dim](failed to pull)[/dim]"
+                                )
                                 failed_images.append(img)
                                 pull_errors[img] = "failed to pull x86 fallback"
                                 continue
@@ -426,12 +465,12 @@ def session_join(ctx, session_id: str, force_setup: bool):
                     def show_progress(image: str, line: str):
                         # Show docker output directly - includes MB progress from PTY
-                        # Lines look like: "abc123: Downloading  360.9MB/4.075GB"
+                        # Lines look like: "abc123: Downloading [======>    ]  360.9MB/4.075GB"
                         if ": " in line:
                             parts = line.split(": ", 1)
                             if len(parts) == 2:
                                 layer_id = parts[0][-8:]
-                                layer_status = parts[1][:45]
+                                layer_status = parts[1][:70]  # Increased to include size info
                                 display = f"{layer_id}: {layer_status}"
                                 if display != last_status[0]:
                                     last_status[0] = display
@@ -450,6 +489,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
                     # On Linux ARM64, verify pulled image architecture
                     if is_linux_arm:
                         from hte_cli.image_utils import get_image_architecture
                         pulled_arch = get_image_architecture(img)
                         if pulled_arch == "arm64":
@@ -462,7 +502,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
                                 f"  [yellow]![/yellow] {short_name} [dim](downloaded, arch: amd64)[/dim]"
                             )
                             console.print(
-                                f"      [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
+                                "      [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
                             )
                             x86_images_on_arm.append(img)
                         else:
@@ -494,9 +534,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
                 console.print(
                     f"[yellow]⚠ Warning:[/yellow] {len(x86_images_on_arm)} x86 image(s) detected on ARM host"
                 )
-                console.print(
-                    "  These require QEMU emulation. If container fails to start, run:"
-                )
+                console.print("  These require QEMU emulation. If container fails to start, run:")
                 console.print(
                     "  [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
                 )
@@ -513,14 +551,21 @@ def session_join(ctx, session_id: str, force_setup: bool):
                 # Architecture-specific advice
                 if is_linux_arm:
-                    console.print(f"  2. You're on Linux ARM64 - try: docker pull <image> --platform linux/arm64")
-                    console.print("  3. For x86-only images, enable QEMU: docker run --privileged --rm tonistiigi/binfmt --install all")
+                    console.print(
+                        "  2. You're on Linux ARM64 - try: docker pull <image> --platform linux/arm64"
+                    )
+                    console.print(
+                        "  3. For x86-only images, enable QEMU: docker run --privileged --rm tonistiigi/binfmt --install all"
+                    )
                 else:
                     console.print("  2. Try manual pull: docker pull <image>")
                 console.print("  4. Check network connectivity")
                 console.print()
-                console.print("Session remains active - you can retry with: hte-cli session join " + session_id)
+                console.print(
+                    "Session remains active - you can retry with: hte-cli session join "
+                    + session_id
+                )
                 sys.exit(1)
         # Send setup_completed - THIS STARTS THE TIMER ON SERVER
@@ -574,6 +619,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
             eval_log_bytes = result.eval_log_path.read_bytes()
     except KeyboardInterrupt:
         events.docker_stopped(exit_code=130)
+        # Try to find and upload any partial eval log before exiting
+        eval_log_bytes = _find_eval_log_bytes(runner)
+        if eval_log_bytes:
+            _upload_partial_log(api, session_id, eval_log_bytes, console)
         console.print()
         console.print(
             "[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
@@ -581,6 +630,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
         sys.exit(0)
     except Exception as e:
         events.docker_stopped(exit_code=1)
+        # Try to upload partial log on failure too
+        eval_log_bytes = _find_eval_log_bytes(runner)
+        if eval_log_bytes:
+            _upload_partial_log(api, session_id, eval_log_bytes, console)
         console.print(f"[red]Task execution failed: {e}[/red]")
         sys.exit(1)
     finally:
@@ -930,7 +983,9 @@ def diagnose_cmd():
                 console.print("  [red]✗[/red] QEMU x86 emulation NOT working")
                 console.print()
                 console.print("  [yellow]To enable QEMU emulation, run:[/yellow]")
-                console.print("  [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]")
+                console.print(
+                    "  [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
+                )
         except subprocess.TimeoutExpired:
             console.print("  [yellow]![/yellow] QEMU test timed out")
         except Exception as e:
@@ -974,7 +1029,10 @@ def _check_docker() -> tuple[bool, str | None]:
             timeout=10,
         )
         if result.returncode != 0:
-            return False, "Docker is not running. Start Docker (Docker Desktop, colima, or dockerd)."
+            return (
+                False,
+                "Docker is not running. Start Docker (Docker Desktop, colima, or dockerd).",
+            )
     except FileNotFoundError:
         return False, "Docker is not installed. Install from https://docs.docker.com/get-docker/"
     except Exception as e:
@@ -1023,5 +1081,41 @@ def _check_docker() -> tuple[bool, str | None]:
     return True, None
+def _ensure_nyuctf_network() -> None:
+    """Ensure the ctfnet Docker network exists for NYU CTF challenges.
+    NYU CTF tasks use a shared Docker network ('ctfnet') for communication
+    between the agent container and game-server container. This network must
+    exist before docker compose up is called, since it's declared as external.
+    """
+    import subprocess
+    NETWORK_NAME = "ctfnet"
+    try:
+        # Check if network exists
+        result = subprocess.run(
+            ["docker", "network", "inspect", NETWORK_NAME],
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+        if result.returncode == 0:
+            return  # Network exists
+        # Create the network
+        subprocess.run(
+            ["docker", "network", "create", NETWORK_NAME],
+            capture_output=True,
+            text=True,
+            check=True,
+            timeout=10,
+        )
+    except subprocess.CalledProcessError:
+        pass  # Network creation failed, will error later with clearer message
+    except (subprocess.TimeoutExpired, FileNotFoundError):
+        pass  # Docker not available, will error later
 if __name__ == "__main__":
     cli()

{hte_cli-0.2.29 → hte_cli-0.2.31}/src/hte_cli/image_utils.py RENAMED Viewed

@@ -124,6 +124,7 @@ def is_running_in_linux_vm_on_arm() -> bool:
         True if running Linux on ARM64
     """
     import sys
     return sys.platform == "linux" and get_host_architecture() in ("aarch64", "arm64")
@@ -313,10 +314,6 @@ def pull_image_with_progress(
         # Read output from master with timeout
         output_buffer = ""
-        # Regex to parse docker progress: "abc123: Downloading [===>  ] 10.5MB/50MB"
-        progress_pattern = re.compile(
-            r"([a-f0-9]+):\s*(Downloading|Extracting|Verifying Checksum|Download complete|Pull complete|Already exists|Waiting)(?:\s+\[.*?\]\s+)?(\d+\.?\d*\s*[kMG]?B)?(?:/(\d+\.?\d*\s*[kMG]?B))?"
-        )
         while True:
             # Check if process is done

{hte_cli-0.2.29 → hte_cli-0.2.31}/tests/unit/test_image_utils.py RENAMED Viewed

@@ -610,7 +610,9 @@ class TestFixImageArchitecture:
     @patch("hte_cli.image_utils.remove_image")
     @patch("hte_cli.image_utils.check_image_architecture_matches_host")
     @patch("hte_cli.image_utils.platform.machine")
-    def test_returns_false_when_repull_fails(self, mock_machine, mock_check, mock_remove, mock_pull):
+    def test_returns_false_when_repull_fails(
+        self, mock_machine, mock_check, mock_remove, mock_pull
+    ):
         """Returns (False, message) when re-pull fails."""
         mock_machine.return_value = "aarch64"
         mock_check.return_value = (False, "amd64", "aarch64")

{hte_cli-0.2.29 → hte_cli-0.2.31}/uv.lock RENAMED Viewed

@@ -625,7 +625,7 @@ wheels = [
 [[package]]
 name = "hte-cli"
-version = "0.2.29"
+version = "0.2.30"
 source = { editable = "." }
 dependencies = [
     { name = "click" },