hte-cli 0.2.30__tar.gz → 0.2.32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {hte_cli-0.2.30 → hte_cli-0.2.32}/PKG-INFO +1 -1
  2. {hte_cli-0.2.30 → hte_cli-0.2.32}/pyproject.toml +1 -1
  3. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/api_client.py +24 -0
  4. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/cli.py +183 -23
  5. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/image_utils.py +1 -4
  6. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/unit/test_image_utils.py +3 -1
  7. {hte_cli-0.2.30 → hte_cli-0.2.32}/.gitignore +0 -0
  8. {hte_cli-0.2.30 → hte_cli-0.2.32}/README.md +0 -0
  9. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/__init__.py +0 -0
  10. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/__main__.py +0 -0
  11. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/config.py +0 -0
  12. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/errors.py +0 -0
  13. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/events.py +0 -0
  14. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/runner.py +0 -0
  15. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/scorers.py +0 -0
  16. {hte_cli-0.2.30 → hte_cli-0.2.32}/src/hte_cli/version_check.py +0 -0
  17. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/__init__.py +0 -0
  18. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/__init__.py +0 -0
  19. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/automated_runner.py +0 -0
  20. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/conftest.py +0 -0
  21. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/e2e_test.py +0 -0
  22. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/test_benchmark_flows.py +0 -0
  23. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/test_eval_logs.py +0 -0
  24. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/test_infrastructure.py +0 -0
  25. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/test_runtime_imports.py +0 -0
  26. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/test_session_lifecycle.py +0 -0
  27. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/e2e/verify_docker_deps.py +0 -0
  28. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/unit/__init__.py +0 -0
  29. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/unit/conftest.py +0 -0
  30. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/unit/test_runner.py +0 -0
  31. {hte_cli-0.2.30 → hte_cli-0.2.32}/tests/unit/test_scorers.py +0 -0
  32. {hte_cli-0.2.30 → hte_cli-0.2.32}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.30
3
+ Version: 0.2.32
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hte-cli"
3
- version = "0.2.30"
3
+ version = "0.2.32"
4
4
  description = "Human Time-to-Completion Evaluation CLI"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -265,3 +265,27 @@ class APIClient:
265
265
  json=payload,
266
266
  timeout=UPLOAD_TIMEOUT,
267
267
  )
268
+
269
def upload_partial_log(
    self,
    session_id: str,
    eval_log_bytes: bytes,
) -> dict:
    """Send the eval log captured so far for an interrupted session.

    The raw bytes are base64-encoded (ASCII) and posted as JSON under the
    ``eval_log_base64`` key to ``/sessions/<session_id>/partial-log``.

    Args:
        session_id: The session ID
        eval_log_bytes: Partial eval log content

    Returns:
        Whatever ``self.post`` returns for the request (per the original
        docstring: a response dict with status and log_path).
    """
    encoded_log = base64.b64encode(eval_log_bytes).decode("ascii")
    endpoint = f"/sessions/{session_id}/partial-log"
    return self.post(
        endpoint,
        json={"eval_log_base64": encoded_log},
        timeout=UPLOAD_TIMEOUT,
    )
@@ -4,7 +4,9 @@ Uses Click for command parsing and Rich for pretty output.
4
4
  """
5
5
 
6
6
  import os
7
+ import signal
7
8
  import sys
9
+ import threading
8
10
  import webbrowser
9
11
 
10
12
  import click
@@ -22,6 +24,121 @@ console = Console()
22
24
  # Support email per spec
23
25
  SUPPORT_EMAIL = "jacktpayne51@gmail.com"
24
26
 
27
# Warning before cap (15 minutes)
CAP_WARNING_SECONDS = 15 * 60


class CapEnforcer:
    """Background timer that enforces time cap on capped_completion tasks.

    Shows a warning CAP_WARNING_SECONDS before the cap and interrupts the main
    thread (KeyboardInterrupt) when the cap is reached. ``cap_reached`` is set
    to True before the interrupt is delivered so the caller can tell a cap
    expiry apart from a user Ctrl+C.
    """

    def __init__(
        self,
        time_cap_seconds: int,
        start_time: float,
        console: "Console",
        main_thread_id: int,
    ):
        """Store the cap configuration; the timer is not started until start().

        Args:
            time_cap_seconds: Length of the cap in seconds.
            start_time: ``time.monotonic()`` reading taken when the task began.
            console: Object with a Rich-style ``print`` method used for output.
            main_thread_id: Identifier of the main thread (stored, not used by
                the timer itself).
        """
        self.time_cap_seconds = time_cap_seconds
        self.start_time = start_time
        self.console = console
        self.main_thread_id = main_thread_id
        self._stop_event = threading.Event()
        self._warning_shown = False
        self._thread: threading.Thread | None = None
        self.cap_reached = False

    def start(self):
        """Start the background timer thread."""
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def stop(self):
        """Stop the background timer and wait briefly for the thread to exit."""
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=1.0)

    def _run(self):
        """Timer loop: evaluate elapsed time once per second until stopped."""
        import time

        while not self._stop_event.is_set():
            if self._tick(time.monotonic() - self.start_time):
                break

            # Check every second (returns early if stop() is called)
            self._stop_event.wait(1.0)

    def _tick(self, elapsed: float) -> bool:
        """Handle one timer check for the given elapsed seconds.

        Prints the one-time warning once the warning threshold is passed and,
        when the cap is reached, marks ``cap_reached`` and interrupts the main
        thread.

        Returns:
            True if the cap was reached (the timer loop should end).
        """
        import math

        warning_threshold = self.time_cap_seconds - CAP_WARNING_SECONDS

        # Show warning at 15 minutes before cap
        if elapsed >= warning_threshold and not self._warning_shown:
            self._warning_shown = True
            remaining = max(self.time_cap_seconds - elapsed, 0)
            # Round up: the check fires a fraction of a second after the
            # threshold, and flooring would report "14 minutes" at the
            # 15-minute mark.
            minutes = math.ceil(remaining / 60)
            self.console.print()
            self.console.print(
                f"[yellow bold]Warning: Time cap approaching - {minutes} minutes remaining[/yellow bold]"
            )
            self.console.print(
                "[yellow]When cap is reached, session will end and you'll need to record progress in the web UI.[/yellow]"
            )
            self.console.print()

        # Cap reached - terminate the main thread
        if elapsed >= self.time_cap_seconds:
            self.cap_reached = True
            self.console.print()
            self.console.print(
                f"[red bold]Time cap reached ({self.time_cap_seconds // 60} minutes). Session ending.[/red bold]"
            )
            self.console.print(
                "[yellow]Return to the web UI to record your progress and estimate completion time.[/yellow]"
            )
            self._interrupt_main()
            return True

        return False

    def _interrupt_main(self):
        """Trigger KeyboardInterrupt in the main thread."""
        if sys.platform == "win32":
            # On Windows os.kill() with SIGINT terminates the process outright
            # (TerminateProcess), skipping the KeyboardInterrupt handler and
            # cleanup, so simulate the signal instead.
            import _thread

            _thread.interrupt_main()
        else:
            # A real SIGINT also wakes the main thread from blocking calls.
            os.kill(os.getpid(), signal.SIGINT)
103
+
104
+
105
+ def _find_eval_log_bytes(runner) -> bytes | None:
106
+ """Find and read eval log bytes from runner's work directory.
107
+
108
+ Used for interrupted sessions to upload partial logs.
109
+ """
110
+ try:
111
+ # Look for eval logs in the work directory
112
+ if not runner.work_dir.exists():
113
+ return None
114
+
115
+ # Find any .eval files in the work directory tree
116
+ eval_files = list(runner.work_dir.rglob("*.eval"))
117
+ if not eval_files:
118
+ return None
119
+
120
+ # Get the most recent one
121
+ eval_files.sort(key=lambda p: p.stat().st_mtime, reverse=True)
122
+ return eval_files[0].read_bytes()
123
+ except Exception:
124
+ return None
125
+
126
+
127
def _upload_partial_log(
    api: APIClient, session_id: str, eval_log_bytes: bytes, console: Console
) -> None:
    """Upload partial eval log for interrupted session.

    Prints the payload size, calls ``api.upload_partial_log`` and reports the
    outcome on ``console``.

    Best-effort: any failure is reported as a dim message and swallowed so it
    never blocks exit.
    """
    try:
        size_kb = len(eval_log_bytes) / 1024
        console.print(f"[dim]Uploading partial eval log ({size_kb:.0f} KB)...[/dim]")
        api.upload_partial_log(session_id, eval_log_bytes)
        console.print("[dim]Partial eval log uploaded.[/dim]")
    except Exception as e:
        from rich.markup import escape

        # Escape the message: error text often contains square brackets
        # (e.g. "[Errno 111] ..."), which Rich would otherwise parse as markup
        # tags - dropping text or raising MarkupError inside this handler.
        console.print(f"[dim]Could not upload partial log: {escape(str(e))}[/dim]")
141
+
25
142
 
26
143
  @click.group()
27
144
  @click.version_option(__version__, prog_name="hte-cli")
@@ -338,15 +455,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
338
455
  host_platform = get_host_docker_platform()
339
456
 
340
457
  if is_linux_arm:
341
- console.print(
342
- f"[yellow]![/yellow] Detected [bold]Linux ARM64[/bold] environment"
343
- )
458
+ console.print("[yellow]![/yellow] Detected [bold]Linux ARM64[/bold] environment")
344
459
  console.print(
345
460
  f" [dim]Will verify cached images match host architecture ({host_platform})[/dim]"
346
461
  )
347
- console.print(
348
- f" [dim]Mismatched images will be automatically re-pulled[/dim]"
349
- )
462
+ console.print(" [dim]Mismatched images will be automatically re-pulled[/dim]")
350
463
  console.print()
351
464
 
352
465
  console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
@@ -382,7 +495,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
382
495
  f" [dim]Cached image: {image_arch} | Host: {host_arch}[/dim]"
383
496
  )
384
497
  console.print(
385
- f" [dim]Removing cached image and re-pulling correct architecture...[/dim]"
498
+ " [dim]Removing cached image and re-pulling correct architecture...[/dim]"
386
499
  )
387
500
 
388
501
  needed_fix, fix_msg = fix_image_architecture(img)
@@ -396,7 +509,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
396
509
  # No ARM variant available - this is an x86-only image
397
510
  # Re-pull the amd64 version and warn about QEMU
398
511
  console.print(
399
- f" [dim]No ARM variant available - re-pulling x86 version...[/dim]"
512
+ " [dim]No ARM variant available - re-pulling x86 version...[/dim]"
400
513
  )
401
514
  success = pull_image_with_progress(img)
402
515
  if success:
@@ -407,7 +520,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
407
520
  pulled_images.append(img)
408
521
  continue
409
522
  else:
410
- console.print(f" [red]✗[/red] {short_name} [dim](failed to pull)[/dim]")
523
+ console.print(
524
+ f" [red]✗[/red] {short_name} [dim](failed to pull)[/dim]"
525
+ )
411
526
  failed_images.append(img)
412
527
  pull_errors[img] = "failed to pull x86 fallback"
413
528
  continue
@@ -431,12 +546,12 @@ def session_join(ctx, session_id: str, force_setup: bool):
431
546
 
432
547
  def show_progress(image: str, line: str):
433
548
  # Show docker output directly - includes MB progress from PTY
434
- # Lines look like: "abc123: Downloading 360.9MB/4.075GB"
549
+ # Lines look like: "abc123: Downloading [======> ] 360.9MB/4.075GB"
435
550
  if ": " in line:
436
551
  parts = line.split(": ", 1)
437
552
  if len(parts) == 2:
438
553
  layer_id = parts[0][-8:]
439
- layer_status = parts[1][:45]
554
+ layer_status = parts[1][:85] # Include full progress bar + size
440
555
  display = f"{layer_id}: {layer_status}"
441
556
  if display != last_status[0]:
442
557
  last_status[0] = display
@@ -455,6 +570,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
455
570
  # On Linux ARM64, verify pulled image architecture
456
571
  if is_linux_arm:
457
572
  from hte_cli.image_utils import get_image_architecture
573
+
458
574
  pulled_arch = get_image_architecture(img)
459
575
 
460
576
  if pulled_arch == "arm64":
@@ -467,7 +583,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
467
583
  f" [yellow]![/yellow] {short_name} [dim](downloaded, arch: amd64)[/dim]"
468
584
  )
469
585
  console.print(
470
- f" [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
586
+ " [yellow]This is an x86 image - requires QEMU emulation on ARM[/yellow]"
471
587
  )
472
588
  x86_images_on_arm.append(img)
473
589
  else:
@@ -499,9 +615,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
499
615
  console.print(
500
616
  f"[yellow]⚠ Warning:[/yellow] {len(x86_images_on_arm)} x86 image(s) detected on ARM host"
501
617
  )
502
- console.print(
503
- " These require QEMU emulation. If container fails to start, run:"
504
- )
618
+ console.print(" These require QEMU emulation. If container fails to start, run:")
505
619
  console.print(
506
620
  " [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
507
621
  )
@@ -518,14 +632,21 @@ def session_join(ctx, session_id: str, force_setup: bool):
518
632
 
519
633
  # Architecture-specific advice
520
634
  if is_linux_arm:
521
- console.print(f" 2. You're on Linux ARM64 - try: docker pull <image> --platform linux/arm64")
522
- console.print(" 3. For x86-only images, enable QEMU: docker run --privileged --rm tonistiigi/binfmt --install all")
635
+ console.print(
636
+ " 2. You're on Linux ARM64 - try: docker pull <image> --platform linux/arm64"
637
+ )
638
+ console.print(
639
+ " 3. For x86-only images, enable QEMU: docker run --privileged --rm tonistiigi/binfmt --install all"
640
+ )
523
641
  else:
524
642
  console.print(" 2. Try manual pull: docker pull <image>")
525
643
 
526
644
  console.print(" 4. Check network connectivity")
527
645
  console.print()
528
- console.print("Session remains active - you can retry with: hte-cli session join " + session_id)
646
+ console.print(
647
+ "Session remains active - you can retry with: hte-cli session join "
648
+ + session_id
649
+ )
529
650
  sys.exit(1)
530
651
 
531
652
  # Send setup_completed - THIS STARTS THE TIMER ON SERVER
@@ -566,6 +687,23 @@ def session_join(ctx, session_id: str, force_setup: bool):
566
687
 
567
688
  events.docker_started()
568
689
 
690
+ # Start cap enforcer if this is a capped_completion task
691
+ time_cap_seconds = assignment.get("time_cap_seconds")
692
+ cap_enforcer: CapEnforcer | None = None
693
+ if time_cap_seconds and session_info.get("mode") == "capped_completion":
694
+ cap_enforcer = CapEnforcer(
695
+ time_cap_seconds=time_cap_seconds,
696
+ start_time=time.monotonic(),
697
+ console=console,
698
+ main_thread_id=threading.get_ident(),
699
+ )
700
+ cap_enforcer.start()
701
+ console.print(
702
+ f"[dim]Time cap: {time_cap_seconds // 60} minutes "
703
+ f"(warning at {(time_cap_seconds - CAP_WARNING_SECONDS) // 60} min)[/dim]"
704
+ )
705
+ console.print()
706
+
569
707
  runner = TaskRunner()
570
708
  eval_log_bytes = None
571
709
  try:
@@ -579,16 +717,33 @@ def session_join(ctx, session_id: str, force_setup: bool):
579
717
  eval_log_bytes = result.eval_log_path.read_bytes()
580
718
  except KeyboardInterrupt:
581
719
  events.docker_stopped(exit_code=130)
720
+ # Try to find and upload any partial eval log before exiting
721
+ eval_log_bytes = _find_eval_log_bytes(runner)
722
+ if eval_log_bytes:
723
+ _upload_partial_log(api, session_id, eval_log_bytes, console)
582
724
  console.print()
583
- console.print(
584
- "[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
585
- )
725
+ # Different message if cap was reached vs user interrupt
726
+ if cap_enforcer and cap_enforcer.cap_reached:
727
+ console.print(
728
+ "[yellow]Time cap reached. Return to web UI to record progress and estimate completion time.[/yellow]"
729
+ )
730
+ else:
731
+ console.print(
732
+ "[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
733
+ )
586
734
  sys.exit(0)
587
735
  except Exception as e:
588
736
  events.docker_stopped(exit_code=1)
737
+ # Try to upload partial log on failure too
738
+ eval_log_bytes = _find_eval_log_bytes(runner)
739
+ if eval_log_bytes:
740
+ _upload_partial_log(api, session_id, eval_log_bytes, console)
589
741
  console.print(f"[red]Task execution failed: {e}[/red]")
590
742
  sys.exit(1)
591
743
  finally:
744
+ # Stop cap enforcer
745
+ if cap_enforcer:
746
+ cap_enforcer.stop()
592
747
  runner.cleanup()
593
748
 
594
749
  events.docker_stopped(exit_code=0)
@@ -935,7 +1090,9 @@ def diagnose_cmd():
935
1090
  console.print(" [red]✗[/red] QEMU x86 emulation NOT working")
936
1091
  console.print()
937
1092
  console.print(" [yellow]To enable QEMU emulation, run:[/yellow]")
938
- console.print(" [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]")
1093
+ console.print(
1094
+ " [bold]docker run --privileged --rm tonistiigi/binfmt --install all[/bold]"
1095
+ )
939
1096
  except subprocess.TimeoutExpired:
940
1097
  console.print(" [yellow]![/yellow] QEMU test timed out")
941
1098
  except Exception as e:
@@ -979,7 +1136,10 @@ def _check_docker() -> tuple[bool, str | None]:
979
1136
  timeout=10,
980
1137
  )
981
1138
  if result.returncode != 0:
982
- return False, "Docker is not running. Start Docker (Docker Desktop, colima, or dockerd)."
1139
+ return (
1140
+ False,
1141
+ "Docker is not running. Start Docker (Docker Desktop, colima, or dockerd).",
1142
+ )
983
1143
  except FileNotFoundError:
984
1144
  return False, "Docker is not installed. Install from https://docs.docker.com/get-docker/"
985
1145
  except Exception as e:
@@ -124,6 +124,7 @@ def is_running_in_linux_vm_on_arm() -> bool:
124
124
  True if running Linux on ARM64
125
125
  """
126
126
  import sys
127
+
127
128
  return sys.platform == "linux" and get_host_architecture() in ("aarch64", "arm64")
128
129
 
129
130
 
@@ -313,10 +314,6 @@ def pull_image_with_progress(
313
314
 
314
315
  # Read output from master with timeout
315
316
  output_buffer = ""
316
- # Regex to parse docker progress: "abc123: Downloading [===> ] 10.5MB/50MB"
317
- progress_pattern = re.compile(
318
- r"([a-f0-9]+):\s*(Downloading|Extracting|Verifying Checksum|Download complete|Pull complete|Already exists|Waiting)(?:\s+\[.*?\]\s+)?(\d+\.?\d*\s*[kMG]?B)?(?:/(\d+\.?\d*\s*[kMG]?B))?"
319
- )
320
317
 
321
318
  while True:
322
319
  # Check if process is done
@@ -610,7 +610,9 @@ class TestFixImageArchitecture:
610
610
  @patch("hte_cli.image_utils.remove_image")
611
611
  @patch("hte_cli.image_utils.check_image_architecture_matches_host")
612
612
  @patch("hte_cli.image_utils.platform.machine")
613
- def test_returns_false_when_repull_fails(self, mock_machine, mock_check, mock_remove, mock_pull):
613
+ def test_returns_false_when_repull_fails(
614
+ self, mock_machine, mock_check, mock_remove, mock_pull
615
+ ):
614
616
  """Returns (False, message) when re-pull fails."""
615
617
  mock_machine.return_value = "aarch64"
616
618
  mock_check.return_value = (False, "amd64", "aarch64")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes