hte-cli 0.2.31__tar.gz → 0.2.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {hte_cli-0.2.31 → hte_cli-0.2.33}/PKG-INFO +1 -1
  2. {hte_cli-0.2.31 → hte_cli-0.2.33}/pyproject.toml +1 -1
  3. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/cli.py +164 -25
  4. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/image_utils.py +0 -52
  5. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/unit/test_image_utils.py +0 -55
  6. {hte_cli-0.2.31 → hte_cli-0.2.33}/uv.lock +1 -1
  7. {hte_cli-0.2.31 → hte_cli-0.2.33}/.gitignore +0 -0
  8. {hte_cli-0.2.31 → hte_cli-0.2.33}/README.md +0 -0
  9. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/__init__.py +0 -0
  10. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/__main__.py +0 -0
  11. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/api_client.py +0 -0
  12. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/config.py +0 -0
  13. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/errors.py +0 -0
  14. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/events.py +0 -0
  15. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/runner.py +0 -0
  16. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/scorers.py +0 -0
  17. {hte_cli-0.2.31 → hte_cli-0.2.33}/src/hte_cli/version_check.py +0 -0
  18. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/__init__.py +0 -0
  19. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/__init__.py +0 -0
  20. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/automated_runner.py +0 -0
  21. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/conftest.py +0 -0
  22. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/e2e_test.py +0 -0
  23. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/test_benchmark_flows.py +0 -0
  24. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/test_eval_logs.py +0 -0
  25. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/test_infrastructure.py +0 -0
  26. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/test_runtime_imports.py +0 -0
  27. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/test_session_lifecycle.py +0 -0
  28. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/e2e/verify_docker_deps.py +0 -0
  29. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/unit/__init__.py +0 -0
  30. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/unit/conftest.py +0 -0
  31. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/unit/test_runner.py +0 -0
  32. {hte_cli-0.2.31 → hte_cli-0.2.33}/tests/unit/test_scorers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.31
3
+ Version: 0.2.33
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hte-cli"
3
- version = "0.2.31"
3
+ version = "0.2.33"
4
4
  description = "Human Time-to-Completion Evaluation CLI"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -4,7 +4,9 @@ Uses Click for command parsing and Rich for pretty output.
4
4
  """
5
5
 
6
6
  import os
7
+ import signal
7
8
  import sys
9
+ import threading
8
10
  import webbrowser
9
11
 
10
12
  import click
@@ -22,6 +24,83 @@ console = Console()
22
24
  # Support email per spec
23
25
  SUPPORT_EMAIL = "jacktpayne51@gmail.com"
24
26
 
27
+ # Warning before cap (15 minutes)
28
+ CAP_WARNING_SECONDS = 15 * 60
29
+
30
+
31
+ class CapEnforcer:
32
+ """Background timer that enforces time cap on capped_completion tasks.
33
+
34
+ Shows warning 15 minutes before cap and terminates the task when cap is reached.
35
+ """
36
+
37
+ def __init__(
38
+ self,
39
+ time_cap_seconds: int,
40
+ start_time: float,
41
+ console: Console,
42
+ main_thread_id: int,
43
+ ):
44
+ self.time_cap_seconds = time_cap_seconds
45
+ self.start_time = start_time
46
+ self.console = console
47
+ self.main_thread_id = main_thread_id
48
+ self._stop_event = threading.Event()
49
+ self._warning_shown = False
50
+ self._thread: threading.Thread | None = None
51
+ self.cap_reached = False
52
+
53
+ def start(self):
54
+ """Start the background timer thread."""
55
+ self._thread = threading.Thread(target=self._run, daemon=True)
56
+ self._thread.start()
57
+
58
+ def stop(self):
59
+ """Stop the background timer."""
60
+ self._stop_event.set()
61
+ if self._thread:
62
+ self._thread.join(timeout=1.0)
63
+
64
+ def _run(self):
65
+ """Timer loop that checks elapsed time."""
66
+ import time
67
+
68
+ warning_threshold = self.time_cap_seconds - CAP_WARNING_SECONDS
69
+
70
+ while not self._stop_event.is_set():
71
+ elapsed = time.monotonic() - self.start_time
72
+
73
+ # Show warning at 15 minutes before cap
74
+ if elapsed >= warning_threshold and not self._warning_shown:
75
+ self._warning_shown = True
76
+ remaining = self.time_cap_seconds - elapsed
77
+ minutes = int(remaining // 60)
78
+ self.console.print()
79
+ self.console.print(
80
+ f"[yellow bold]Warning: Time cap approaching - {minutes} minutes remaining[/yellow bold]"
81
+ )
82
+ self.console.print(
83
+ "[yellow]When cap is reached, session will end and you'll need to record progress in the web UI.[/yellow]"
84
+ )
85
+ self.console.print()
86
+
87
+ # Cap reached - terminate the main thread
88
+ if elapsed >= self.time_cap_seconds:
89
+ self.cap_reached = True
90
+ self.console.print()
91
+ self.console.print(
92
+ f"[red bold]Time cap reached ({self.time_cap_seconds // 60} minutes). Session ending.[/red bold]"
93
+ )
94
+ self.console.print(
95
+ "[yellow]Return to the web UI to record your progress and estimate completion time.[/yellow]"
96
+ )
97
+ # Send SIGINT to main thread to trigger KeyboardInterrupt
98
+ os.kill(os.getpid(), signal.SIGINT)
99
+ break
100
+
101
+ # Check every second
102
+ self._stop_event.wait(1.0)
103
+
25
104
 
26
105
  def _find_eval_log_bytes(runner) -> bytes | None:
27
106
  """Find and read eval log bytes from runner's work directory.
@@ -45,7 +124,9 @@ def _find_eval_log_bytes(runner) -> bytes | None:
45
124
  return None
46
125
 
47
126
 
48
- def _upload_partial_log(api: APIClient, session_id: str, eval_log_bytes: bytes, console: Console) -> None:
127
+ def _upload_partial_log(
128
+ api: APIClient, session_id: str, eval_log_bytes: bytes, console: Console
129
+ ) -> None:
49
130
  """Upload partial eval log for interrupted session.
50
131
 
51
132
  Best-effort: silently handles failures to not block exit.
@@ -282,7 +363,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
282
363
  extract_image_platforms_from_compose,
283
364
  pull_image_with_progress,
284
365
  check_image_architecture_matches_host,
285
- fix_image_architecture,
366
+ remove_image,
286
367
  get_host_docker_platform,
287
368
  is_running_in_linux_vm_on_arm,
288
369
  )
@@ -406,34 +487,69 @@ def session_join(ctx, session_id: str, force_setup: bool):
406
487
  cached_images.append(img)
407
488
  continue
408
489
  else:
409
- # Architecture mismatch detected - this is the key fix for Linux ARM64
490
+ # Architecture mismatch detected - remove and re-pull with correct arch
410
491
  console.print(
411
492
  f" [yellow]⚠[/yellow] {short_name} [yellow]architecture mismatch![/yellow]"
412
493
  )
413
494
  console.print(
414
495
  f" [dim]Cached image: {image_arch} | Host: {host_arch}[/dim]"
415
496
  )
416
- console.print(
417
- " [dim]Removing cached image and re-pulling correct architecture...[/dim]"
418
- )
419
497
 
420
- needed_fix, fix_msg = fix_image_architecture(img)
421
- if needed_fix:
498
+ # Remove the wrongly-cached image
499
+ if not remove_image(img):
500
+ console.print(
501
+ f" [red]✗[/red] {short_name} [dim](failed to remove cached image)[/dim]"
502
+ )
503
+ failed_images.append(img)
504
+ pull_errors[img] = "failed to remove cached image"
505
+ continue
506
+
507
+ # Re-pull with correct platform and progress display
508
+ last_status = ["connecting..."]
509
+ last_error = [""]
510
+
511
+ with console.status(
512
+ f"[yellow]↓[/yellow] {short_name} [dim]re-pulling for {host_arch}...[/dim]"
513
+ ) as status:
514
+
515
+ def show_progress(image: str, line: str):
516
+ if ": " in line:
517
+ parts = line.split(": ", 1)
518
+ if len(parts) == 2:
519
+ layer_id = parts[0][-8:]
520
+ layer_status = parts[1][:85]
521
+ display = f"{layer_id}: {layer_status}"
522
+ if display != last_status[0]:
523
+ last_status[0] = display
524
+ status.update(
525
+ f"[yellow]↓[/yellow] {short_name} [dim]{display}[/dim]"
526
+ )
527
+ if "error" in line.lower() or "denied" in line.lower():
528
+ last_error[0] = line
529
+
530
+ success = pull_image_with_progress(
531
+ img, platform=host_platform, on_progress=show_progress
532
+ )
533
+
534
+ if success:
422
535
  console.print(
423
- f" [green]✓[/green] {short_name} [green]fixed![/green] [dim]({fix_msg})[/dim]"
536
+ f" [green]✓[/green] {short_name} [green]fixed![/green] [dim](re-pulled as {host_platform.split('/')[-1]})[/dim]"
424
537
  )
425
538
  pulled_images.append(img)
426
539
  continue
427
- elif "failed to re-pull" in fix_msg:
428
- # No ARM variant available - this is an x86-only image
429
- # Re-pull the amd64 version and warn about QEMU
540
+ else:
541
+ # ARM re-pull failed - try without platform constraint (x86 fallback)
430
542
  console.print(
431
- " [dim]No ARM variant available - re-pulling x86 version...[/dim]"
543
+ " [dim]No ARM variant - trying x86 fallback...[/dim]"
432
544
  )
433
- success = pull_image_with_progress(img)
545
+ with console.status(
546
+ f"[yellow]↓[/yellow] {short_name} [dim]pulling x86...[/dim]"
547
+ ) as status:
548
+ success = pull_image_with_progress(img, on_progress=show_progress)
549
+
434
550
  if success:
435
551
  console.print(
436
- f" [yellow]![/yellow] {short_name} [dim](x86-only image, needs QEMU)[/dim]"
552
+ f" [yellow]![/yellow] {short_name} [dim](x86-only, needs QEMU)[/dim]"
437
553
  )
438
554
  x86_images_on_arm.append(img)
439
555
  pulled_images.append(img)
@@ -442,14 +558,11 @@ def session_join(ctx, session_id: str, force_setup: bool):
442
558
  console.print(
443
559
  f" [red]✗[/red] {short_name} [dim](failed to pull)[/dim]"
444
560
  )
561
+ if last_error[0]:
562
+ console.print(f" [dim]{last_error[0][:60]}[/dim]")
563
+ pull_errors[img] = last_error[0]
445
564
  failed_images.append(img)
446
- pull_errors[img] = "failed to pull x86 fallback"
447
565
  continue
448
- else:
449
- console.print(f" [red]✗[/red] {short_name} [dim]({fix_msg})[/dim]")
450
- failed_images.append(img)
451
- pull_errors[img] = fix_msg
452
- continue
453
566
 
454
567
  # Need to pull - show progress
455
568
  last_status = ["connecting..."]
@@ -470,7 +583,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
470
583
  parts = line.split(": ", 1)
471
584
  if len(parts) == 2:
472
585
  layer_id = parts[0][-8:]
473
- layer_status = parts[1][:70] # Increased to include size info
586
+ layer_status = parts[1][:85] # Include full progress bar + size
474
587
  display = f"{layer_id}: {layer_status}"
475
588
  if display != last_status[0]:
476
589
  last_status[0] = display
@@ -606,6 +719,23 @@ def session_join(ctx, session_id: str, force_setup: bool):
606
719
 
607
720
  events.docker_started()
608
721
 
722
+ # Start cap enforcer if this is a capped_completion task
723
+ time_cap_seconds = assignment.get("time_cap_seconds")
724
+ cap_enforcer: CapEnforcer | None = None
725
+ if time_cap_seconds and session_info.get("mode") == "capped_completion":
726
+ cap_enforcer = CapEnforcer(
727
+ time_cap_seconds=time_cap_seconds,
728
+ start_time=time.monotonic(),
729
+ console=console,
730
+ main_thread_id=threading.get_ident(),
731
+ )
732
+ cap_enforcer.start()
733
+ console.print(
734
+ f"[dim]Time cap: {time_cap_seconds // 60} minutes "
735
+ f"(warning at {(time_cap_seconds - CAP_WARNING_SECONDS) // 60} min)[/dim]"
736
+ )
737
+ console.print()
738
+
609
739
  runner = TaskRunner()
610
740
  eval_log_bytes = None
611
741
  try:
@@ -624,9 +754,15 @@ def session_join(ctx, session_id: str, force_setup: bool):
624
754
  if eval_log_bytes:
625
755
  _upload_partial_log(api, session_id, eval_log_bytes, console)
626
756
  console.print()
627
- console.print(
628
- "[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
629
- )
757
+ # Different message if cap was reached vs user interrupt
758
+ if cap_enforcer and cap_enforcer.cap_reached:
759
+ console.print(
760
+ "[yellow]Time cap reached. Return to web UI to record progress and estimate completion time.[/yellow]"
761
+ )
762
+ else:
763
+ console.print(
764
+ "[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
765
+ )
630
766
  sys.exit(0)
631
767
  except Exception as e:
632
768
  events.docker_stopped(exit_code=1)
@@ -637,6 +773,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
637
773
  console.print(f"[red]Task execution failed: {e}[/red]")
638
774
  sys.exit(1)
639
775
  finally:
776
+ # Stop cap enforcer
777
+ if cap_enforcer:
778
+ cap_enforcer.stop()
640
779
  runner.cleanup()
641
780
 
642
781
  events.docker_stopped(exit_code=0)
@@ -222,58 +222,6 @@ def remove_image(image: str) -> bool:
222
222
  return False
223
223
 
224
224
 
225
- def fix_image_architecture(
226
- image: str,
227
- on_status: Callable[[str], None] | None = None,
228
- ) -> tuple[bool, str]:
229
- """
230
- Check if a cached image has wrong architecture and fix it if needed.
231
-
232
- For Linux ARM64 hosts (e.g., VM on Apple Silicon), this:
233
- 1. Checks if the cached image is amd64 when host is arm64
234
- 2. Removes the wrongly-cached image
235
- 3. Re-pulls with explicit --platform linux/arm64
236
-
237
- Args:
238
- image: Image name to check/fix
239
- on_status: Callback for status updates
240
-
241
- Returns:
242
- Tuple of (needed_fix, message):
243
- - needed_fix: True if image was re-pulled
244
- - message: Description of what happened
245
- """
246
- matches, image_arch, host_arch = check_image_architecture_matches_host(image)
247
-
248
- if matches:
249
- if image_arch:
250
- return (False, f"architecture OK ({image_arch})")
251
- else:
252
- return (False, "not cached")
253
-
254
- # Architecture mismatch detected
255
- host_platform = get_host_docker_platform()
256
- if not host_platform:
257
- return (False, f"unknown host architecture: {host_arch}")
258
-
259
- if on_status:
260
- on_status(f"Cached image is {image_arch}, host is {host_arch} - re-pulling...")
261
-
262
- # Remove the wrongly-cached image
263
- logger.info(f"Removing wrongly-cached {image_arch} image: {image}")
264
- if not remove_image(image):
265
- return (False, f"failed to remove cached {image_arch} image")
266
-
267
- # Re-pull with correct platform
268
- logger.info(f"Re-pulling {image} with platform {host_platform}")
269
- success = pull_image_with_progress(image, platform=host_platform)
270
-
271
- if success:
272
- return (True, f"re-pulled as {host_platform.split('/')[-1]}")
273
- else:
274
- return (False, f"failed to re-pull with platform {host_platform}")
275
-
276
-
277
225
  def pull_image_with_progress(
278
226
  image: str,
279
227
  platform: str | None = None,
@@ -15,7 +15,6 @@ from hte_cli.image_utils import (
15
15
  check_image_architecture_matches_host,
16
16
  is_running_in_linux_vm_on_arm,
17
17
  remove_image,
18
- fix_image_architecture,
19
18
  )
20
19
 
21
20
 
@@ -569,57 +568,3 @@ class TestRemoveImage:
569
568
  """Returns False when docker rmi fails."""
570
569
  mock_run.return_value = MagicMock(returncode=1)
571
570
  assert remove_image("python:3.12-slim") is False
572
-
573
-
574
- class TestFixImageArchitecture:
575
- """Tests for fix_image_architecture."""
576
-
577
- @patch("hte_cli.image_utils.pull_image_with_progress")
578
- @patch("hte_cli.image_utils.remove_image")
579
- @patch("hte_cli.image_utils.check_image_architecture_matches_host")
580
- def test_no_fix_needed_when_matches(self, mock_check, mock_remove, mock_pull):
581
- """Returns (False, message) when architecture already matches."""
582
- mock_check.return_value = (True, "arm64", "aarch64")
583
-
584
- needed_fix, message = fix_image_architecture("python:3.12-slim")
585
-
586
- assert needed_fix is False
587
- assert "architecture OK" in message
588
- mock_remove.assert_not_called()
589
- mock_pull.assert_not_called()
590
-
591
- @patch("hte_cli.image_utils.pull_image_with_progress")
592
- @patch("hte_cli.image_utils.remove_image")
593
- @patch("hte_cli.image_utils.check_image_architecture_matches_host")
594
- @patch("hte_cli.image_utils.platform.machine")
595
- def test_fixes_mismatch_by_repulling(self, mock_machine, mock_check, mock_remove, mock_pull):
596
- """Removes and re-pulls when architecture mismatches."""
597
- mock_machine.return_value = "aarch64"
598
- mock_check.return_value = (False, "amd64", "aarch64") # Mismatch!
599
- mock_remove.return_value = True
600
- mock_pull.return_value = True
601
-
602
- needed_fix, message = fix_image_architecture("python:3.12-slim")
603
-
604
- assert needed_fix is True
605
- assert "re-pulled" in message
606
- mock_remove.assert_called_once_with("python:3.12-slim")
607
- mock_pull.assert_called_once_with("python:3.12-slim", platform="linux/arm64")
608
-
609
- @patch("hte_cli.image_utils.pull_image_with_progress")
610
- @patch("hte_cli.image_utils.remove_image")
611
- @patch("hte_cli.image_utils.check_image_architecture_matches_host")
612
- @patch("hte_cli.image_utils.platform.machine")
613
- def test_returns_false_when_repull_fails(
614
- self, mock_machine, mock_check, mock_remove, mock_pull
615
- ):
616
- """Returns (False, message) when re-pull fails."""
617
- mock_machine.return_value = "aarch64"
618
- mock_check.return_value = (False, "amd64", "aarch64")
619
- mock_remove.return_value = True
620
- mock_pull.return_value = False # Pull fails
621
-
622
- needed_fix, message = fix_image_architecture("python:3.12-slim")
623
-
624
- assert needed_fix is False
625
- assert "failed to re-pull" in message
@@ -625,7 +625,7 @@ wheels = [
625
625
 
626
626
  [[package]]
627
627
  name = "hte-cli"
628
- version = "0.2.30"
628
+ version = "0.2.32"
629
629
  source = { editable = "." }
630
630
  dependencies = [
631
631
  { name = "click" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes