hte-cli 0.1.26__tar.gz → 0.1.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.1.26
3
+ Version: 0.1.28
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hte-cli"
3
- version = "0.1.26"
3
+ version = "0.1.28"
4
4
  description = "Human Time-to-Completion Evaluation CLI"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -397,10 +397,16 @@ def tasks_run(ctx, task_id: str | None):
397
397
  # Step 5: Pre-pull Docker images with progress
398
398
  from hte_cli.image_utils import extract_images_from_compose
399
399
  import re
400
+ import time
401
+
402
+ setup_start_time = time.monotonic()
403
+ images: list[str] = []
404
+ results: list[tuple[str, bool, str]] = []
400
405
 
401
406
  if compose_yaml:
402
407
  images = extract_images_from_compose(compose_yaml)
403
408
  if images:
409
+ events.setup_started(images)
404
410
  console.print()
405
411
  console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")
406
412
 
@@ -533,14 +539,27 @@ def tasks_run(ctx, task_id: str | None):
533
539
  console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
534
540
  results.append((img, False, "failed"))
535
541
 
536
- failed = sum(1 for _, ok, _ in results if not ok)
537
- if failed > 0:
542
+ failed_count = sum(1 for _, ok, _ in results if not ok)
543
+ if failed_count > 0:
538
544
  console.print(
539
- f"[yellow]Warning: {failed} image(s) failed to pull. "
545
+ f"[yellow]Warning: {failed_count} image(s) failed to pull. "
540
546
  "Task may fail to start.[/yellow]"
541
547
  )
542
548
  console.print()
543
549
 
550
+ # Record image pull timing
551
+ if images:
552
+ pull_duration = time.monotonic() - setup_start_time
553
+ pulled = [img for img, ok, status in results if ok and status == "pulled"]
554
+ cached = [img for img, ok, status in results if ok and status == "cached"]
555
+ failed = [img for img, ok, status in results if not ok]
556
+ events.image_pull_completed(
557
+ duration_seconds=pull_duration,
558
+ pulled=pulled,
559
+ cached=cached,
560
+ failed=failed,
561
+ )
562
+
544
563
  # Step 6: Run Inspect's human_cli
545
564
  runner = TaskRunner()
546
565
  console.print("[bold]Starting task environment...[/bold]")
@@ -549,6 +568,10 @@ def tasks_run(ctx, task_id: str | None):
549
568
 
550
569
  events.docker_started()
551
570
 
571
+ # Record total setup time (image pulls + compose up)
572
+ total_setup = time.monotonic() - setup_start_time
573
+ events.setup_completed(total_seconds=total_setup)
574
+
552
575
  eval_log_bytes = None
553
576
  local_eval_path = None
554
577
  try:
@@ -612,13 +635,14 @@ def tasks_run(ctx, task_id: str | None):
612
635
  except Exception:
613
636
  pass # Not a CyberGym task or malformed zip
614
637
 
615
- # Show upload size info
616
- upload_size_kb = 0
617
- if eval_log_bytes:
618
- upload_size_kb = len(eval_log_bytes) / 1024
619
- size_mb = upload_size_kb / 1024
620
- if size_mb > 50:
621
- console.print(f"[yellow]Warning: Large eval log ({size_mb:.1f} MB)[/yellow]")
638
+ # Show upload size info and track timing
639
+ upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
640
+ upload_size_kb = upload_size_bytes / 1024
641
+ if upload_size_kb / 1024 > 50:
642
+ console.print(f"[yellow]Warning: Large eval log ({upload_size_kb / 1024:.1f} MB)[/yellow]")
643
+
644
+ events.upload_started(size_bytes=upload_size_bytes)
645
+ upload_start_time = time.monotonic()
622
646
 
623
647
  with Progress(
624
648
  SpinnerColumn(),
@@ -645,6 +669,10 @@ def tasks_run(ctx, task_id: str | None):
645
669
  console.print("[yellow]Your result was saved locally but not uploaded.[/yellow]")
646
670
  sys.exit(1)
647
671
 
672
+ # Record upload completion
673
+ upload_duration = time.monotonic() - upload_start_time
674
+ events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
675
+
648
676
  console.print()
649
677
  console.print("[green]Result uploaded successfully![/green]")
650
678
 
@@ -24,6 +24,12 @@ class EventStreamer:
24
24
  "docker_started",
25
25
  "docker_stopped",
26
26
  "session_completed",
27
+ # Overhead tracking events
28
+ "setup_started",
29
+ "image_pull_completed",
30
+ "setup_completed",
31
+ "upload_started",
32
+ "upload_completed",
27
33
  }
28
34
 
29
35
  def __init__(self, api: APIClient, session_id: str):
@@ -126,3 +132,45 @@ class EventStreamer:
126
132
  if answer is not None:
127
133
  data["answer_submitted"] = True
128
134
  return self.send("session_completed", data or None)
135
+
136
+ # Overhead tracking events
137
+
138
+ def setup_started(self, images: list[str]) -> bool:
139
+ """Record start of setup phase (before image pulls)."""
140
+ return self.send("setup_started", {"images": images})
141
+
142
+ def image_pull_completed(
143
+ self,
144
+ duration_seconds: float,
145
+ pulled: list[str],
146
+ cached: list[str],
147
+ failed: list[str],
148
+ ) -> bool:
149
+ """Record image pull results with timing."""
150
+ return self.send(
151
+ "image_pull_completed",
152
+ {
153
+ "duration_seconds": duration_seconds,
154
+ "pulled": pulled,
155
+ "cached": cached,
156
+ "failed": failed,
157
+ },
158
+ )
159
+
160
+ def setup_completed(self, total_seconds: float) -> bool:
161
+ """Record end of setup phase (environment ready for work)."""
162
+ return self.send("setup_completed", {"total_seconds": total_seconds})
163
+
164
+ def upload_started(self, size_bytes: int) -> bool:
165
+ """Record start of result upload."""
166
+ return self.send("upload_started", {"size_bytes": size_bytes})
167
+
168
+ def upload_completed(self, duration_seconds: float, size_bytes: int) -> bool:
169
+ """Record end of result upload with timing."""
170
+ return self.send(
171
+ "upload_completed",
172
+ {
173
+ "duration_seconds": duration_seconds,
174
+ "size_bytes": size_bytes,
175
+ },
176
+ )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes