hte-cli 0.1.27__tar.gz → 0.1.28__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- {hte_cli-0.1.27 → hte_cli-0.1.28}/PKG-INFO +1 -1
- {hte_cli-0.1.27 → hte_cli-0.1.28}/pyproject.toml +1 -1
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/cli.py +38 -10
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/events.py +48 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/.gitignore +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/README.md +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/__init__.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/__main__.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/api_client.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/config.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/errors.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/image_utils.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/runner.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/scorers.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/src/hte_cli/version_check.py +0 -0
- {hte_cli-0.1.27 → hte_cli-0.1.28}/uv.lock +0 -0
|
@@ -397,10 +397,16 @@ def tasks_run(ctx, task_id: str | None):
|
|
|
397
397
|
# Step 5: Pre-pull Docker images with progress
|
|
398
398
|
from hte_cli.image_utils import extract_images_from_compose
|
|
399
399
|
import re
|
|
400
|
+
import time
|
|
401
|
+
|
|
402
|
+
setup_start_time = time.monotonic()
|
|
403
|
+
images: list[str] = []
|
|
404
|
+
results: list[tuple[str, bool, str]] = []
|
|
400
405
|
|
|
401
406
|
if compose_yaml:
|
|
402
407
|
images = extract_images_from_compose(compose_yaml)
|
|
403
408
|
if images:
|
|
409
|
+
events.setup_started(images)
|
|
404
410
|
console.print()
|
|
405
411
|
console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")
|
|
406
412
|
|
|
@@ -533,14 +539,27 @@ def tasks_run(ctx, task_id: str | None):
|
|
|
533
539
|
console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
|
|
534
540
|
results.append((img, False, "failed"))
|
|
535
541
|
|
|
536
|
-
|
|
537
|
-
if
|
|
542
|
+
failed_count = sum(1 for _, ok, _ in results if not ok)
|
|
543
|
+
if failed_count > 0:
|
|
538
544
|
console.print(
|
|
539
|
-
f"[yellow]Warning: {
|
|
545
|
+
f"[yellow]Warning: {failed_count} image(s) failed to pull. "
|
|
540
546
|
"Task may fail to start.[/yellow]"
|
|
541
547
|
)
|
|
542
548
|
console.print()
|
|
543
549
|
|
|
550
|
+
# Record image pull timing
|
|
551
|
+
if images:
|
|
552
|
+
pull_duration = time.monotonic() - setup_start_time
|
|
553
|
+
pulled = [img for img, ok, status in results if ok and status == "pulled"]
|
|
554
|
+
cached = [img for img, ok, status in results if ok and status == "cached"]
|
|
555
|
+
failed = [img for img, ok, status in results if not ok]
|
|
556
|
+
events.image_pull_completed(
|
|
557
|
+
duration_seconds=pull_duration,
|
|
558
|
+
pulled=pulled,
|
|
559
|
+
cached=cached,
|
|
560
|
+
failed=failed,
|
|
561
|
+
)
|
|
562
|
+
|
|
544
563
|
# Step 6: Run Inspect's human_cli
|
|
545
564
|
runner = TaskRunner()
|
|
546
565
|
console.print("[bold]Starting task environment...[/bold]")
|
|
@@ -549,6 +568,10 @@ def tasks_run(ctx, task_id: str | None):
|
|
|
549
568
|
|
|
550
569
|
events.docker_started()
|
|
551
570
|
|
|
571
|
+
# Record total setup time (image pulls + compose up)
|
|
572
|
+
total_setup = time.monotonic() - setup_start_time
|
|
573
|
+
events.setup_completed(total_seconds=total_setup)
|
|
574
|
+
|
|
552
575
|
eval_log_bytes = None
|
|
553
576
|
local_eval_path = None
|
|
554
577
|
try:
|
|
@@ -612,13 +635,14 @@ def tasks_run(ctx, task_id: str | None):
|
|
|
612
635
|
except Exception:
|
|
613
636
|
pass # Not a CyberGym task or malformed zip
|
|
614
637
|
|
|
615
|
-
# Show upload size info
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
638
|
+
# Show upload size info and track timing
|
|
639
|
+
upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
|
|
640
|
+
upload_size_kb = upload_size_bytes / 1024
|
|
641
|
+
if upload_size_kb / 1024 > 50:
|
|
642
|
+
console.print(f"[yellow]Warning: Large eval log ({upload_size_kb / 1024:.1f} MB)[/yellow]")
|
|
643
|
+
|
|
644
|
+
events.upload_started(size_bytes=upload_size_bytes)
|
|
645
|
+
upload_start_time = time.monotonic()
|
|
622
646
|
|
|
623
647
|
with Progress(
|
|
624
648
|
SpinnerColumn(),
|
|
@@ -645,6 +669,10 @@ def tasks_run(ctx, task_id: str | None):
|
|
|
645
669
|
console.print("[yellow]Your result was saved locally but not uploaded.[/yellow]")
|
|
646
670
|
sys.exit(1)
|
|
647
671
|
|
|
672
|
+
# Record upload completion
|
|
673
|
+
upload_duration = time.monotonic() - upload_start_time
|
|
674
|
+
events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
|
|
675
|
+
|
|
648
676
|
console.print()
|
|
649
677
|
console.print("[green]Result uploaded successfully![/green]")
|
|
650
678
|
|
|
@@ -24,6 +24,12 @@ class EventStreamer:
|
|
|
24
24
|
"docker_started",
|
|
25
25
|
"docker_stopped",
|
|
26
26
|
"session_completed",
|
|
27
|
+
# Overhead tracking events
|
|
28
|
+
"setup_started",
|
|
29
|
+
"image_pull_completed",
|
|
30
|
+
"setup_completed",
|
|
31
|
+
"upload_started",
|
|
32
|
+
"upload_completed",
|
|
27
33
|
}
|
|
28
34
|
|
|
29
35
|
def __init__(self, api: APIClient, session_id: str):
|
|
@@ -126,3 +132,45 @@ class EventStreamer:
|
|
|
126
132
|
if answer is not None:
|
|
127
133
|
data["answer_submitted"] = True
|
|
128
134
|
return self.send("session_completed", data or None)
|
|
135
|
+
|
|
136
|
+
# Overhead tracking events
|
|
137
|
+
|
|
138
|
+
def setup_started(self, images: list[str]) -> bool:
|
|
139
|
+
"""Record start of setup phase (before image pulls)."""
|
|
140
|
+
return self.send("setup_started", {"images": images})
|
|
141
|
+
|
|
142
|
+
def image_pull_completed(
|
|
143
|
+
self,
|
|
144
|
+
duration_seconds: float,
|
|
145
|
+
pulled: list[str],
|
|
146
|
+
cached: list[str],
|
|
147
|
+
failed: list[str],
|
|
148
|
+
) -> bool:
|
|
149
|
+
"""Record image pull results with timing."""
|
|
150
|
+
return self.send(
|
|
151
|
+
"image_pull_completed",
|
|
152
|
+
{
|
|
153
|
+
"duration_seconds": duration_seconds,
|
|
154
|
+
"pulled": pulled,
|
|
155
|
+
"cached": cached,
|
|
156
|
+
"failed": failed,
|
|
157
|
+
},
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
def setup_completed(self, total_seconds: float) -> bool:
|
|
161
|
+
"""Record end of setup phase (environment ready for work)."""
|
|
162
|
+
return self.send("setup_completed", {"total_seconds": total_seconds})
|
|
163
|
+
|
|
164
|
+
def upload_started(self, size_bytes: int) -> bool:
|
|
165
|
+
"""Record start of result upload."""
|
|
166
|
+
return self.send("upload_started", {"size_bytes": size_bytes})
|
|
167
|
+
|
|
168
|
+
def upload_completed(self, duration_seconds: float, size_bytes: int) -> bool:
|
|
169
|
+
"""Record end of result upload with timing."""
|
|
170
|
+
return self.send(
|
|
171
|
+
"upload_completed",
|
|
172
|
+
{
|
|
173
|
+
"duration_seconds": duration_seconds,
|
|
174
|
+
"size_bytes": size_bytes,
|
|
175
|
+
},
|
|
176
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|