hte-cli 0.1.23__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- hte_cli/api_client.py +25 -0
- hte_cli/cli.py +395 -30
- hte_cli/events.py +48 -0
- hte_cli/image_utils.py +155 -0
- hte_cli/runner.py +27 -318
- hte_cli/scorers.py +157 -0
- {hte_cli-0.1.23.dist-info → hte_cli-0.2.8.dist-info}/METADATA +5 -1
- hte_cli-0.2.8.dist-info/RECORD +15 -0
- hte_cli-0.1.23.dist-info/RECORD +0 -13
- {hte_cli-0.1.23.dist-info → hte_cli-0.2.8.dist-info}/WHEEL +0 -0
- {hte_cli-0.1.23.dist-info → hte_cli-0.2.8.dist-info}/entry_points.txt +0 -0
hte_cli/api_client.py
CHANGED
@@ -201,6 +201,31 @@ class APIClient:
             },
         )

+    # =========================================================================
+    # Session-based API (new flow: hte-cli session join <session_id>)
+    # =========================================================================
+
+    def join_session(self, session_id: str) -> dict:
+        """Join an existing session created by web UI.
+
+        Returns session info including task data, benchmark, mode, etc.
+        Sets cli_connected_at on the server.
+        """
+        return self.post(f"/sessions/{session_id}/join")
+
+    def get_session_files(self, session_id: str) -> bytes:
+        """Download task files for a session as zip."""
+        return self.get_raw(f"/sessions/{session_id}/files")
+
+    def get_session_compose(self, session_id: str) -> str:
+        """Get compose.yaml content for a session."""
+        content = self.get_raw(f"/sessions/{session_id}/compose")
+        return content.decode("utf-8")
+
+    # =========================================================================
+    # Result Upload
+    # =========================================================================
+
     def upload_result(
         self,
         session_id: str,
hte_cli/cli.py
CHANGED
@@ -13,9 +13,7 @@ import click
 from rich.console import Console
 from rich.table import Table
 from rich.panel import Panel
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.live import Live
-from rich.text import Text
+from rich.progress import Progress, SpinnerColumn, TextColumn

 from hte_cli import __version__, API_BASE_URL
 from hte_cli.config import Config, get_eval_logs_dir
@@ -149,13 +147,319 @@ def auth_status(ctx):


 # =============================================================================
-#
+# Session Commands (New flow: session join <session_id>)
+# =============================================================================
+
+
+@cli.group()
+def session():
+    """Session management commands."""
+    pass
+
+
+@session.command("join")
+@click.argument("session_id")
+@click.option("--force-setup", is_flag=True, help="Re-run setup even if reconnecting")
+@click.pass_context
+def session_join(ctx, session_id: str, force_setup: bool):
+    """Join an existing session by ID.
+
+    This is the primary way to start working on a task:
+    1. Start the task from the web UI (creates session)
+    2. Run this command with the session ID shown in the web UI
+    3. The environment will be set up and the timer will start
+    """
+    config: Config = ctx.obj["config"]
+
+    if not config.is_authenticated():
+        console.print("[red]Not logged in. Run: hte-cli auth login[/red]")
+        sys.exit(1)
+
+    api = APIClient(config)
+
+    # Step 1: Join session
+    console.print()
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        console=console,
+    ) as progress:
+        progress.add_task("Joining session...", total=None)
+
+        try:
+            session_info = api.join_session(session_id)
+        except APIError as e:
+            if "Invalid session ID format" in str(e):
+                console.print(f"[red]{e}[/red]")
+            elif e.status_code == 404:
+                console.print("[red]Session not found. Check the session ID and try again.[/red]")
+            elif e.status_code == 400 and "paused" in str(e).lower():
+                console.print("[yellow]Session is paused. Please resume from the web UI first.[/yellow]")
+            else:
+                console.print(f"[red]Error: {e}[/red]")
+            sys.exit(1)
+
+    # Check if reconnecting (session already in_progress)
+    is_reconnect = session_info.get("status") == "in_progress"
+
+    if is_reconnect and not force_setup:
+        console.print("[yellow]Reconnecting to existing session...[/yellow]")
+        console.print()
+
+    console.print(
+        Panel(
+            f"[bold]Task:[/bold] {session_info['task_id']}\n"
+            f"[bold]Benchmark:[/bold] {session_info['benchmark']}\n"
+            f"[bold]Mode:[/bold] {session_info['mode']}",
+            title="Session Joined",
+        )
+    )
+    console.print()
+
+    # Import runner components
+    import time
+    from hte_cli.events import EventStreamer
+    from hte_cli.runner import TaskRunner
+    from hte_cli.image_utils import extract_images_from_compose, pull_image_with_progress
+
+    # Create event streamer
+    events = EventStreamer(api, session_id)
+
+    # Step 2: Download task files and compose (skip if reconnecting without force)
+    files_zip = None
+    compose_yaml = None
+
+    if not is_reconnect or force_setup:
+        console.print("[bold]Step 1:[/bold] Downloading task resources...")
+
+        with console.status("[dim]Fetching task files...[/dim]"):
+            try:
+                files_zip = api.get_session_files(session_id)
+                console.print(" [green]✓[/green] Task files downloaded")
+            except APIError as e:
+                console.print(f" [dim]○ No task files (optional)[/dim]")
+                files_zip = None
+
+        with console.status("[dim]Fetching compose configuration...[/dim]"):
+            try:
+                compose_yaml = api.get_session_compose(session_id)
+                console.print(" [green]✓[/green] Docker compose downloaded")
+            except APIError as e:
+                console.print(f" [dim]○ No compose file (optional)[/dim]")
+                compose_yaml = None
+
+        console.print()
+
+    # Validate compose for sandbox-required benchmarks
+    benchmark = session_info.get("benchmark", "").lower()
+    SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
+    if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
+        console.print(
+            f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
+        )
+        console.print()
+        console.print(
+            f"Please contact support: {SUPPORT_EMAIL}"
+        )
+        sys.exit(1)
+
+    # Build assignment dict for runner compatibility
+    assignment = {
+        "assignment_id": session_info.get("assignment_id"),
+        "session_id": session_id,
+        "task_id": session_info["task_id"],
+        "benchmark": session_info["benchmark"],
+        "mode": session_info["mode"],
+        "time_cap_seconds": session_info.get("time_cap_seconds"),
+        "task": {
+            "instructions": session_info.get("instructions", ""),
+            "metadata": session_info.get("metadata", {}),
+            "scorer_type": session_info.get("scorer_type"),
+            "intermediate_scoring": session_info.get("intermediate_scoring", False),
+        },
+    }
+
+    # Step 3: Run setup (skip if reconnecting without force)
+    setup_start_time = time.monotonic()
+    images = []
+    pulled_images = []
+    cached_images = []
+    failed_images = []
+
+    if not is_reconnect or force_setup:
+        # Extract images from compose
+        if compose_yaml:
+            images = extract_images_from_compose(compose_yaml)
+
+        # Send setup_started event
+        events.setup_started(images=images)
+
+        # Pull images if we have any
+        if images:
+            from hte_cli.image_utils import check_image_exists_locally
+
+            console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
+            pull_start = time.monotonic()
+
+            for img in images:
+                short_name = img.split("/")[-1][:40]
+
+                # Check if already cached
+                if check_image_exists_locally(img):
+                    console.print(f" [green]✓[/green] {short_name} [dim](cached)[/dim]")
+                    cached_images.append(img)
+                    continue
+
+                # Need to pull - show progress
+                last_status = ["connecting..."]  # Use list for closure mutability
+                with console.status(f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]") as status:
+                    def show_progress(image: str, line: str):
+                        # Parse docker pull output for layer progress
+                        # Lines look like: "abc123: Downloading [====> ] 10MB/50MB"
+                        # Or: "abc123: Extracting [====> ] 10MB/50MB"
+                        # Or: "abc123: Pull complete", "Digest: sha256:...", "Status: ..."
+                        display_text = None
+                        if ": " in line:
+                            parts = line.split(": ", 1)
+                            if len(parts) == 2:
+                                layer_id = parts[0][-8:]  # Last 8 chars of layer ID
+                                layer_status = parts[1]  # Don't truncate - keep full progress
+                                # Show progress bars for Downloading/Extracting with MB info
+                                if "Downloading" in layer_status or "Extracting" in layer_status:
+                                    # Keep progress: "[====> ] 10.5MB/50MB"
+                                    display_text = f"{layer_id}: {layer_status[:50]}"
+                                elif "Pull complete" in layer_status:
+                                    display_text = f"{layer_id}: done"
+                                elif "Download complete" in layer_status:
+                                    display_text = f"{layer_id}: download done"
+                                elif "Already exists" in layer_status:
+                                    display_text = f"{layer_id}: cached"
+                                elif "Waiting" in layer_status:
+                                    display_text = f"{layer_id}: waiting"
+                                elif "Verifying" in layer_status:
+                                    display_text = f"{layer_id}: verifying"
+                                else:
+                                    display_text = line[:55]
+                        elif line.strip():
+                            display_text = line[:55]
+
+                        if display_text and display_text != last_status[0]:
+                            last_status[0] = display_text
+                            status.update(f"[yellow]↓[/yellow] {short_name} [dim]{display_text}[/dim]")
+
+                    success = pull_image_with_progress(img, on_progress=show_progress)
+
+                if success:
+                    console.print(f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]")
+                    pulled_images.append(img)
+                else:
+                    console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
+                    failed_images.append(img)
+
+            pull_duration = time.monotonic() - pull_start
+            events.image_pull_completed(
+                duration_seconds=pull_duration,
+                pulled=pulled_images,
+                cached=cached_images,
+                failed=failed_images,
+            )
+            console.print()
+
+        # Send setup_completed - THIS STARTS THE TIMER ON SERVER
+        total_setup = time.monotonic() - setup_start_time
+        events.setup_completed(total_seconds=total_setup)
+        console.print("[green]Environment ready! Timer started.[/green]")
+        console.print()
+    else:
+        # Reconnecting - compose should already be running
+        console.print("[dim]Skipping setup (use --force-setup to re-run)[/dim]")
+        console.print()
+
+    # Step 4: Show instructions
+    if session_info.get("instructions"):
+        console.print(Panel(session_info["instructions"], title="Task Instructions"))
+        console.print()
+
+    # Step 3: Run the task using TaskRunner
+    step_num = "3" if (not is_reconnect or force_setup) and images else "2" if (not is_reconnect or force_setup) else "1"
+    console.print(f"[bold]Step {step_num}:[/bold] Starting task environment...")
+
+    events.docker_started()
+
+    runner = TaskRunner()
+    eval_log_bytes = None
+    try:
+        with console.status("[dim]Launching Docker containers (this may take a minute)...[/dim]"):
+            result = runner.run_from_assignment(
+                assignment=assignment,
+                compose_yaml=compose_yaml,
+                files_zip=files_zip,
+            )
+        # Read eval log before cleanup
+        if result.eval_log_path and result.eval_log_path.exists():
+            eval_log_bytes = result.eval_log_path.read_bytes()
+    except KeyboardInterrupt:
+        events.docker_stopped(exit_code=130)
+        console.print()
+        console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
+        sys.exit(0)
+    except Exception as e:
+        events.docker_stopped(exit_code=1)
+        console.print(f"[red]Task execution failed: {e}[/red]")
+        sys.exit(1)
+    finally:
+        runner.cleanup()
+
+    events.docker_stopped(exit_code=0)
+
+    # Step 6: Upload result
+    if result and result.answer:
+        events.session_completed(
+            elapsed_seconds=result.time_seconds,
+            answer=result.answer,
+        )
+
+        console.print()
+        console.print("[green]Task completed![/green]")
+        console.print(f"Answer: {result.answer}")
+        console.print(f"Time: {result.time_seconds:.1f}s")
+
+        # Upload to server
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+        ) as progress:
+            progress.add_task("Uploading result...", total=None)
+            try:
+                upload_result = api.upload_result(
+                    session_id=session_id,
+                    answer=result.answer or "",
+                    client_active_seconds=result.time_seconds,
+                    eval_log_bytes=eval_log_bytes,
+                    score=result.score,
+                    score_binarized=result.score_binarized,
+                    agent_id=result.agent_id,
+                )
+            except APIError as e:
+                console.print(f"[red]Failed to upload result: {e}[/red]")
+                sys.exit(1)
+
+        if upload_result.get("score") is not None:
+            console.print(f"Score: {upload_result['score']}")
+
+    console.print()
+    console.print("[green]Done! Return to the web UI to see your results.[/green]")
+
+
+# =============================================================================
+# Tasks Commands (DEPRECATED - use 'session join' instead)
 # =============================================================================


 @cli.group()
 def tasks():
-    """Task commands."""
+    """Task commands (deprecated - use 'session join' instead)."""
     pass


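The `show_progress` callback above reduces raw `docker pull` output to a one-line status for the spinner. A worked example of that parsing, mirroring the code in the hunk above; the input line is made up for illustration:

# Illustrative input only; real `docker pull` lines vary, but follow this "id: status" shape.
line = "a3ed95caeb02: Downloading [=====>    ]  10.5MB/50MB"

parts = line.split(": ", 1)
layer_id = parts[0][-8:]       # "95caeb02" (last 8 chars of the layer ID)
layer_status = parts[1]        # "Downloading [=====>    ]  10.5MB/50MB"

if "Downloading" in layer_status or "Extracting" in layer_status:
    display_text = f"{layer_id}: {layer_status[:50]}"

print(display_text)            # 95caeb02: Downloading [=====>    ]  10.5MB/50MB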
@@ -225,7 +529,23 @@ def tasks_list(ctx):
 @click.argument("task_id", required=False)
 @click.pass_context
 def tasks_run(ctx, task_id: str | None):
-    """Run a task
+    """[DEPRECATED] Run a task - use 'session join' instead."""
+    console.print()
+    console.print("[red]This command is deprecated.[/red]")
+    console.print()
+    console.print("The new workflow is:")
+    console.print(" 1. Start the task from the web UI: https://cyber-task-horizons.com")
+    console.print(" 2. Run the command shown: [bold]hte-cli session join <session_id>[/bold]")
+    console.print()
+    console.print("This ensures accurate timing by starting the timer only when")
+    console.print("the environment is ready, not including Docker setup time.")
+    console.print()
+    sys.exit(1)
+
+
+# Keep the old implementation as _tasks_run_legacy for testing if needed
+def _tasks_run_legacy(ctx, task_id: str | None):
+    """Legacy implementation of tasks run (for testing only)."""
     config: Config = ctx.obj["config"]

     if not config.is_authenticated():
@@ -397,12 +717,18 @@ def tasks_run(ctx, task_id: str | None):
         return

     # Step 5: Pre-pull Docker images with progress
-    from hte_cli.
+    from hte_cli.image_utils import extract_images_from_compose
     import re
+    import time
+
+    setup_start_time = time.monotonic()
+    images: list[str] = []
+    results: list[tuple[str, bool, str]] = []

     if compose_yaml:
         images = extract_images_from_compose(compose_yaml)
         if images:
+            events.setup_started(images)
             console.print()
             console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")

@@ -448,34 +774,48 @@ def tasks_run(ctx, task_id: str | None):
     def get_progress_summary(image: str) -> str:
         """Get a human-readable progress summary for an image with MB counts."""
         if image not in image_layers or not image_layers[image]:
-            return "
+            return "connecting..."

         layers = image_layers[image]
         total_layers = len(layers)
-        complete = sum(1 for s, _, _ in layers.values() if "complete" in s.lower())

-        #
+        # Count layers in different states
+        complete = 0
+        downloading = 0
+        waiting = 0
         total_downloaded_mb = 0
         total_size_mb = 0
+
         for status, downloaded, total in layers.values():
-
-
+            status_lower = status.lower()
+            if "complete" in status_lower:
+                complete += 1
                 total_downloaded_mb += total
                 total_size_mb += total
-            elif
+            elif "downloading" in status_lower:
+                downloading += 1
                 total_downloaded_mb += downloaded
                 total_size_mb += total
+            elif "waiting" in status_lower:
+                waiting += 1

+        # Choose the most informative display
         if complete == total_layers and total_layers > 0:
             if total_size_mb > 0:
-                return f"
-            return f"
+                return f"done ({total_size_mb:.0f}MB)"
+            return f"done ({total_layers} layers)"
         elif total_size_mb > 0:
-
+            # Show MB progress when available
+            pct = int(100 * total_downloaded_mb / total_size_mb) if total_size_mb > 0 else 0
+            return f"{total_downloaded_mb:.0f}/{total_size_mb:.0f}MB ({pct}%)"
+        elif downloading > 0:
+            return f"downloading ({complete}/{total_layers} done)"
         elif complete > 0:
-            return f"
+            return f"extracting ({complete}/{total_layers} done)"
+        elif waiting > 0:
+            return f"queued ({total_layers} layers)"
         else:
-            return f"
+            return f"preparing ({total_layers} layers)"

     def on_image_progress(image: str, line: str):
         """Track layer-level progress with size info."""
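`get_progress_summary` folds the per-layer (status, downloaded_mb, total_mb) tuples tracked by `on_image_progress` into one short string. A worked example of the aggregation with hypothetical layer data (the dictionary contents are invented for illustration):

# Hypothetical per-layer state, keyed by layer ID: (status, downloaded_mb, total_mb).
layers = {
    "aa11bb22": ("Download complete", 50.0, 50.0),   # finished layer: counts its full size
    "cc33dd44": ("Downloading", 10.0, 40.0),         # in-flight layer: counts bytes so far
    "ee55ff66": ("Waiting", 0.0, 0.0),               # queued layer: contributes nothing yet
}

total_downloaded_mb = 0.0
total_size_mb = 0.0
for status, downloaded, total in layers.values():
    if "complete" in status.lower():
        total_downloaded_mb += total
        total_size_mb += total
    elif "downloading" in status.lower():
        total_downloaded_mb += downloaded
        total_size_mb += total

pct = int(100 * total_downloaded_mb / total_size_mb)
print(f"{total_downloaded_mb:.0f}/{total_size_mb:.0f}MB ({pct}%)")   # prints "60/90MB (66%)"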
@@ -492,7 +832,7 @@ def tasks_run(ctx, task_id: str | None):
         short_name = img.split("/")[-1] if "/" in img else img

         # Check if cached first
-        from hte_cli.
+        from hte_cli.image_utils import check_image_exists_locally, pull_image_with_progress

         if check_image_exists_locally(img):
             console.print(f" [green]✓[/green] {short_name} [dim](cached)[/dim]")
@@ -502,7 +842,10 @@ def tasks_run(ctx, task_id: str | None):
         # Need to pull - use Rich Status for live updates
         image_layers[img] = {}

-        with console.status(
+        with console.status(
+            f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
+        ) as status:
+
             def show_progress(image: str, line: str):
                 on_image_progress(image, line)
                 summary = get_progress_summary(image)
@@ -518,14 +861,27 @@ def tasks_run(ctx, task_id: str | None):
             console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
             results.append((img, False, "failed"))

-
-    if
+    failed_count = sum(1 for _, ok, _ in results if not ok)
+    if failed_count > 0:
         console.print(
-            f"[yellow]Warning: {
+            f"[yellow]Warning: {failed_count} image(s) failed to pull. "
             "Task may fail to start.[/yellow]"
         )
         console.print()

+    # Record image pull timing
+    if images:
+        pull_duration = time.monotonic() - setup_start_time
+        pulled = [img for img, ok, status in results if ok and status == "pulled"]
+        cached = [img for img, ok, status in results if ok and status == "cached"]
+        failed = [img for img, ok, status in results if not ok]
+        events.image_pull_completed(
+            duration_seconds=pull_duration,
+            pulled=pulled,
+            cached=cached,
+            failed=failed,
+        )
+
     # Step 6: Run Inspect's human_cli
     runner = TaskRunner()
     console.print("[bold]Starting task environment...[/bold]")
@@ -534,6 +890,10 @@ def tasks_run(ctx, task_id: str | None):

     events.docker_started()

+    # Record total setup time (image pulls + compose up)
+    total_setup = time.monotonic() - setup_start_time
+    events.setup_completed(total_seconds=total_setup)
+
     eval_log_bytes = None
     local_eval_path = None
     try:
@@ -597,13 +957,14 @@ def tasks_run(ctx, task_id: str | None):
     except Exception:
         pass  # Not a CyberGym task or malformed zip

-    # Show upload size info
-
-
-
-
-
-
+    # Show upload size info and track timing
+    upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
+    upload_size_kb = upload_size_bytes / 1024
+    if upload_size_kb / 1024 > 50:
+        console.print(f"[yellow]Warning: Large eval log ({upload_size_kb / 1024:.1f} MB)[/yellow]")
+
+    events.upload_started(size_bytes=upload_size_bytes)
+    upload_start_time = time.monotonic()

     with Progress(
         SpinnerColumn(),
@@ -630,6 +991,10 @@ def tasks_run(ctx, task_id: str | None):
         console.print("[yellow]Your result was saved locally but not uploaded.[/yellow]")
         sys.exit(1)

+    # Record upload completion
+    upload_duration = time.monotonic() - upload_start_time
+    events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
+
     console.print()
     console.print("[green]Result uploaded successfully![/green]")

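With `tasks run` now short-circuiting into a deprecation notice, the old flow survives only as `_tasks_run_legacy`. A sketch of how the deprecation path could be checked with click's test runner; this test is illustrative and assumes the top-level group callback runs without prior configuration:

from click.testing import CliRunner

from hte_cli.cli import cli  # the top-level click group defined in cli.py

runner = CliRunner()
result = runner.invoke(cli, ["tasks", "run"])     # task_id is optional, so none is passed

assert result.exit_code == 1                      # the command now exits with status 1
assert "deprecated" in result.output.lower()      # and prints the deprecation notice
assert "session join" in result.output            # pointing users at the new workflow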
hte_cli/events.py
CHANGED
@@ -24,6 +24,12 @@ class EventStreamer:
         "docker_started",
         "docker_stopped",
         "session_completed",
+        # Overhead tracking events
+        "setup_started",
+        "image_pull_completed",
+        "setup_completed",
+        "upload_started",
+        "upload_completed",
     }

     def __init__(self, api: APIClient, session_id: str):
@@ -126,3 +132,45 @@ class EventStreamer:
         if answer is not None:
             data["answer_submitted"] = True
         return self.send("session_completed", data or None)
+
+    # Overhead tracking events
+
+    def setup_started(self, images: list[str]) -> bool:
+        """Record start of setup phase (before image pulls)."""
+        return self.send("setup_started", {"images": images})
+
+    def image_pull_completed(
+        self,
+        duration_seconds: float,
+        pulled: list[str],
+        cached: list[str],
+        failed: list[str],
+    ) -> bool:
+        """Record image pull results with timing."""
+        return self.send(
+            "image_pull_completed",
+            {
+                "duration_seconds": duration_seconds,
+                "pulled": pulled,
+                "cached": cached,
+                "failed": failed,
+            },
+        )
+
+    def setup_completed(self, total_seconds: float) -> bool:
+        """Record end of setup phase (environment ready for work)."""
+        return self.send("setup_completed", {"total_seconds": total_seconds})
+
+    def upload_started(self, size_bytes: int) -> bool:
+        """Record start of result upload."""
+        return self.send("upload_started", {"size_bytes": size_bytes})
+
+    def upload_completed(self, duration_seconds: float, size_bytes: int) -> bool:
+        """Record end of result upload with timing."""
+        return self.send(
+            "upload_completed",
+            {
+                "duration_seconds": duration_seconds,
+                "size_bytes": size_bytes,
+            },
+        )