hte-cli 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hte_cli-0.2.3 → hte_cli-0.2.5}/PKG-INFO +1 -1
- {hte_cli-0.2.3 → hte_cli-0.2.5}/pyproject.toml +1 -1
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/cli.py +24 -20
- {hte_cli-0.2.3 → hte_cli-0.2.5}/.gitignore +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/README.md +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/__init__.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/__main__.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/api_client.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/config.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/errors.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/events.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/image_utils.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/runner.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/scorers.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/src/hte_cli/version_check.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/__init__.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/__init__.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/automated_runner.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/conftest.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/e2e_test.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/test_benchmark_flows.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/test_eval_logs.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/test_infrastructure.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/test_runtime_imports.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/test_session_lifecycle.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/tests/e2e/verify_docker_deps.py +0 -0
- {hte_cli-0.2.3 → hte_cli-0.2.5}/uv.lock +0 -0
|
@@ -230,26 +230,26 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
230
230
|
compose_yaml = None
|
|
231
231
|
|
|
232
232
|
if not is_reconnect or force_setup:
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
console=console,
|
|
237
|
-
) as progress:
|
|
238
|
-
task_id_display = progress.add_task("Downloading task files...", total=None)
|
|
233
|
+
console.print("[bold]Step 1:[/bold] Downloading task resources...")
|
|
234
|
+
|
|
235
|
+
with console.status("[dim]Fetching task files...[/dim]"):
|
|
239
236
|
try:
|
|
240
237
|
files_zip = api.get_session_files(session_id)
|
|
238
|
+
console.print(" [green]✓[/green] Task files downloaded")
|
|
241
239
|
except APIError as e:
|
|
242
|
-
|
|
243
|
-
console.print(f"[dim]Note: {e}[/dim]")
|
|
240
|
+
console.print(f" [dim]○ No task files (optional)[/dim]")
|
|
244
241
|
files_zip = None
|
|
245
242
|
|
|
246
|
-
|
|
243
|
+
with console.status("[dim]Fetching compose configuration...[/dim]"):
|
|
247
244
|
try:
|
|
248
245
|
compose_yaml = api.get_session_compose(session_id)
|
|
246
|
+
console.print(" [green]✓[/green] Docker compose downloaded")
|
|
249
247
|
except APIError as e:
|
|
250
|
-
|
|
248
|
+
console.print(f" [dim]○ No compose file (optional)[/dim]")
|
|
251
249
|
compose_yaml = None
|
|
252
250
|
|
|
251
|
+
console.print()
|
|
252
|
+
|
|
253
253
|
# Validate compose for sandbox-required benchmarks
|
|
254
254
|
benchmark = session_info.get("benchmark", "").lower()
|
|
255
255
|
SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
|
|
@@ -298,7 +298,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
298
298
|
if images:
|
|
299
299
|
from hte_cli.image_utils import check_image_exists_locally
|
|
300
300
|
|
|
301
|
-
console.print(f"[bold]Pulling {len(images)} Docker image(s)...
|
|
301
|
+
console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
|
|
302
302
|
pull_start = time.monotonic()
|
|
303
303
|
|
|
304
304
|
for img in images:
|
|
@@ -315,11 +315,15 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
315
315
|
def show_progress(image: str, line: str):
|
|
316
316
|
# Parse docker pull output for layer progress
|
|
317
317
|
# Lines look like: "abc123: Downloading [====> ] 10MB/50MB"
|
|
318
|
+
# Or: "Digest: sha256:..." or "Status: Downloaded newer image"
|
|
318
319
|
if ": " in line:
|
|
319
320
|
parts = line.split(": ", 1)
|
|
320
321
|
if len(parts) == 2:
|
|
321
322
|
layer_status = parts[1][:50] # Truncate
|
|
322
323
|
status.update(f"[yellow]↓[/yellow] {short_name} [dim]{layer_status}[/dim]")
|
|
324
|
+
elif line.strip():
|
|
325
|
+
# Show any other non-empty output
|
|
326
|
+
status.update(f"[yellow]↓[/yellow] {short_name} [dim]{line[:50]}[/dim]")
|
|
323
327
|
|
|
324
328
|
success = pull_image_with_progress(img, on_progress=show_progress)
|
|
325
329
|
|
|
@@ -354,21 +358,21 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
354
358
|
console.print(Panel(session_info["instructions"], title="Task Instructions"))
|
|
355
359
|
console.print()
|
|
356
360
|
|
|
357
|
-
# Step
|
|
358
|
-
|
|
359
|
-
console.print("[
|
|
360
|
-
console.print()
|
|
361
|
+
# Step 3: Run the task using TaskRunner
|
|
362
|
+
step_num = "3" if (not is_reconnect or force_setup) and images else "2" if (not is_reconnect or force_setup) else "1"
|
|
363
|
+
console.print(f"[bold]Step {step_num}:[/bold] Starting task environment...")
|
|
361
364
|
|
|
362
365
|
events.docker_started()
|
|
363
366
|
|
|
364
367
|
runner = TaskRunner()
|
|
365
368
|
eval_log_bytes = None
|
|
366
369
|
try:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
370
|
+
with console.status("[dim]Launching Docker containers (this may take a minute)...[/dim]"):
|
|
371
|
+
result = runner.run_from_assignment(
|
|
372
|
+
assignment=assignment,
|
|
373
|
+
compose_yaml=compose_yaml,
|
|
374
|
+
files_zip=files_zip,
|
|
375
|
+
)
|
|
372
376
|
# Read eval log before cleanup
|
|
373
377
|
if result.eval_log_path and result.eval_log_path.exists():
|
|
374
378
|
eval_log_bytes = result.eval_log_path.read_bytes()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|