hte-cli 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hte_cli-0.2.0 → hte_cli-0.2.2}/PKG-INFO +1 -1
- {hte_cli-0.2.0 → hte_cli-0.2.2}/pyproject.toml +1 -1
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/cli.py +68 -34
- {hte_cli-0.2.0 → hte_cli-0.2.2}/.gitignore +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/README.md +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/__init__.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/__main__.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/api_client.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/config.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/errors.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/events.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/image_utils.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/runner.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/scorers.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/src/hte_cli/version_check.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/__init__.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/__init__.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/automated_runner.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/conftest.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/e2e_test.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/test_benchmark_flows.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/test_eval_logs.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/test_infrastructure.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/test_runtime_imports.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/test_session_lifecycle.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/tests/e2e/verify_docker_deps.py +0 -0
- {hte_cli-0.2.0 → hte_cli-0.2.2}/uv.lock +0 -0
|
@@ -217,8 +217,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
217
217
|
console.print()
|
|
218
218
|
|
|
219
219
|
# Import runner components
|
|
220
|
+
import time
|
|
220
221
|
from hte_cli.events import EventStreamer
|
|
221
|
-
from hte_cli.runner import TaskRunner
|
|
222
|
+
from hte_cli.runner import TaskRunner
|
|
223
|
+
from hte_cli.image_utils import extract_images_from_compose, pull_image_with_progress
|
|
222
224
|
|
|
223
225
|
# Create event streamer
|
|
224
226
|
events = EventStreamer(api, session_id)
|
|
@@ -264,48 +266,62 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
264
266
|
# Build assignment dict for runner compatibility
|
|
265
267
|
assignment = {
|
|
266
268
|
"assignment_id": session_info.get("assignment_id"),
|
|
269
|
+
"session_id": session_id,
|
|
267
270
|
"task_id": session_info["task_id"],
|
|
268
271
|
"benchmark": session_info["benchmark"],
|
|
269
272
|
"mode": session_info["mode"],
|
|
270
273
|
"time_cap_seconds": session_info.get("time_cap_seconds"),
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
"scorer_type": session_info.get("scorer_type"),
|
|
278
|
-
"intermediate_scoring": session_info.get("intermediate_scoring", False),
|
|
274
|
+
"task": {
|
|
275
|
+
"instructions": session_info.get("instructions", ""),
|
|
276
|
+
"metadata": session_info.get("metadata", {}),
|
|
277
|
+
"scorer_type": session_info.get("scorer_type"),
|
|
278
|
+
"intermediate_scoring": session_info.get("intermediate_scoring", False),
|
|
279
|
+
},
|
|
279
280
|
}
|
|
280
281
|
|
|
281
282
|
# Step 3: Run setup (skip if reconnecting without force)
|
|
283
|
+
setup_start_time = time.monotonic()
|
|
284
|
+
images = []
|
|
285
|
+
pulled_images = []
|
|
286
|
+
cached_images = []
|
|
287
|
+
failed_images = []
|
|
288
|
+
|
|
282
289
|
if not is_reconnect or force_setup:
|
|
290
|
+
# Extract images from compose
|
|
291
|
+
if compose_yaml:
|
|
292
|
+
images = extract_images_from_compose(compose_yaml)
|
|
293
|
+
|
|
283
294
|
# Send setup_started event
|
|
284
|
-
events.setup_started(
|
|
295
|
+
events.setup_started(images=images)
|
|
285
296
|
|
|
286
|
-
#
|
|
287
|
-
|
|
288
|
-
|
|
297
|
+
# Pull images if we have any
|
|
298
|
+
if images:
|
|
299
|
+
console.print(f"[bold]Pulling {len(images)} Docker image(s)...[/bold]")
|
|
300
|
+
pull_start = time.monotonic()
|
|
289
301
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
302
|
+
for img in images:
|
|
303
|
+
short_name = img.split("/")[-1][:40]
|
|
304
|
+
with console.status(f"[yellow]↓[/yellow] {short_name}") as status:
|
|
305
|
+
success = pull_image_with_progress(img)
|
|
306
|
+
if success:
|
|
307
|
+
console.print(f" [green]✓[/green] {short_name}")
|
|
308
|
+
pulled_images.append(img)
|
|
309
|
+
else:
|
|
310
|
+
console.print(f" [red]✗[/red] {short_name} (failed)")
|
|
311
|
+
failed_images.append(img)
|
|
312
|
+
|
|
313
|
+
pull_duration = time.monotonic() - pull_start
|
|
314
|
+
events.image_pull_completed(
|
|
315
|
+
duration_seconds=pull_duration,
|
|
316
|
+
pulled=pulled_images,
|
|
317
|
+
cached=cached_images,
|
|
318
|
+
failed=failed_images,
|
|
297
319
|
)
|
|
298
|
-
|
|
299
|
-
# Pull images
|
|
300
|
-
events.image_pull_started({})
|
|
301
|
-
compose_manager.pull_images()
|
|
302
|
-
events.image_pull_completed({})
|
|
303
|
-
|
|
304
|
-
# Start containers
|
|
305
|
-
compose_manager.up()
|
|
320
|
+
console.print()
|
|
306
321
|
|
|
307
322
|
# Send setup_completed - THIS STARTS THE TIMER ON SERVER
|
|
308
|
-
|
|
323
|
+
total_setup = time.monotonic() - setup_start_time
|
|
324
|
+
events.setup_completed(total_seconds=total_setup)
|
|
309
325
|
console.print("[green]Environment ready! Timer started.[/green]")
|
|
310
326
|
console.print()
|
|
311
327
|
else:
|
|
@@ -318,19 +334,37 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
318
334
|
console.print(Panel(session_info["instructions"], title="Task Instructions"))
|
|
319
335
|
console.print()
|
|
320
336
|
|
|
321
|
-
# Step 5: Run the task
|
|
322
|
-
|
|
337
|
+
# Step 5: Run the task using TaskRunner
|
|
338
|
+
console.print("[bold]Starting task environment...[/bold]")
|
|
339
|
+
console.print("[dim]Launching Docker containers...[/dim]")
|
|
340
|
+
console.print()
|
|
341
|
+
|
|
342
|
+
events.docker_started()
|
|
343
|
+
|
|
344
|
+
runner = TaskRunner()
|
|
345
|
+
eval_log_bytes = None
|
|
323
346
|
try:
|
|
324
|
-
result = runner.
|
|
347
|
+
result = runner.run_from_assignment(
|
|
325
348
|
assignment=assignment,
|
|
326
|
-
task=task_data,
|
|
327
349
|
compose_yaml=compose_yaml,
|
|
328
350
|
files_zip=files_zip,
|
|
329
351
|
)
|
|
352
|
+
# Read eval log before cleanup
|
|
353
|
+
if result.eval_log_path and result.eval_log_path.exists():
|
|
354
|
+
eval_log_bytes = result.eval_log_path.read_bytes()
|
|
330
355
|
except KeyboardInterrupt:
|
|
356
|
+
events.docker_stopped(exit_code=130)
|
|
331
357
|
console.print()
|
|
332
358
|
console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
|
|
333
359
|
sys.exit(0)
|
|
360
|
+
except Exception as e:
|
|
361
|
+
events.docker_stopped(exit_code=1)
|
|
362
|
+
console.print(f"[red]Task execution failed: {e}[/red]")
|
|
363
|
+
sys.exit(1)
|
|
364
|
+
finally:
|
|
365
|
+
runner.cleanup()
|
|
366
|
+
|
|
367
|
+
events.docker_stopped(exit_code=0)
|
|
334
368
|
|
|
335
369
|
# Step 6: Upload result
|
|
336
370
|
if result and result.answer:
|
|
@@ -356,7 +390,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
356
390
|
session_id=session_id,
|
|
357
391
|
answer=result.answer or "",
|
|
358
392
|
client_active_seconds=result.time_seconds,
|
|
359
|
-
eval_log_bytes=
|
|
393
|
+
eval_log_bytes=eval_log_bytes,
|
|
360
394
|
score=result.score,
|
|
361
395
|
score_binarized=result.score_binarized,
|
|
362
396
|
agent_id=result.agent_id,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|