hte-cli 0.2.0__tar.gz → 0.2.1__tar.gz

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {hte_cli-0.2.0 → hte_cli-0.2.1}/PKG-INFO +1 -1
  2. {hte_cli-0.2.0 → hte_cli-0.2.1}/pyproject.toml +1 -1
  3. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/cli.py +51 -34
  4. {hte_cli-0.2.0 → hte_cli-0.2.1}/.gitignore +0 -0
  5. {hte_cli-0.2.0 → hte_cli-0.2.1}/README.md +0 -0
  6. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/__init__.py +0 -0
  7. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/__main__.py +0 -0
  8. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/api_client.py +0 -0
  9. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/config.py +0 -0
  10. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/errors.py +0 -0
  11. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/events.py +0 -0
  12. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/image_utils.py +0 -0
  13. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/runner.py +0 -0
  14. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/scorers.py +0 -0
  15. {hte_cli-0.2.0 → hte_cli-0.2.1}/src/hte_cli/version_check.py +0 -0
  16. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/__init__.py +0 -0
  17. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/__init__.py +0 -0
  18. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/automated_runner.py +0 -0
  19. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/conftest.py +0 -0
  20. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/e2e_test.py +0 -0
  21. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/test_benchmark_flows.py +0 -0
  22. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/test_eval_logs.py +0 -0
  23. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/test_infrastructure.py +0 -0
  24. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/test_runtime_imports.py +0 -0
  25. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/test_session_lifecycle.py +0 -0
  26. {hte_cli-0.2.0 → hte_cli-0.2.1}/tests/e2e/verify_docker_deps.py +0 -0
  27. {hte_cli-0.2.0 → hte_cli-0.2.1}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hte-cli"
3
- version = "0.2.0"
3
+ version = "0.2.1"
4
4
  description = "Human Time-to-Completion Evaluation CLI"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -217,8 +217,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
217
217
  console.print()
218
218
 
219
219
  # Import runner components
220
+ import time
220
221
  from hte_cli.events import EventStreamer
221
- from hte_cli.runner import TaskRunner, DockerComposeManager
222
+ from hte_cli.runner import TaskRunner
223
+ from hte_cli.image_utils import extract_images_from_compose, pull_image_with_progress
222
224
 
223
225
  # Create event streamer
224
226
  events = EventStreamer(api, session_id)
@@ -264,48 +266,45 @@ def session_join(ctx, session_id: str, force_setup: bool):
264
266
  # Build assignment dict for runner compatibility
265
267
  assignment = {
266
268
  "assignment_id": session_info.get("assignment_id"),
269
+ "session_id": session_id,
267
270
  "task_id": session_info["task_id"],
268
271
  "benchmark": session_info["benchmark"],
269
272
  "mode": session_info["mode"],
270
273
  "time_cap_seconds": session_info.get("time_cap_seconds"),
271
- }
272
-
273
- # Build task dict for runner
274
- task_data = {
275
- "instructions": session_info.get("instructions", ""),
276
- "metadata": session_info.get("metadata", {}),
277
- "scorer_type": session_info.get("scorer_type"),
278
- "intermediate_scoring": session_info.get("intermediate_scoring", False),
274
+ "task": {
275
+ "instructions": session_info.get("instructions", ""),
276
+ "metadata": session_info.get("metadata", {}),
277
+ "scorer_type": session_info.get("scorer_type"),
278
+ "intermediate_scoring": session_info.get("intermediate_scoring", False),
279
+ },
279
280
  }
280
281
 
281
282
  # Step 3: Run setup (skip if reconnecting without force)
283
+ setup_start_time = time.monotonic()
282
284
  if not is_reconnect or force_setup:
283
285
  # Send setup_started event
284
286
  events.setup_started({"cli_version": __version__, "task_id": assignment["task_id"]})
285
287
 
286
- # Extract files and run compose
287
- runner = TaskRunner(session_id, api, events, console)
288
- compose_manager = None
289
-
288
+ # Pull images if we have compose
290
289
  if compose_yaml:
291
- compose_manager = DockerComposeManager(
292
- compose_yaml=compose_yaml,
293
- files_zip=files_zip,
294
- session_id=session_id,
295
- task_id=assignment["task_id"],
296
- console=console,
297
- )
298
-
299
- # Pull images
300
- events.image_pull_started({})
301
- compose_manager.pull_images()
302
- events.image_pull_completed({})
303
-
304
- # Start containers
305
- compose_manager.up()
290
+ images = extract_images_from_compose(compose_yaml)
291
+ if images:
292
+ console.print(f"[bold]Pulling {len(images)} Docker image(s)...[/bold]")
293
+ events.image_pull_started({})
294
+ for img in images:
295
+ short_name = img.split("/")[-1][:40]
296
+ with console.status(f"[yellow]↓[/yellow] {short_name}") as status:
297
+ success = pull_image_with_progress(img)
298
+ if success:
299
+ console.print(f" [green]✓[/green] {short_name}")
300
+ else:
301
+ console.print(f" [red]✗[/red] {short_name} (failed)")
302
+ events.image_pull_completed({})
303
+ console.print()
306
304
 
307
305
  # Send setup_completed - THIS STARTS THE TIMER ON SERVER
308
- events.setup_completed({"cli_version": __version__})
306
+ total_setup = time.monotonic() - setup_start_time
307
+ events.setup_completed(total_seconds=total_setup)
309
308
  console.print("[green]Environment ready! Timer started.[/green]")
310
309
  console.print()
311
310
  else:
@@ -318,19 +317,37 @@ def session_join(ctx, session_id: str, force_setup: bool):
318
317
  console.print(Panel(session_info["instructions"], title="Task Instructions"))
319
318
  console.print()
320
319
 
321
- # Step 5: Run the task interaction loop
322
- runner = TaskRunner(session_id, api, events, console)
320
+ # Step 5: Run the task using TaskRunner
321
+ console.print("[bold]Starting task environment...[/bold]")
322
+ console.print("[dim]Launching Docker containers...[/dim]")
323
+ console.print()
324
+
325
+ events.docker_started()
326
+
327
+ runner = TaskRunner()
328
+ eval_log_bytes = None
323
329
  try:
324
- result = runner.run(
330
+ result = runner.run_from_assignment(
325
331
  assignment=assignment,
326
- task=task_data,
327
332
  compose_yaml=compose_yaml,
328
333
  files_zip=files_zip,
329
334
  )
335
+ # Read eval log before cleanup
336
+ if result.eval_log_path and result.eval_log_path.exists():
337
+ eval_log_bytes = result.eval_log_path.read_bytes()
330
338
  except KeyboardInterrupt:
339
+ events.docker_stopped(exit_code=130)
331
340
  console.print()
332
341
  console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
333
342
  sys.exit(0)
343
+ except Exception as e:
344
+ events.docker_stopped(exit_code=1)
345
+ console.print(f"[red]Task execution failed: {e}[/red]")
346
+ sys.exit(1)
347
+ finally:
348
+ runner.cleanup()
349
+
350
+ events.docker_stopped(exit_code=0)
334
351
 
335
352
  # Step 6: Upload result
336
353
  if result and result.answer:
@@ -356,7 +373,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
356
373
  session_id=session_id,
357
374
  answer=result.answer or "",
358
375
  client_active_seconds=result.time_seconds,
359
- eval_log_bytes=result.eval_log_bytes,
376
+ eval_log_bytes=eval_log_bytes,
360
377
  score=result.score,
361
378
  score_binarized=result.score_binarized,
362
379
  agent_id=result.agent_id,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes