hte-cli 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
hte_cli/cli.py CHANGED
@@ -217,8 +217,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
217
217
  console.print()
218
218
 
219
219
  # Import runner components
220
+ import time
220
221
  from hte_cli.events import EventStreamer
221
- from hte_cli.runner import TaskRunner, DockerComposeManager
222
+ from hte_cli.runner import TaskRunner
223
+ from hte_cli.image_utils import extract_images_from_compose, pull_image_with_progress
222
224
 
223
225
  # Create event streamer
224
226
  events = EventStreamer(api, session_id)
@@ -264,48 +266,62 @@ def session_join(ctx, session_id: str, force_setup: bool):
264
266
  # Build assignment dict for runner compatibility
265
267
  assignment = {
266
268
  "assignment_id": session_info.get("assignment_id"),
269
+ "session_id": session_id,
267
270
  "task_id": session_info["task_id"],
268
271
  "benchmark": session_info["benchmark"],
269
272
  "mode": session_info["mode"],
270
273
  "time_cap_seconds": session_info.get("time_cap_seconds"),
271
- }
272
-
273
- # Build task dict for runner
274
- task_data = {
275
- "instructions": session_info.get("instructions", ""),
276
- "metadata": session_info.get("metadata", {}),
277
- "scorer_type": session_info.get("scorer_type"),
278
- "intermediate_scoring": session_info.get("intermediate_scoring", False),
274
+ "task": {
275
+ "instructions": session_info.get("instructions", ""),
276
+ "metadata": session_info.get("metadata", {}),
277
+ "scorer_type": session_info.get("scorer_type"),
278
+ "intermediate_scoring": session_info.get("intermediate_scoring", False),
279
+ },
279
280
  }
280
281
 
281
282
  # Step 3: Run setup (skip if reconnecting without force)
283
+ setup_start_time = time.monotonic()
284
+ images = []
285
+ pulled_images = []
286
+ cached_images = []
287
+ failed_images = []
288
+
282
289
  if not is_reconnect or force_setup:
290
+ # Extract images from compose
291
+ if compose_yaml:
292
+ images = extract_images_from_compose(compose_yaml)
293
+
283
294
  # Send setup_started event
284
- events.setup_started({"cli_version": __version__, "task_id": assignment["task_id"]})
295
+ events.setup_started(images=images)
285
296
 
286
- # Extract files and run compose
287
- runner = TaskRunner(session_id, api, events, console)
288
- compose_manager = None
297
+ # Pull images if we have any
298
+ if images:
299
+ console.print(f"[bold]Pulling {len(images)} Docker image(s)...[/bold]")
300
+ pull_start = time.monotonic()
289
301
 
290
- if compose_yaml:
291
- compose_manager = DockerComposeManager(
292
- compose_yaml=compose_yaml,
293
- files_zip=files_zip,
294
- session_id=session_id,
295
- task_id=assignment["task_id"],
296
- console=console,
302
+ for img in images:
303
+ short_name = img.split("/")[-1][:40]
304
+ with console.status(f"[yellow]↓[/yellow] {short_name}") as status:
305
+ success = pull_image_with_progress(img)
306
+ if success:
307
+ console.print(f" [green]✓[/green] {short_name}")
308
+ pulled_images.append(img)
309
+ else:
310
+ console.print(f" [red]✗[/red] {short_name} (failed)")
311
+ failed_images.append(img)
312
+
313
+ pull_duration = time.monotonic() - pull_start
314
+ events.image_pull_completed(
315
+ duration_seconds=pull_duration,
316
+ pulled=pulled_images,
317
+ cached=cached_images,
318
+ failed=failed_images,
297
319
  )
298
-
299
- # Pull images
300
- events.image_pull_started({})
301
- compose_manager.pull_images()
302
- events.image_pull_completed({})
303
-
304
- # Start containers
305
- compose_manager.up()
320
+ console.print()
306
321
 
307
322
  # Send setup_completed - THIS STARTS THE TIMER ON SERVER
308
- events.setup_completed({"cli_version": __version__})
323
+ total_setup = time.monotonic() - setup_start_time
324
+ events.setup_completed(total_seconds=total_setup)
309
325
  console.print("[green]Environment ready! Timer started.[/green]")
310
326
  console.print()
311
327
  else:
@@ -318,19 +334,37 @@ def session_join(ctx, session_id: str, force_setup: bool):
318
334
  console.print(Panel(session_info["instructions"], title="Task Instructions"))
319
335
  console.print()
320
336
 
321
- # Step 5: Run the task interaction loop
322
- runner = TaskRunner(session_id, api, events, console)
337
+ # Step 5: Run the task using TaskRunner
338
+ console.print("[bold]Starting task environment...[/bold]")
339
+ console.print("[dim]Launching Docker containers...[/dim]")
340
+ console.print()
341
+
342
+ events.docker_started()
343
+
344
+ runner = TaskRunner()
345
+ eval_log_bytes = None
323
346
  try:
324
- result = runner.run(
347
+ result = runner.run_from_assignment(
325
348
  assignment=assignment,
326
- task=task_data,
327
349
  compose_yaml=compose_yaml,
328
350
  files_zip=files_zip,
329
351
  )
352
+ # Read eval log before cleanup
353
+ if result.eval_log_path and result.eval_log_path.exists():
354
+ eval_log_bytes = result.eval_log_path.read_bytes()
330
355
  except KeyboardInterrupt:
356
+ events.docker_stopped(exit_code=130)
331
357
  console.print()
332
358
  console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
333
359
  sys.exit(0)
360
+ except Exception as e:
361
+ events.docker_stopped(exit_code=1)
362
+ console.print(f"[red]Task execution failed: {e}[/red]")
363
+ sys.exit(1)
364
+ finally:
365
+ runner.cleanup()
366
+
367
+ events.docker_stopped(exit_code=0)
334
368
 
335
369
  # Step 6: Upload result
336
370
  if result and result.answer:
@@ -356,7 +390,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
356
390
  session_id=session_id,
357
391
  answer=result.answer or "",
358
392
  client_active_seconds=result.time_seconds,
359
- eval_log_bytes=result.eval_log_bytes,
393
+ eval_log_bytes=eval_log_bytes,
360
394
  score=result.score,
361
395
  score_binarized=result.score_binarized,
362
396
  agent_id=result.agent_id,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,7 +1,7 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
3
  hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
- hte_cli/cli.py,sha256=mMoy1xHKwbeSEHh2CvfdeHJKle_HT4PwKursXBUc__c,38389
4
+ hte_cli/cli.py,sha256=jl8wCzCc6EVpHLSyK3-mo6DskQiZWARv8R-r8lpV4iU,39713
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
7
  hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
@@ -9,7 +9,7 @@ hte_cli/image_utils.py,sha256=454yoZEI1duNYrZC8UjhfZzDRP4Nxdrf2TvnZ_54G1k,4439
9
9
  hte_cli/runner.py,sha256=DhC8FMjHwfLR193iP4thLDRZrNssYA9KH1WYKU2JKeg,13535
10
10
  hte_cli/scorers.py,sha256=sFoPJePRt-K191-Ga4cVmrldruJclYXTOLkU_C9nCDI,6025
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.2.0.dist-info/METADATA,sha256=A2RhmrFnjGkpnbVwjel_CNTtL2Mg_LefwNeWejIvul8,3767
13
- hte_cli-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.2.0.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.2.0.dist-info/RECORD,,
12
+ hte_cli-0.2.2.dist-info/METADATA,sha256=aHmC-LkN9nSUw3HOZ0oMD1hV5BRcBc3w_rSIDilMOF8,3767
13
+ hte_cli-0.2.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.2.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.2.dist-info/RECORD,,