hte-cli 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hte_cli/cli.py CHANGED
@@ -194,7 +194,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
194
194
  elif e.status_code == 404:
195
195
  console.print("[red]Session not found. Check the session ID and try again.[/red]")
196
196
  elif e.status_code == 400 and "paused" in str(e).lower():
197
- console.print("[yellow]Session is paused. Please resume from the web UI first.[/yellow]")
197
+ console.print(
198
+ "[yellow]Session is paused. Please resume from the web UI first.[/yellow]"
199
+ )
198
200
  else:
199
201
  console.print(f"[red]Error: {e}[/red]")
200
202
  sys.exit(1)
@@ -236,16 +238,16 @@ def session_join(ctx, session_id: str, force_setup: bool):
236
238
  try:
237
239
  files_zip = api.get_session_files(session_id)
238
240
  console.print(" [green]✓[/green] Task files downloaded")
239
- except APIError as e:
240
- console.print(f" [dim]○ No task files (optional)[/dim]")
241
+ except APIError:
242
+ console.print(" [dim]○ No task files (optional)[/dim]")
241
243
  files_zip = None
242
244
 
243
245
  with console.status("[dim]Fetching compose configuration...[/dim]"):
244
246
  try:
245
247
  compose_yaml = api.get_session_compose(session_id)
246
248
  console.print(" [green]✓[/green] Docker compose downloaded")
247
- except APIError as e:
248
- console.print(f" [dim]○ No compose file (optional)[/dim]")
249
+ except APIError:
250
+ console.print(" [dim]○ No compose file (optional)[/dim]")
249
251
  compose_yaml = None
250
252
 
251
253
  console.print()
@@ -258,9 +260,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
258
260
  f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
259
261
  )
260
262
  console.print()
261
- console.print(
262
- f"Please contact support: {SUPPORT_EMAIL}"
263
- )
263
+ console.print(f"Please contact support: {SUPPORT_EMAIL}")
264
264
  sys.exit(1)
265
265
 
266
266
  # Build assignment dict for runner compatibility
@@ -280,6 +280,14 @@ def session_join(ctx, session_id: str, force_setup: bool):
280
280
  },
281
281
  }
282
282
 
283
+ # Send session_started event (records CLI version for debugging)
284
+ events.session_started(
285
+ {
286
+ "cli_version": __version__,
287
+ "task_id": session_info["task_id"],
288
+ }
289
+ )
290
+
283
291
  # Step 3: Run setup (skip if reconnecting without force)
284
292
  setup_start_time = time.monotonic()
285
293
  images = []
@@ -313,7 +321,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
313
321
 
314
322
  # Need to pull - show progress
315
323
  last_status = ["connecting..."]
316
- with console.status(f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]") as status:
324
+ with console.status(
325
+ f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
326
+ ) as status:
327
+
317
328
  def show_progress(image: str, line: str):
318
329
  # Show docker output directly - includes MB progress from PTY
319
330
  # Lines look like: "abc123: Downloading 360.9MB/4.075GB"
@@ -325,7 +336,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
325
336
  display = f"{layer_id}: {layer_status}"
326
337
  if display != last_status[0]:
327
338
  last_status[0] = display
328
- status.update(f"[yellow]↓[/yellow] {short_name} [dim]{display}[/dim]")
339
+ status.update(
340
+ f"[yellow]↓[/yellow] {short_name} [dim]{display}[/dim]"
341
+ )
329
342
 
330
343
  success = pull_image_with_progress(img, on_progress=show_progress)
331
344
 
@@ -370,7 +383,13 @@ def session_join(ctx, session_id: str, force_setup: bool):
370
383
  console.print()
371
384
 
372
385
  # Step 3: Run the task using TaskRunner
373
- step_num = "3" if (not is_reconnect or force_setup) and images else "2" if (not is_reconnect or force_setup) else "1"
386
+ step_num = (
387
+ "3"
388
+ if (not is_reconnect or force_setup) and images
389
+ else "2"
390
+ if (not is_reconnect or force_setup)
391
+ else "1"
392
+ )
374
393
  console.print(f"[bold]Step {step_num}:[/bold] Starting task environment...")
375
394
  console.print("[dim]Launching Docker containers...[/dim]")
376
395
  console.print()
@@ -391,7 +410,9 @@ def session_join(ctx, session_id: str, force_setup: bool):
391
410
  except KeyboardInterrupt:
392
411
  events.docker_stopped(exit_code=130)
393
412
  console.print()
394
- console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
413
+ console.print(
414
+ "[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]"
415
+ )
395
416
  sys.exit(0)
396
417
  except Exception as e:
397
418
  events.docker_stopped(exit_code=1)
@@ -415,10 +436,12 @@ def session_join(ctx, session_id: str, force_setup: bool):
415
436
  try:
416
437
  from io import BytesIO
417
438
  from zipfile import ZipFile
439
+
418
440
  with ZipFile(BytesIO(files_zip)) as zf:
419
441
  if "difficulty_levels.json" in zf.namelist():
420
442
  with zf.open("difficulty_levels.json") as f:
421
443
  import json
444
+
422
445
  difficulty_info = json.load(f)
423
446
  agent_id = difficulty_info.get("agent_id")
424
447
  except Exception:
@@ -429,13 +452,21 @@ def session_join(ctx, session_id: str, force_setup: bool):
429
452
  console.print(f"Answer: {result.answer}")
430
453
  console.print(f"Time: {result.time_seconds:.1f}s")
431
454
 
455
+ # Track upload size and timing
456
+ upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
457
+ upload_size_kb = upload_size_bytes / 1024
458
+
459
+ events.upload_started(size_bytes=upload_size_bytes)
460
+ upload_start_time = time.monotonic()
461
+
432
462
  # Upload to server
433
463
  with Progress(
434
464
  SpinnerColumn(),
435
465
  TextColumn("[progress.description]{task.description}"),
436
466
  console=console,
437
467
  ) as progress:
438
- progress.add_task("Uploading result...", total=None)
468
+ size_str = f" ({upload_size_kb:.0f} KB)" if upload_size_kb > 0 else ""
469
+ progress.add_task(f"Uploading result{size_str}...", total=None)
439
470
  try:
440
471
  upload_result = api.upload_result(
441
472
  session_id=session_id,
@@ -450,6 +481,10 @@ def session_join(ctx, session_id: str, force_setup: bool):
450
481
  console.print(f"[red]Failed to upload result: {e}[/red]")
451
482
  sys.exit(1)
452
483
 
484
+ # Record upload completion
485
+ upload_duration = time.monotonic() - upload_start_time
486
+ events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
487
+
453
488
  if upload_result.get("score") is not None:
454
489
  console.print(f"Score: {upload_result['score']}")
455
490
 
@@ -548,474 +583,6 @@ def tasks_run(ctx, task_id: str | None):
548
583
  sys.exit(1)
549
584
 
550
585
 
551
- # Keep the old implementation as _tasks_run_legacy for testing if needed
552
- def _tasks_run_legacy(ctx, task_id: str | None):
553
- """Legacy implementation of tasks run (for testing only)."""
554
- config: Config = ctx.obj["config"]
555
-
556
- if not config.is_authenticated():
557
- console.print("[red]Not logged in. Run: hte-cli auth login[/red]")
558
- sys.exit(1)
559
-
560
- # Check Docker and Compose version
561
- docker_ok, docker_error = _check_docker()
562
- if not docker_ok:
563
- console.print(f"[red]{docker_error}[/red]")
564
- sys.exit(1)
565
-
566
- api = APIClient(config)
567
-
568
- # Get assignments
569
- with Progress(
570
- SpinnerColumn(),
571
- TextColumn("[progress.description]{task.description}"),
572
- console=console,
573
- ) as progress:
574
- progress.add_task("Fetching assignments...", total=None)
575
- try:
576
- assignments = api.get_assignments()
577
- except APIError as e:
578
- console.print(f"[red]Error: {e}[/red]")
579
- sys.exit(1)
580
-
581
- if not assignments:
582
- console.print("[yellow]No pending assignments[/yellow]")
583
- return
584
-
585
- # Find the assignment to run
586
- assignment = None
587
- if task_id:
588
- for a in assignments:
589
- if a["task_id"] == task_id:
590
- assignment = a
591
- break
592
- if not assignment:
593
- console.print(f"[red]Task not found in your assignments: {task_id}[/red]")
594
- sys.exit(1)
595
- else:
596
- # Take highest priority (first in list, already sorted by server)
597
- assignment = assignments[0]
598
-
599
- console.print()
600
- console.print(
601
- Panel(
602
- f"[bold]Task:[/bold] {assignment['task_id']}\n"
603
- f"[bold]Benchmark:[/bold] {assignment['benchmark']}\n"
604
- f"[bold]Mode:[/bold] {assignment['mode']}",
605
- title="Starting Task",
606
- )
607
- )
608
- console.print()
609
-
610
- # Import runner and events
611
- from hte_cli.runner import TaskRunner
612
- from hte_cli.events import EventStreamer
613
-
614
- # Step 1: Start session (or resume existing)
615
- session_id = assignment.get("session_id")
616
- if not session_id:
617
- with Progress(
618
- SpinnerColumn(),
619
- TextColumn("[progress.description]{task.description}"),
620
- console=console,
621
- ) as progress:
622
- progress.add_task("Starting session...", total=None)
623
- try:
624
- session_info = api.start_session(assignment["assignment_id"])
625
- session_id = session_info["session_id"]
626
- except APIError as e:
627
- console.print(f"[red]Failed to start session: {e}[/red]")
628
- sys.exit(1)
629
- else:
630
- console.print(f"[yellow]Resuming session: {session_id}[/yellow]")
631
-
632
- # Create event streamer
633
- events = EventStreamer(api, session_id)
634
-
635
- # Step 2: Download task files and compose
636
- files_zip = None
637
- compose_yaml = None
638
-
639
- with Progress(
640
- SpinnerColumn(),
641
- TextColumn("[progress.description]{task.description}"),
642
- console=console,
643
- ) as progress:
644
- # Download files
645
- task_id_display = progress.add_task("Downloading task files...", total=None)
646
- try:
647
- files_zip = api.get_assignment_files(assignment["assignment_id"])
648
- except APIError as e:
649
- # Files are optional for some benchmarks
650
- console.print(f"[dim]Note: {e}[/dim]")
651
-
652
- # Download compose
653
- progress.update(task_id_display, description="Downloading compose file...")
654
- try:
655
- compose_yaml = api.get_assignment_compose(assignment["assignment_id"])
656
- except APIError as e:
657
- # Compose is optional for simple benchmarks
658
- console.print(f"[dim]Note: {e}[/dim]")
659
-
660
- # Validate compose for sandbox-required benchmarks
661
- benchmark = assignment.get("benchmark", "").lower()
662
- SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
663
- if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml:
664
- console.print(
665
- f"[red]Error: This {benchmark} task requires a Docker sandbox but none is available.[/red]"
666
- )
667
- if benchmark == "cvebench":
668
- console.print(
669
- "[yellow]This CVE may have infrastructure issues preventing Docker support.[/yellow]"
670
- )
671
- console.print("[dim]Contact support if you believe this is an error.[/dim]")
672
- sys.exit(1)
673
-
674
- # Step 3: Send session_started event
675
- events.session_started(
676
- {
677
- "cli_version": __version__,
678
- "task_id": assignment["task_id"],
679
- }
680
- )
681
-
682
- # Step 4: Show pre-task message (benchmark-specific)
683
- single_shot_benchmarks = {"nl2bash", "cybashbench"}
684
-
685
- console.print()
686
- if benchmark in single_shot_benchmarks:
687
- # Single-shot benchmarks - no task score available
688
- console.print(
689
- Panel(
690
- "[bold]Instructions[/bold]\n\n"
691
- "You are about to enter the task environment.\n\n"
692
- "[yellow]IMPORTANT: This is a SINGLE-SHOT task.[/yellow]\n"
693
- "[yellow]You get ONE submission attempt - no retries![/yellow]\n\n"
694
- "Commands available:\n"
695
- " [cyan]task status[/cyan] - Show elapsed time\n"
696
- ' [cyan]task submit "answer"[/cyan] - Submit your FINAL answer (ends task)\n'
697
- " [cyan]task quit[/cyan] - Quit without submitting\n"
698
- ' [cyan]task note "text"[/cyan] - Record observations\n',
699
- title="Task Environment",
700
- )
701
- )
702
- else:
703
- # CTF/sandbox benchmarks - task score available
704
- console.print(
705
- Panel(
706
- "[bold]Instructions[/bold]\n\n"
707
- "You are about to enter the task environment.\n\n"
708
- "Commands available:\n"
709
- " [cyan]task status[/cyan] - Show elapsed time\n"
710
- ' [cyan]task score "answer"[/cyan] - CHECK if correct (does NOT end task)\n'
711
- ' [cyan]task submit "answer"[/cyan] - Submit FINAL answer (ends task)\n'
712
- " [cyan]task quit[/cyan] - Quit without submitting\n"
713
- ' [cyan]task note "text"[/cyan] - Record observations\n\n'
714
- "[green]TIP: Use 'task score' to verify before submitting![/green]\n",
715
- title="Task Environment",
716
- )
717
- )
718
- console.print()
719
-
720
- if not click.confirm("Ready to start?"):
721
- console.print("[yellow]Cancelled[/yellow]")
722
- return
723
-
724
- # Step 5: Pre-pull Docker images with progress
725
- from hte_cli.image_utils import extract_images_from_compose
726
- import re
727
- import time
728
-
729
- setup_start_time = time.monotonic()
730
- images: list[str] = []
731
- results: list[tuple[str, bool, str]] = []
732
-
733
- if compose_yaml:
734
- images = extract_images_from_compose(compose_yaml)
735
- if images:
736
- events.setup_started(images)
737
- console.print()
738
- console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")
739
-
740
- # Track layer progress per image: {layer_id: (status, downloaded_mb, total_mb)}
741
- image_layers: dict[str, dict[str, tuple[str, float, float]]] = {}
742
-
743
- def parse_size(size_str: str) -> float:
744
- """Parse size string like '1.2MB' or '500kB' to MB."""
745
- size_str = size_str.strip().upper()
746
- if "GB" in size_str:
747
- return float(size_str.replace("GB", "").strip()) * 1024
748
- elif "MB" in size_str:
749
- return float(size_str.replace("MB", "").strip())
750
- elif "KB" in size_str:
751
- return float(size_str.replace("KB", "").strip()) / 1024
752
- elif "B" in size_str:
753
- return float(size_str.replace("B", "").strip()) / (1024 * 1024)
754
- return 0
755
-
756
- def parse_docker_line(line: str) -> tuple[str | None, str, float, float]:
757
- """Parse Docker pull output to extract layer ID, status, and sizes.
758
-
759
- Returns: (layer_id, status, downloaded_mb, total_mb)
760
- """
761
- # Format: "79f742de2855: Downloading [==>] 1.2MB/50MB"
762
- # Or: "79f742de2855: Pull complete"
763
- match = re.match(r"([a-f0-9]+): (.+)", line)
764
- if not match:
765
- return None, "", 0, 0
766
-
767
- layer_id = match.group(1)
768
- status_part = match.group(2)
769
-
770
- # Try to extract size info from "Downloading [==>] 1.2MB/50MB"
771
- size_match = re.search(r"([\d.]+[kKmMgG]?[bB]?)/([\d.]+[kKmMgG]?[bB])", status_part)
772
- if size_match:
773
- downloaded = parse_size(size_match.group(1))
774
- total = parse_size(size_match.group(2))
775
- return layer_id, status_part, downloaded, total
776
-
777
- return layer_id, status_part, 0, 0
778
-
779
- def get_progress_summary(image: str) -> str:
780
- """Get a human-readable progress summary for an image with MB counts."""
781
- if image not in image_layers or not image_layers[image]:
782
- return "connecting..."
783
-
784
- layers = image_layers[image]
785
- total_layers = len(layers)
786
-
787
- # Count layers in different states
788
- complete = 0
789
- downloading = 0
790
- waiting = 0
791
- total_downloaded_mb = 0
792
- total_size_mb = 0
793
-
794
- for status, downloaded, total in layers.values():
795
- status_lower = status.lower()
796
- if "complete" in status_lower:
797
- complete += 1
798
- total_downloaded_mb += total
799
- total_size_mb += total
800
- elif "downloading" in status_lower:
801
- downloading += 1
802
- total_downloaded_mb += downloaded
803
- total_size_mb += total
804
- elif "waiting" in status_lower:
805
- waiting += 1
806
-
807
- # Choose the most informative display
808
- if complete == total_layers and total_layers > 0:
809
- if total_size_mb > 0:
810
- return f"done ({total_size_mb:.0f}MB)"
811
- return f"done ({total_layers} layers)"
812
- elif total_size_mb > 0:
813
- # Show MB progress when available
814
- pct = int(100 * total_downloaded_mb / total_size_mb) if total_size_mb > 0 else 0
815
- return f"{total_downloaded_mb:.0f}/{total_size_mb:.0f}MB ({pct}%)"
816
- elif downloading > 0:
817
- return f"downloading ({complete}/{total_layers} done)"
818
- elif complete > 0:
819
- return f"extracting ({complete}/{total_layers} done)"
820
- elif waiting > 0:
821
- return f"queued ({total_layers} layers)"
822
- else:
823
- return f"preparing ({total_layers} layers)"
824
-
825
- def on_image_progress(image: str, line: str):
826
- """Track layer-level progress with size info."""
827
- if image not in image_layers:
828
- image_layers[image] = {}
829
-
830
- layer_id, status, downloaded, total = parse_docker_line(line)
831
- if layer_id:
832
- image_layers[image][layer_id] = (status, downloaded, total)
833
-
834
- # Process images sequentially with clear output
835
- results = []
836
- for idx, img in enumerate(images, 1):
837
- short_name = img.split("/")[-1] if "/" in img else img
838
-
839
- # Check if cached first
840
- from hte_cli.image_utils import check_image_exists_locally, pull_image_with_progress
841
-
842
- if check_image_exists_locally(img):
843
- console.print(f" [green]✓[/green] {short_name} [dim](cached)[/dim]")
844
- results.append((img, True, "cached"))
845
- continue
846
-
847
- # Need to pull - use Rich Status for live updates
848
- image_layers[img] = {}
849
-
850
- with console.status(
851
- f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]"
852
- ) as status:
853
-
854
- def show_progress(image: str, line: str):
855
- on_image_progress(image, line)
856
- summary = get_progress_summary(image)
857
- status.update(f"[yellow]↓[/yellow] {short_name} [dim]{summary}[/dim]")
858
-
859
- success = pull_image_with_progress(img, on_progress=show_progress)
860
-
861
- # Final status (printed after status context exits)
862
- if success:
863
- console.print(f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]")
864
- results.append((img, True, "pulled"))
865
- else:
866
- console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
867
- results.append((img, False, "failed"))
868
-
869
- failed_count = sum(1 for _, ok, _ in results if not ok)
870
- if failed_count > 0:
871
- console.print(
872
- f"[yellow]Warning: {failed_count} image(s) failed to pull. "
873
- "Task may fail to start.[/yellow]"
874
- )
875
- console.print()
876
-
877
- # Record image pull timing
878
- if images:
879
- pull_duration = time.monotonic() - setup_start_time
880
- pulled = [img for img, ok, status in results if ok and status == "pulled"]
881
- cached = [img for img, ok, status in results if ok and status == "cached"]
882
- failed = [img for img, ok, status in results if not ok]
883
- events.image_pull_completed(
884
- duration_seconds=pull_duration,
885
- pulled=pulled,
886
- cached=cached,
887
- failed=failed,
888
- )
889
-
890
- # Step 6: Run Inspect's human_cli
891
- runner = TaskRunner()
892
- console.print("[bold]Starting task environment...[/bold]")
893
- console.print("[dim]Launching Docker containers...[/dim]")
894
- console.print()
895
-
896
- events.docker_started()
897
-
898
- # Record total setup time (image pulls + compose up)
899
- total_setup = time.monotonic() - setup_start_time
900
- events.setup_completed(total_seconds=total_setup)
901
-
902
- eval_log_bytes = None
903
- local_eval_path = None
904
- try:
905
- result = runner.run_from_assignment(
906
- assignment=assignment,
907
- compose_yaml=compose_yaml,
908
- files_zip=files_zip,
909
- )
910
- # Read eval log BEFORE cleanup (cleanup deletes the temp directory)
911
- if result.eval_log_path and result.eval_log_path.exists():
912
- eval_log_bytes = result.eval_log_path.read_bytes()
913
-
914
- # Save local copy for safety
915
- eval_logs_dir = get_eval_logs_dir()
916
- eval_logs_dir.mkdir(parents=True, exist_ok=True)
917
- local_eval_path = eval_logs_dir / result.eval_log_path.name
918
- local_eval_path.write_bytes(eval_log_bytes)
919
- except Exception as e:
920
- events.docker_stopped(exit_code=1)
921
- console.print(f"[red]Task execution failed: {e}[/red]")
922
- sys.exit(1)
923
- finally:
924
- runner.cleanup()
925
-
926
- events.docker_stopped(exit_code=0)
927
-
928
- # Step 6: Show post-task summary
929
- console.print()
930
- console.print(
931
- Panel(
932
- f"[bold]Time spent:[/bold] {result.time_seconds / 60:.1f} minutes\n"
933
- f"[bold]Answer:[/bold] {result.answer or '(none)'}\n"
934
- f"[bold]Score:[/bold] {result.score if result.score is not None else 'pending'}",
935
- title="Task Complete",
936
- )
937
- )
938
-
939
- # Defensive check: don't upload if task didn't actually run
940
- # (catches edge cases where runner returned without proper error)
941
- if result.time_seconds == 0.0 and result.answer is None:
942
- console.print()
943
- console.print("[red]Task did not complete successfully (0 time, no answer).[/red]")
944
- console.print("[yellow]Session preserved - run 'hte-cli tasks run' to retry.[/yellow]")
945
- sys.exit(1)
946
-
947
- # Step 7: Upload result
948
- events.session_completed(
949
- elapsed_seconds=result.time_seconds,
950
- answer=result.answer,
951
- )
952
-
953
- # Extract agent_id from task files for CyberGym post-hoc verification
954
- agent_id = None
955
- if files_zip:
956
- try:
957
- with ZipFile(BytesIO(files_zip)) as zf:
958
- if "difficulty_levels.json" in zf.namelist():
959
- with zf.open("difficulty_levels.json") as f:
960
- difficulty_info = json.load(f)
961
- agent_id = difficulty_info.get("agent_id")
962
- except Exception:
963
- pass # Not a CyberGym task or malformed zip
964
-
965
- # Show upload size info and track timing
966
- upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
967
- upload_size_kb = upload_size_bytes / 1024
968
- if upload_size_kb / 1024 > 50:
969
- console.print(f"[yellow]Warning: Large eval log ({upload_size_kb / 1024:.1f} MB)[/yellow]")
970
-
971
- events.upload_started(size_bytes=upload_size_bytes)
972
- upload_start_time = time.monotonic()
973
-
974
- with Progress(
975
- SpinnerColumn(),
976
- TextColumn("[progress.description]{task.description}"),
977
- console=console,
978
- ) as progress:
979
- size_str = f" ({upload_size_kb:.0f} KB)" if upload_size_kb > 0 else ""
980
- progress.add_task(f"Uploading result{size_str}...", total=None)
981
-
982
- try:
983
- upload_result = api.upload_result(
984
- session_id=session_id,
985
- answer=result.answer or "",
986
- client_active_seconds=result.time_seconds,
987
- eval_log_bytes=eval_log_bytes,
988
- score=result.score,
989
- score_binarized=result.score_binarized,
990
- agent_id=agent_id,
991
- )
992
- except APIError as e:
993
- console.print(f"[red]Failed to upload result: {e}[/red]")
994
- if local_eval_path:
995
- console.print(f"[yellow]Eval log saved locally: {local_eval_path}[/yellow]")
996
- console.print("[yellow]Your result was saved locally but not uploaded.[/yellow]")
997
- sys.exit(1)
998
-
999
- # Record upload completion
1000
- upload_duration = time.monotonic() - upload_start_time
1001
- events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
1002
-
1003
- console.print()
1004
- console.print("[green]Result uploaded successfully![/green]")
1005
-
1006
- # Show local eval log path (quote paths with spaces for easy copy-paste)
1007
- if local_eval_path:
1008
- path_str = str(local_eval_path)
1009
- if " " in path_str:
1010
- path_str = f'"{path_str}"'
1011
- console.print(f"[dim]Eval log: {path_str}[/dim]")
1012
-
1013
- # Show next task if available
1014
- if upload_result.get("next_assignment_id"):
1015
- console.print()
1016
- console.print("Run [bold]hte-cli tasks run[/bold] for the next task.")
1017
-
1018
-
1019
586
  @tasks.command("pull-images")
1020
587
  @click.option("--count", "-n", default=5, help="Number of upcoming tasks to pull images for")
1021
588
  @click.pass_context
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.21
3
+ Version: 0.2.23
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,7 +1,7 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
3
  hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
- hte_cli/cli.py,sha256=cJ4g9UgBXHfmcNe4mu9imL8DSKkYzVDp8sR785z8h8M,42315
4
+ hte_cli/cli.py,sha256=YCsaW1rAzOAusgi1qN9YWJWr68jpctTNG22JluEcCsQ,24416
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
7
  hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
@@ -9,7 +9,7 @@ hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
9
9
  hte_cli/runner.py,sha256=SWl9FF4X3e9eBbZyL0ujhmmSL5OK8J6st-Ty0jD5AWM,14550
10
10
  hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.2.21.dist-info/METADATA,sha256=19It6UKpLmlPXHGXnkjGyF_gh4TJt0jOvAAH499tFFQ,3820
13
- hte_cli-0.2.21.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.2.21.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.2.21.dist-info/RECORD,,
12
+ hte_cli-0.2.23.dist-info/METADATA,sha256=cNU9v5zaqLtSnSsgHC7SxiYOysMg00exWz2iSHp2n6w,3820
13
+ hte_cli-0.2.23.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.23.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.23.dist-info/RECORD,,