hte-cli 0.1.27__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hte_cli/api_client.py CHANGED
@@ -201,6 +201,31 @@ class APIClient:
201
201
  },
202
202
  )
203
203
 
204
+ # =========================================================================
205
+ # Session-based API (new flow: hte-cli session join <session_id>)
206
+ # =========================================================================
207
+
208
def join_session(self, session_id: str) -> dict:
    """Join an existing session created by web UI.

    Issues a POST to ``/sessions/<session_id>/join`` through ``self.post``
    and hands back whatever that call returns (session info including task
    data, benchmark, mode, etc.). The server is expected to set
    ``cli_connected_at`` as a side effect.
    """
    endpoint = "/sessions/{}/join".format(session_id)
    return self.post(endpoint)
215
+
216
def get_session_files(self, session_id: str) -> bytes:
    """Download task files for a session as zip.

    Fetches ``/sessions/<session_id>/files`` via ``self.get_raw`` and
    returns the raw response unchanged.
    """
    endpoint = "/sessions/{}/files".format(session_id)
    return self.get_raw(endpoint)
219
+
220
def get_session_compose(self, session_id: str) -> str:
    """Get compose.yaml content for a session.

    Fetches ``/sessions/<session_id>/compose`` via ``self.get_raw`` and
    decodes the returned bytes as UTF-8 text.
    """
    endpoint = "/sessions/{}/compose".format(session_id)
    return self.get_raw(endpoint).decode("utf-8")
224
+
225
+ # =========================================================================
226
+ # Result Upload
227
+ # =========================================================================
228
+
204
229
  def upload_result(
205
230
  self,
206
231
  session_id: str,
hte_cli/cli.py CHANGED
@@ -147,13 +147,239 @@ def auth_status(ctx):
147
147
 
148
148
 
149
149
  # =============================================================================
150
- # Tasks Commands
150
+ # Session Commands (New flow: session join <session_id>)
151
+ # =============================================================================
152
+
153
+
154
@cli.group()
def session():
    """Session management commands."""
    # Intentionally empty: this callback only anchors the "session" command
    # group; the docstring above is what click shows as the group's help text.
158
+
159
+
160
@session.command("join")
@click.argument("session_id")
@click.option("--force-setup", is_flag=True, help="Re-run setup even if reconnecting")
@click.pass_context
def session_join(ctx, session_id: str, force_setup: bool):
    """Join an existing session by ID.

    This is the primary way to start working on a task:
    1. Start the task from the web UI (creates session)
    2. Run this command with the session ID shown in the web UI
    3. The environment will be set up and the timer will start
    """
    # The docstring above doubles as the click help text, so it stays user-facing.
    #
    # Flow: check auth -> join the session -> (download files/compose and run
    # setup unless reconnecting without --force-setup) -> show instructions ->
    # run the task loop -> upload the result.
    #
    # Exits with status 1 on auth/join/validation/upload failure and with
    # status 0 when the task loop is interrupted with Ctrl-C.
    import time  # local import, like the other in-function imports in this module

    config: Config = ctx.obj["config"]

    if not config.is_authenticated():
        console.print("[red]Not logged in. Run: hte-cli auth login[/red]")
        sys.exit(1)

    api = APIClient(config)

    # Step 1: Join session
    console.print()
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task("Joining session...", total=None)

        try:
            session_info = api.join_session(session_id)
        except APIError as e:
            # Map the known failure modes to friendlier messages.
            if "Invalid session ID format" in str(e):
                console.print(f"[red]{e}[/red]")
            elif e.status_code == 404:
                console.print("[red]Session not found. Check the session ID and try again.[/red]")
            elif e.status_code == 400 and "paused" in str(e).lower():
                console.print("[yellow]Session is paused. Please resume from the web UI first.[/yellow]")
            else:
                console.print(f"[red]Error: {e}[/red]")
            sys.exit(1)

    # A session that is already in_progress means we are reconnecting; setup
    # is then skipped unless the user explicitly asked for it.
    is_reconnect = session_info.get("status") == "in_progress"
    needs_setup = not is_reconnect or force_setup

    if is_reconnect and not force_setup:
        console.print("[yellow]Reconnecting to existing session...[/yellow]")
        console.print()

    console.print(
        Panel(
            f"[bold]Task:[/bold] {session_info['task_id']}\n"
            f"[bold]Benchmark:[/bold] {session_info['benchmark']}\n"
            f"[bold]Mode:[/bold] {session_info['mode']}",
            title="Session Joined",
        )
    )
    console.print()

    # Import runner components
    from hte_cli.events import EventStreamer
    from hte_cli.runner import TaskRunner, DockerComposeManager

    # Create event streamer
    events = EventStreamer(api, session_id)

    # Step 2: Download task files and compose (skip if reconnecting without force)
    files_zip = None
    compose_yaml = None

    if needs_setup:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task_id_display = progress.add_task("Downloading task files...", total=None)
            try:
                files_zip = api.get_session_files(session_id)
            except APIError as e:
                # Files are optional for some benchmarks
                console.print(f"[dim]Note: {e}[/dim]")
                files_zip = None

            progress.update(task_id_display, description="Downloading compose file...")
            try:
                compose_yaml = api.get_session_compose(session_id)
            except APIError:
                # Compose is optional for simple benchmarks; benchmarks that
                # cannot run without it are rejected by the check below.
                compose_yaml = None

    # Validate compose for sandbox-required benchmarks
    benchmark = session_info.get("benchmark", "").lower()
    SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
    if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
        console.print(
            f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
        )
        console.print()
        console.print(
            f"Please contact support: {SUPPORT_EMAIL}"
        )
        sys.exit(1)

    # Build assignment dict for runner compatibility
    assignment = {
        "assignment_id": session_info.get("assignment_id"),
        "task_id": session_info["task_id"],
        "benchmark": session_info["benchmark"],
        "mode": session_info["mode"],
        "time_cap_seconds": session_info.get("time_cap_seconds"),
    }

    # Build task dict for runner
    task_data = {
        "instructions": session_info.get("instructions", ""),
        "metadata": session_info.get("metadata", {}),
        "scorer_type": session_info.get("scorer_type"),
        "intermediate_scoring": session_info.get("intermediate_scoring", False),
    }

    # Step 3: Run setup (skip if reconnecting without force)
    if needs_setup:
        # Send setup_started event
        # NOTE(review): EventStreamer.setup_started is annotated as taking
        # `images: list[str]`, but a dict is passed here; the payload is left
        # unchanged so the data sent to the server stays the same - confirm
        # which side is intended.
        events.setup_started({"cli_version": __version__, "task_id": assignment["task_id"]})

        if compose_yaml:
            compose_manager = DockerComposeManager(
                compose_yaml=compose_yaml,
                files_zip=files_zip,
                session_id=session_id,
                task_id=assignment["task_id"],
                console=console,
            )

            # Pull images
            pull_started_at = time.monotonic()
            events.image_pull_started({})
            compose_manager.pull_images()
            # image_pull_completed requires duration_seconds, pulled, cached and
            # failed; the previous single-dict call raised TypeError right after
            # the pull. pull_images() exposes no per-image outcome here, so the
            # lists are sent empty.
            # NOTE(review): populate them if pull_images() can report results.
            events.image_pull_completed(
                duration_seconds=time.monotonic() - pull_started_at,
                pulled=[],
                cached=[],
                failed=[],
            )

            # Start containers
            compose_manager.up()

        # Send setup_completed - THIS STARTS THE TIMER ON SERVER
        # NOTE(review): EventStreamer.setup_completed is annotated as taking
        # `total_seconds: float`, but a dict is passed; left unchanged for the
        # same reason as setup_started above - confirm.
        events.setup_completed({"cli_version": __version__})
        console.print("[green]Environment ready! Timer started.[/green]")
        console.print()
    else:
        # Reconnecting - compose should already be running
        console.print("[dim]Skipping setup (use --force-setup to re-run)[/dim]")
        console.print()

    # Step 4: Show instructions
    if session_info.get("instructions"):
        console.print(Panel(session_info["instructions"], title="Task Instructions"))
        console.print()

    # Step 5: Run the task interaction loop
    # The runner is built once, here; an earlier identical instance created
    # during setup was never used.
    runner = TaskRunner(session_id, api, events, console)
    try:
        result = runner.run(
            assignment=assignment,
            task=task_data,
            compose_yaml=compose_yaml,
            files_zip=files_zip,
        )
    except KeyboardInterrupt:
        console.print()
        console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
        sys.exit(0)

    # Step 6: Upload result (only when the run produced a non-empty answer)
    if result and result.answer:
        events.session_completed(
            elapsed_seconds=result.time_seconds,
            answer=result.answer,
        )

        console.print()
        console.print("[green]Task completed![/green]")
        console.print(f"Answer: {result.answer}")
        console.print(f"Time: {result.time_seconds:.1f}s")

        # Upload to server
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            progress.add_task("Uploading result...", total=None)
            try:
                upload_result = api.upload_result(
                    session_id=session_id,
                    answer=result.answer,  # guaranteed truthy by the guard above
                    client_active_seconds=result.time_seconds,
                    eval_log_bytes=result.eval_log_bytes,
                    score=result.score,
                    score_binarized=result.score_binarized,
                    agent_id=result.agent_id,
                )
            except APIError as e:
                console.print(f"[red]Failed to upload result: {e}[/red]")
                sys.exit(1)

        if upload_result.get("score") is not None:
            console.print(f"Score: {upload_result['score']}")

        console.print()
        console.print("[green]Done! Return to the web UI to see your results.[/green]")
373
+
374
+
375
+ # =============================================================================
376
+ # Tasks Commands (DEPRECATED - use 'session join' instead)
151
377
  # =============================================================================
152
378
 
153
379
 
154
380
@cli.group()
def tasks():
    """Task commands (deprecated - use 'session join' instead)."""
    # Intentionally empty: this callback only anchors the "tasks" command
    # group; the docstring above is what click shows as the group's help text.
158
384
 
159
385
 
@@ -223,7 +449,23 @@ def tasks_list(ctx):
223
449
  @click.argument("task_id", required=False)
224
450
  @click.pass_context
225
451
  def tasks_run(ctx, task_id: str | None):
226
- """Run a task (default: highest priority pending task)."""
452
+ """[DEPRECATED] Run a task - use 'session join' instead."""
453
+ console.print()
454
+ console.print("[red]This command is deprecated.[/red]")
455
+ console.print()
456
+ console.print("The new workflow is:")
457
+ console.print(" 1. Start the task from the web UI: https://cyber-task-horizons.com")
458
+ console.print(" 2. Run the command shown: [bold]hte-cli session join <session_id>[/bold]")
459
+ console.print()
460
+ console.print("This ensures accurate timing by starting the timer only when")
461
+ console.print("the environment is ready, not including Docker setup time.")
462
+ console.print()
463
+ sys.exit(1)
464
+
465
+
466
+ # Keep the old implementation as _tasks_run_legacy for testing if needed
467
+ def _tasks_run_legacy(ctx, task_id: str | None):
468
+ """Legacy implementation of tasks run (for testing only)."""
227
469
  config: Config = ctx.obj["config"]
228
470
 
229
471
  if not config.is_authenticated():
@@ -397,10 +639,16 @@ def tasks_run(ctx, task_id: str | None):
397
639
  # Step 5: Pre-pull Docker images with progress
398
640
  from hte_cli.image_utils import extract_images_from_compose
399
641
  import re
642
+ import time
643
+
644
+ setup_start_time = time.monotonic()
645
+ images: list[str] = []
646
+ results: list[tuple[str, bool, str]] = []
400
647
 
401
648
  if compose_yaml:
402
649
  images = extract_images_from_compose(compose_yaml)
403
650
  if images:
651
+ events.setup_started(images)
404
652
  console.print()
405
653
  console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")
406
654
 
@@ -533,14 +781,27 @@ def tasks_run(ctx, task_id: str | None):
533
781
  console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
534
782
  results.append((img, False, "failed"))
535
783
 
536
- failed = sum(1 for _, ok, _ in results if not ok)
537
- if failed > 0:
784
+ failed_count = sum(1 for _, ok, _ in results if not ok)
785
+ if failed_count > 0:
538
786
  console.print(
539
- f"[yellow]Warning: {failed} image(s) failed to pull. "
787
+ f"[yellow]Warning: {failed_count} image(s) failed to pull. "
540
788
  "Task may fail to start.[/yellow]"
541
789
  )
542
790
  console.print()
543
791
 
792
+ # Record image pull timing
793
+ if images:
794
+ pull_duration = time.monotonic() - setup_start_time
795
+ pulled = [img for img, ok, status in results if ok and status == "pulled"]
796
+ cached = [img for img, ok, status in results if ok and status == "cached"]
797
+ failed = [img for img, ok, status in results if not ok]
798
+ events.image_pull_completed(
799
+ duration_seconds=pull_duration,
800
+ pulled=pulled,
801
+ cached=cached,
802
+ failed=failed,
803
+ )
804
+
544
805
  # Step 6: Run Inspect's human_cli
545
806
  runner = TaskRunner()
546
807
  console.print("[bold]Starting task environment...[/bold]")
@@ -549,6 +810,10 @@ def tasks_run(ctx, task_id: str | None):
549
810
 
550
811
  events.docker_started()
551
812
 
813
+ # Record total setup time (image pulls + compose up)
814
+ total_setup = time.monotonic() - setup_start_time
815
+ events.setup_completed(total_seconds=total_setup)
816
+
552
817
  eval_log_bytes = None
553
818
  local_eval_path = None
554
819
  try:
@@ -612,13 +877,14 @@ def tasks_run(ctx, task_id: str | None):
612
877
  except Exception:
613
878
  pass # Not a CyberGym task or malformed zip
614
879
 
615
- # Show upload size info
616
- upload_size_kb = 0
617
- if eval_log_bytes:
618
- upload_size_kb = len(eval_log_bytes) / 1024
619
- size_mb = upload_size_kb / 1024
620
- if size_mb > 50:
621
- console.print(f"[yellow]Warning: Large eval log ({size_mb:.1f} MB)[/yellow]")
880
+ # Show upload size info and track timing
881
+ upload_size_bytes = len(eval_log_bytes) if eval_log_bytes else 0
882
+ upload_size_kb = upload_size_bytes / 1024
883
+ if upload_size_kb / 1024 > 50:
884
+ console.print(f"[yellow]Warning: Large eval log ({upload_size_kb / 1024:.1f} MB)[/yellow]")
885
+
886
+ events.upload_started(size_bytes=upload_size_bytes)
887
+ upload_start_time = time.monotonic()
622
888
 
623
889
  with Progress(
624
890
  SpinnerColumn(),
@@ -645,6 +911,10 @@ def tasks_run(ctx, task_id: str | None):
645
911
  console.print("[yellow]Your result was saved locally but not uploaded.[/yellow]")
646
912
  sys.exit(1)
647
913
 
914
+ # Record upload completion
915
+ upload_duration = time.monotonic() - upload_start_time
916
+ events.upload_completed(duration_seconds=upload_duration, size_bytes=upload_size_bytes)
917
+
648
918
  console.print()
649
919
  console.print("[green]Result uploaded successfully![/green]")
650
920
 
hte_cli/events.py CHANGED
@@ -24,6 +24,12 @@ class EventStreamer:
24
24
  "docker_started",
25
25
  "docker_stopped",
26
26
  "session_completed",
27
+ # Overhead tracking events
28
+ "setup_started",
29
+ "image_pull_completed",
30
+ "setup_completed",
31
+ "upload_started",
32
+ "upload_completed",
27
33
  }
28
34
 
29
35
  def __init__(self, api: APIClient, session_id: str):
@@ -126,3 +132,45 @@ class EventStreamer:
126
132
  if answer is not None:
127
133
  data["answer_submitted"] = True
128
134
  return self.send("session_completed", data or None)
135
+
136
+ # Overhead tracking events
137
+
138
def setup_started(self, images: list[str]) -> bool:
    """Emit a ``setup_started`` event (start of setup, before image pulls).

    Args:
        images: Forwarded unchanged under the ``"images"`` key.

    Returns:
        The result of ``self.send`` for this event.
    """
    payload = {"images": images}
    return self.send("setup_started", payload)
141
+
142
def image_pull_completed(
    self,
    duration_seconds: float,
    pulled: list[str],
    cached: list[str],
    failed: list[str],
) -> bool:
    """Emit an ``image_pull_completed`` event with pull results and timing.

    Args:
        duration_seconds: Sent under ``"duration_seconds"``.
        pulled: Sent under ``"pulled"``.
        cached: Sent under ``"cached"``.
        failed: Sent under ``"failed"``.

    Returns:
        The result of ``self.send`` for this event.
    """
    # Keys are inserted in the same order as the parameters.
    payload = dict(
        duration_seconds=duration_seconds,
        pulled=pulled,
        cached=cached,
        failed=failed,
    )
    return self.send("image_pull_completed", payload)
159
+
160
def setup_completed(self, total_seconds: float) -> bool:
    """Emit a ``setup_completed`` event (environment ready for work).

    Args:
        total_seconds: Forwarded unchanged under the ``"total_seconds"`` key.

    Returns:
        The result of ``self.send`` for this event.
    """
    payload = {"total_seconds": total_seconds}
    return self.send("setup_completed", payload)
163
+
164
def upload_started(self, size_bytes: int) -> bool:
    """Emit an ``upload_started`` event (start of result upload).

    Args:
        size_bytes: Forwarded unchanged under the ``"size_bytes"`` key.

    Returns:
        The result of ``self.send`` for this event.
    """
    payload = {"size_bytes": size_bytes}
    return self.send("upload_started", payload)
167
+
168
def upload_completed(self, duration_seconds: float, size_bytes: int) -> bool:
    """Emit an ``upload_completed`` event (end of result upload, with timing).

    Args:
        duration_seconds: Sent under ``"duration_seconds"``.
        size_bytes: Sent under ``"size_bytes"``.

    Returns:
        The result of ``self.send`` for this event.
    """
    payload = dict(duration_seconds=duration_seconds, size_bytes=size_bytes)
    return self.send("upload_completed", payload)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.1.27
3
+ Version: 0.2.0
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -23,6 +23,10 @@ Requires-Dist: platformdirs>=4.0
23
23
  Requires-Dist: pydantic>=2.0
24
24
  Requires-Dist: pyyaml>=6.0
25
25
  Requires-Dist: rich>=13.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pexpect>=4.8; extra == 'dev'
28
+ Requires-Dist: pytest>=7.0; extra == 'dev'
29
+ Requires-Dist: requests>=2.28; extra == 'dev'
26
30
  Description-Content-Type: text/markdown
27
31
 
28
32
  # hte-cli
@@ -1,15 +1,15 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
- hte_cli/api_client.py,sha256=mO4buDND5cIWESg4gSKb8WkdA1iPwkmTa0L3xL6lvNQ,8153
4
- hte_cli/cli.py,sha256=AolSWSqzeaAJKgsixPMqlwqfB24IGB60jhaDjdktF8A,28169
3
+ hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
+ hte_cli/cli.py,sha256=mMoy1xHKwbeSEHh2CvfdeHJKle_HT4PwKursXBUc__c,38389
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
- hte_cli/events.py,sha256=LCNLPJuk_Sz-rCl1Aa3k28y10_jwAx3urbnz3OXYPmE,3937
7
+ hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
8
8
  hte_cli/image_utils.py,sha256=454yoZEI1duNYrZC8UjhfZzDRP4Nxdrf2TvnZ_54G1k,4439
9
9
  hte_cli/runner.py,sha256=DhC8FMjHwfLR193iP4thLDRZrNssYA9KH1WYKU2JKeg,13535
10
10
  hte_cli/scorers.py,sha256=sFoPJePRt-K191-Ga4cVmrldruJclYXTOLkU_C9nCDI,6025
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.1.27.dist-info/METADATA,sha256=Puk8nyCfgXXVtyt89PpQtKn4e3iQsATSRxon6FWw4E0,3615
13
- hte_cli-0.1.27.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.1.27.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.1.27.dist-info/RECORD,,
12
+ hte_cli-0.2.0.dist-info/METADATA,sha256=A2RhmrFnjGkpnbVwjel_CNTtL2Mg_LefwNeWejIvul8,3767
13
+ hte_cli-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.0.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.0.dist-info/RECORD,,