hte-cli 0.1.28__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hte_cli/api_client.py CHANGED
@@ -201,6 +201,31 @@ class APIClient:
201
201
  },
202
202
  )
203
203
 
204
+ # =========================================================================
205
+ # Session-based API (new flow: hte-cli session join <session_id>)
206
+ # =========================================================================
207
+
208
def join_session(self, session_id: str) -> dict:
    """Attach this CLI to a session that the web UI already created.

    Issues a POST to the session's ``join`` endpoint and hands back the
    response unchanged. Per the upstream notes, the response carries the
    session info (task data, benchmark, mode, etc.) and the server records
    ``cli_connected_at`` as a side effect of the call.
    """
    endpoint = f"/sessions/{session_id}/join"
    session_info = self.post(endpoint)
    return session_info
215
+
216
def get_session_files(self, session_id: str) -> bytes:
    """Fetch the session's task files; the raw response body is a zip archive."""
    endpoint = f"/sessions/{session_id}/files"
    archive = self.get_raw(endpoint)
    return archive
219
+
220
def get_session_compose(self, session_id: str) -> str:
    """Fetch the session's compose.yaml and return it as UTF-8 decoded text."""
    raw_body = self.get_raw(f"/sessions/{session_id}/compose")
    return str(raw_body, "utf-8")
224
+
225
+ # =========================================================================
226
+ # Result Upload
227
+ # =========================================================================
228
+
204
229
  def upload_result(
205
230
  self,
206
231
  session_id: str,
hte_cli/cli.py CHANGED
@@ -147,13 +147,308 @@ def auth_status(ctx):
147
147
 
148
148
 
149
149
  # =============================================================================
150
- # Tasks Commands
150
+ # Session Commands (New flow: session join <session_id>)
151
+ # =============================================================================
152
+
153
+
154
# Container for the `hte-cli session ...` subcommands; the docstring below is
# what click shows as the group's help text, so it doubles as the whole body.
@cli.group()
def session():
    """Session management commands."""
158
+
159
+
160
# Benchmarks that cannot run without a Docker sandbox (i.e. a compose file).
_SANDBOX_REQUIRED_BENCHMARKS = frozenset(
    {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
)


def _join_session_or_exit(api, session_id: str) -> dict:
    """Join the session behind a spinner; print a targeted error and exit(1) on APIError."""
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task("Joining session...", total=None)

        try:
            return api.join_session(session_id)
        except APIError as e:
            detail = str(e)
            if "Invalid session ID format" in detail:
                console.print(f"[red]{e}[/red]")
            elif e.status_code == 404:
                console.print("[red]Session not found. Check the session ID and try again.[/red]")
            elif e.status_code == 400 and "paused" in detail.lower():
                console.print("[yellow]Session is paused. Please resume from the web UI first.[/yellow]")
            else:
                console.print(f"[red]Error: {e}[/red]")
            sys.exit(1)


def _download_session_resources(api, session_id: str):
    """Fetch the task-files zip and compose.yaml; return (files_zip, compose_yaml).

    Both downloads are best-effort: any APIError is reported as "optional
    resource missing" and the corresponding value is returned as None.
    """
    console.print("[bold]Step 1:[/bold] Downloading task resources...")

    files_zip = None
    with console.status("[dim]Fetching task files...[/dim]"):
        try:
            files_zip = api.get_session_files(session_id)
            console.print(" [green]✓[/green] Task files downloaded")
        except APIError:
            console.print(" [dim]○ No task files (optional)[/dim]")

    compose_yaml = None
    with console.status("[dim]Fetching compose configuration...[/dim]"):
        try:
            compose_yaml = api.get_session_compose(session_id)
            console.print(" [green]✓[/green] Docker compose downloaded")
        except APIError:
            console.print(" [dim]○ No compose file (optional)[/dim]")

    console.print()
    return files_zip, compose_yaml


def _make_pull_reporter(status, short_name: str):
    """Build an on_progress callback that mirrors docker's per-layer output into `status`."""
    last_shown = None

    def report(image: str, line: str):
        # Docker lines look like: "abc123: Downloading 360.9MB/4.075GB"
        nonlocal last_shown
        layer, sep, detail = line.partition(": ")
        if not sep:
            return
        display = f"{layer[-8:]}: {detail[:45]}"
        # Only touch the spinner when the text actually changes.
        if display != last_shown:
            last_shown = display
            status.update(f"[yellow]↓[/yellow] {short_name} [dim]{display}[/dim]")

    return report


def _pull_images(images, events) -> None:
    """Pull every image that is not cached locally, then emit image_pull_completed."""
    import time

    from hte_cli.image_utils import check_image_exists_locally, pull_image_with_progress

    pulled_images = []
    cached_images = []
    failed_images = []

    console.print(f"[bold]Step 2:[/bold] Pulling {len(images)} Docker image(s)...")
    pull_start = time.monotonic()

    for img in images:
        short_name = img.split("/")[-1][:40]

        if check_image_exists_locally(img):
            console.print(f" [green]✓[/green] {short_name} [dim](cached)[/dim]")
            cached_images.append(img)
            continue

        with console.status(f"[yellow]↓[/yellow] {short_name} [dim]connecting...[/dim]") as status:
            success = pull_image_with_progress(
                img, on_progress=_make_pull_reporter(status, short_name)
            )

        if success:
            console.print(f" [green]✓[/green] {short_name} [dim](downloaded)[/dim]")
            pulled_images.append(img)
        else:
            console.print(f" [red]✗[/red] {short_name} [dim](failed)[/dim]")
            failed_images.append(img)

    events.image_pull_completed(
        duration_seconds=time.monotonic() - pull_start,
        pulled=pulled_images,
        cached=cached_images,
        failed=failed_images,
    )
    console.print()


def _upload_result_or_exit(api, session_id: str, result, eval_log_bytes) -> dict:
    """Upload the finished result behind a spinner; print the error and exit(1) on APIError."""
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        progress.add_task("Uploading result...", total=None)
        try:
            return api.upload_result(
                session_id=session_id,
                answer=result.answer or "",
                client_active_seconds=result.time_seconds,
                eval_log_bytes=eval_log_bytes,
                score=result.score,
                score_binarized=result.score_binarized,
                agent_id=None,  # TODO: extract from task files if needed
            )
        except APIError as e:
            console.print(f"[red]Failed to upload result: {e}[/red]")
            sys.exit(1)


@session.command("join")
@click.argument("session_id")
@click.option("--force-setup", is_flag=True, help="Re-run setup even if reconnecting")
@click.pass_context
def session_join(ctx, session_id: str, force_setup: bool):
    """Join an existing session by ID.

    This is the primary way to start working on a task:
    1. Start the task from the web UI (creates session)
    2. Run this command with the session ID shown in the web UI
    3. The environment will be set up and the timer will start
    """
    config: Config = ctx.obj["config"]

    if not config.is_authenticated():
        console.print("[red]Not logged in. Run: hte-cli auth login[/red]")
        sys.exit(1)

    api = APIClient(config)

    # --- Join the session -------------------------------------------------
    console.print()
    session_info = _join_session_or_exit(api, session_id)

    # A session that is already in_progress means this CLI is reconnecting;
    # setup is then skipped unless the user explicitly forces it.
    is_reconnect = session_info.get("status") == "in_progress"
    run_setup = force_setup or not is_reconnect

    if not run_setup:
        console.print("[yellow]Reconnecting to existing session...[/yellow]")
        console.print()

    console.print(
        Panel(
            f"[bold]Task:[/bold] {session_info['task_id']}\n"
            f"[bold]Benchmark:[/bold] {session_info['benchmark']}\n"
            f"[bold]Mode:[/bold] {session_info['mode']}",
            title="Session Joined",
        )
    )
    console.print()

    # Runner components are imported lazily, only once a session was joined.
    import time

    from hte_cli.events import EventStreamer
    from hte_cli.image_utils import extract_images_from_compose
    from hte_cli.runner import TaskRunner

    events = EventStreamer(api, session_id)

    # --- Download task files and compose (fresh or forced setup only) ------
    files_zip = None
    compose_yaml = None
    if run_setup:
        files_zip, compose_yaml = _download_session_resources(api, session_id)

    # Sandbox benchmarks cannot start without a compose file. Reconnects are
    # exempt: they may legitimately have skipped the download above.
    benchmark = session_info.get("benchmark", "").lower()
    if benchmark in _SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml and not is_reconnect:
        console.print(
            f"[red]Error: {benchmark} requires a Docker sandbox but no compose file was found.[/red]"
        )
        console.print()
        console.print(f"Please contact support: {SUPPORT_EMAIL}")
        sys.exit(1)

    # Assignment-shaped dict, kept for compatibility with TaskRunner.
    assignment = {
        "assignment_id": session_info.get("assignment_id"),
        "session_id": session_id,
        "task_id": session_info["task_id"],
        "benchmark": session_info["benchmark"],
        "mode": session_info["mode"],
        "time_cap_seconds": session_info.get("time_cap_seconds"),
        "task": {
            "instructions": session_info.get("instructions", ""),
            "metadata": session_info.get("metadata", {}),
            "scorer_type": session_info.get("scorer_type"),
            "intermediate_scoring": session_info.get("intermediate_scoring", False),
        },
    }

    # --- Environment setup (fresh or forced setup only) --------------------
    images = []
    if run_setup:
        setup_start_time = time.monotonic()

        if compose_yaml:
            images = extract_images_from_compose(compose_yaml)

        events.setup_started(images=images)

        if images:
            _pull_images(images, events)

        # setup_completed is the event that starts the timer on the server.
        events.setup_completed(total_seconds=time.monotonic() - setup_start_time)
        console.print("[green]Environment ready! Timer started.[/green]")
        console.print()
    else:
        # Reconnecting - compose should already be running
        console.print("[dim]Skipping setup (use --force-setup to re-run)[/dim]")
        console.print()

    # --- Bail out if the session was cancelled while setting up ------------
    try:
        updated_session = api.join_session(session_id)
        if updated_session.get("status") == "cancelled":
            console.print("[yellow]Session was cancelled. Exiting.[/yellow]")
            sys.exit(0)
    except APIError:
        pass  # Best-effort check: the server might be temporarily unavailable.

    # --- Show instructions -------------------------------------------------
    if session_info.get("instructions"):
        console.print(Panel(session_info["instructions"], title="Task Instructions"))
        console.print()

    # --- Run the task ------------------------------------------------------
    # The printed step number counts only the steps that were actually shown:
    # download (1) and image pull (2) are skipped on a plain reconnect, and the
    # pull step is skipped when the compose file lists no images.
    if not run_setup:
        step_num = "1"
    elif images:
        step_num = "3"
    else:
        step_num = "2"
    console.print(f"[bold]Step {step_num}:[/bold] Starting task environment...")
    console.print("[dim]Launching Docker containers...[/dim]")
    console.print()

    events.docker_started()

    runner = TaskRunner()
    eval_log_bytes = None
    try:
        result = runner.run_from_assignment(
            assignment=assignment,
            compose_yaml=compose_yaml,
            files_zip=files_zip,
        )
        # Read the eval log now: cleanup() in the finally block runs next.
        if result.eval_log_path and result.eval_log_path.exists():
            eval_log_bytes = result.eval_log_path.read_bytes()
    except KeyboardInterrupt:
        events.docker_stopped(exit_code=130)
        console.print()
        console.print("[yellow]Interrupted. Session remains active - you can reconnect later.[/yellow]")
        sys.exit(0)
    except Exception as e:
        events.docker_stopped(exit_code=1)
        console.print(f"[red]Task execution failed: {e}[/red]")
        sys.exit(1)
    finally:
        runner.cleanup()

    events.docker_stopped(exit_code=0)

    # --- Upload the result -------------------------------------------------
    if not (result and result.answer):
        # Nothing to upload; say so instead of implying results were sent.
        console.print()
        console.print("[yellow]No answer was recorded, so nothing was uploaded.[/yellow]")
        return

    events.session_completed(
        elapsed_seconds=result.time_seconds,
        answer=result.answer,
    )

    console.print()
    console.print("[green]Task completed![/green]")
    console.print(f"Answer: {result.answer}")
    console.print(f"Time: {result.time_seconds:.1f}s")

    upload_result = _upload_result_or_exit(api, session_id, result, eval_log_bytes)

    if upload_result.get("score") is not None:
        console.print(f"Score: {upload_result['score']}")

    console.print()
    console.print("[green]Done! Return to the web UI to see your results.[/green]")
442
+
443
+
444
+ # =============================================================================
445
+ # Tasks Commands (DEPRECATED - use 'session join' instead)
151
446
  # =============================================================================
152
447
 
153
448
 
154
449
# Legacy `hte-cli tasks ...` group, retained so existing invocations still
# resolve; the docstring is the help text click displays for the group.
@cli.group()
def tasks():
    """Task commands (deprecated - use 'session join' instead)."""
158
453
 
159
454
 
@@ -223,7 +518,23 @@ def tasks_list(ctx):
223
518
  @click.argument("task_id", required=False)
224
519
  @click.pass_context
225
520
def tasks_run(ctx, task_id: str | None):
    """[DEPRECATED] Run a task - use 'session join' instead."""
    # The command no longer runs anything: it prints the migration notice
    # and exits non-zero. `ctx` and `task_id` are accepted only so the click
    # signature stays the same. A None entry stands for a blank line.
    notice = (
        None,
        "[red]This command is deprecated.[/red]",
        None,
        "The new workflow is:",
        " 1. Start the task from the web UI: https://cyber-task-horizons.com",
        " 2. Run the command shown: [bold]hte-cli session join <session_id>[/bold]",
        None,
        "This ensures accurate timing by starting the timer only when",
        "the environment is ready, not including Docker setup time.",
        None,
    )
    for text in notice:
        if text is None:
            console.print()
        else:
            console.print(text)
    sys.exit(1)
533
+
534
+
535
+ # Keep the old implementation as _tasks_run_legacy for testing if needed
536
+ def _tasks_run_legacy(ctx, task_id: str | None):
537
+ """Legacy implementation of tasks run (for testing only)."""
227
538
  config: Config = ctx.obj["config"]
228
539
 
229
540
  if not config.is_authenticated():
hte_cli/image_utils.py CHANGED
@@ -1,6 +1,10 @@
1
1
  """Docker image utilities for pre-pulling compose images."""
2
2
 
3
3
  import logging
4
+ import os
5
+ import pty
6
+ import re
7
+ import select
4
8
  import subprocess
5
9
  from collections.abc import Callable
6
10
 
@@ -61,32 +65,77 @@ def pull_image_with_progress(
61
65
  on_complete: Callable[[str, bool], None] | None = None,
62
66
  ) -> bool:
63
67
  """
64
- Pull a Docker image with progress callbacks.
68
+ Pull a Docker image with progress callbacks using PTY for real progress output.
65
69
 
66
70
  Args:
67
71
  image: Image name to pull
68
- on_progress: Callback(image, status_line) called for each line of output
72
+ on_progress: Callback(image, status_line) called for each progress update
69
73
  on_complete: Callback(image, success) called when pull completes
70
74
 
71
75
  Returns:
72
76
  True if pull succeeded, False otherwise
73
77
  """
74
78
  try:
79
+ # Use PTY to get real progress output from docker
80
+ master_fd, slave_fd = pty.openpty()
81
+
75
82
  process = subprocess.Popen(
76
83
  ["docker", "pull", image],
77
- stdout=subprocess.PIPE,
78
- stderr=subprocess.STDOUT,
79
- text=True,
80
- bufsize=1,
84
+ stdout=slave_fd,
85
+ stderr=slave_fd,
86
+ stdin=slave_fd,
87
+ close_fds=True,
81
88
  )
82
89
 
83
- # Stream output line by line
84
- for line in iter(process.stdout.readline, ""):
85
- line = line.strip()
86
- if line and on_progress:
87
- on_progress(image, line)
90
+ os.close(slave_fd) # Close slave in parent
91
+
92
+ # Read output from master with timeout
93
+ output_buffer = ""
94
+ # Regex to parse docker progress: "abc123: Downloading [===> ] 10.5MB/50MB"
95
+ progress_pattern = re.compile(
96
+ r"([a-f0-9]+):\s*(Downloading|Extracting|Verifying Checksum|Download complete|Pull complete|Already exists|Waiting)(?:\s+\[.*?\]\s+)?(\d+\.?\d*\s*[kMG]?B)?(?:/(\d+\.?\d*\s*[kMG]?B))?"
97
+ )
88
98
 
89
- process.wait()
99
+ while True:
100
+ # Check if process is done
101
+ ret = process.poll()
102
+ if ret is not None:
103
+ # Read any remaining output
104
+ try:
105
+ while True:
106
+ ready, _, _ = select.select([master_fd], [], [], 0.1)
107
+ if not ready:
108
+ break
109
+ chunk = os.read(master_fd, 4096)
110
+ if not chunk:
111
+ break
112
+ except OSError:
113
+ pass
114
+ break
115
+
116
+ # Read available output
117
+ try:
118
+ ready, _, _ = select.select([master_fd], [], [], 0.1)
119
+ if ready:
120
+ chunk = os.read(master_fd, 4096)
121
+ if chunk:
122
+ output_buffer += chunk.decode("utf-8", errors="replace")
123
+
124
+ # Parse and report progress
125
+ # Docker uses carriage returns to update lines in place
126
+ lines = output_buffer.replace("\r", "\n").split("\n")
127
+ output_buffer = lines[-1] # Keep incomplete line
128
+
129
+ for line in lines[:-1]:
130
+ line = line.strip()
131
+ # Strip ANSI escape codes
132
+ line = re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", line)
133
+ if line and on_progress:
134
+ on_progress(image, line)
135
+ except OSError:
136
+ break
137
+
138
+ os.close(master_fd)
90
139
  success = process.returncode == 0
91
140
 
92
141
  if on_complete:
@@ -94,7 +143,7 @@ def pull_image_with_progress(
94
143
 
95
144
  return success
96
145
 
97
- except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
146
+ except (FileNotFoundError, OSError) as e:
98
147
  logger.error(f"Failed to pull {image}: {e}")
99
148
  if on_complete:
100
149
  on_complete(image, False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.1.28
3
+ Version: 0.2.16
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -23,6 +23,10 @@ Requires-Dist: platformdirs>=4.0
23
23
  Requires-Dist: pydantic>=2.0
24
24
  Requires-Dist: pyyaml>=6.0
25
25
  Requires-Dist: rich>=13.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pexpect>=4.8; extra == 'dev'
28
+ Requires-Dist: pytest>=7.0; extra == 'dev'
29
+ Requires-Dist: requests>=2.28; extra == 'dev'
26
30
  Description-Content-Type: text/markdown
27
31
 
28
32
  # hte-cli
@@ -1,15 +1,15 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
- hte_cli/api_client.py,sha256=mO4buDND5cIWESg4gSKb8WkdA1iPwkmTa0L3xL6lvNQ,8153
4
- hte_cli/cli.py,sha256=yxqh-NacsrILjidE7CD2IHQ7fzZyaDCsPgEFl4lCG_w,29328
3
+ hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
+ hte_cli/cli.py,sha256=XpV7x2HcHlVg-ynr-Ih8MxbHc74PT9mQzCICavtpV_0,41623
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
7
  hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
8
- hte_cli/image_utils.py,sha256=454yoZEI1duNYrZC8UjhfZzDRP4Nxdrf2TvnZ_54G1k,4439
8
+ hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
9
9
  hte_cli/runner.py,sha256=DhC8FMjHwfLR193iP4thLDRZrNssYA9KH1WYKU2JKeg,13535
10
10
  hte_cli/scorers.py,sha256=sFoPJePRt-K191-Ga4cVmrldruJclYXTOLkU_C9nCDI,6025
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.1.28.dist-info/METADATA,sha256=JRF7leLDC5EpYXnq6yBVb9YNMpvI8Ijgodnvh5gvMqs,3615
13
- hte_cli-0.1.28.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.1.28.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.1.28.dist-info/RECORD,,
12
+ hte_cli-0.2.16.dist-info/METADATA,sha256=pNGPVDfi_RV-IWuC3pGzrqJCUzXgA2rf9-jXI2tNk3s,3768
13
+ hte_cli-0.2.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.16.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.16.dist-info/RECORD,,