hte-cli 0.1.20__tar.gz → 0.1.22__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.1.20
3
+ Version: 0.1.22
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "hte-cli"
3
- version = "0.1.20"
3
+ version = "0.1.22"
4
4
  description = "Human Time-to-Completion Evaluation CLI"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -333,6 +333,7 @@ def tasks_run(ctx, task_id: str | None):
333
333
  console.print(f"[dim]Note: {e}[/dim]")
334
334
 
335
335
  # Validate compose for sandbox-required benchmarks
336
+ benchmark = assignment.get("benchmark", "").lower()
336
337
  SANDBOX_REQUIRED_BENCHMARKS = {"cybench", "intercode-ctf", "cvebench", "cybergym", "nyuctf"}
337
338
  if benchmark in SANDBOX_REQUIRED_BENCHMARKS and not compose_yaml:
338
339
  console.print(
@@ -354,7 +355,6 @@ def tasks_run(ctx, task_id: str | None):
354
355
  )
355
356
 
356
357
  # Step 4: Show pre-task message (benchmark-specific)
357
- benchmark = assignment.get("benchmark", "").lower()
358
358
  single_shot_benchmarks = {"nl2bash", "cybashbench"}
359
359
 
360
360
  console.print()
@@ -398,72 +398,134 @@ def tasks_run(ctx, task_id: str | None):
398
398
 
399
399
  # Step 5: Pre-pull Docker images with progress
400
400
  from hte_cli.runner import prepull_compose_images, extract_images_from_compose
401
+ import re
401
402
 
402
403
  if compose_yaml:
403
404
  images = extract_images_from_compose(compose_yaml)
404
405
  if images:
405
406
  console.print()
406
- console.print("[bold]Preparing Docker environment...[/bold]")
407
-
408
- # Track pull state for live display
409
- pull_status = {}
410
-
411
- def on_image_start(image: str, idx: int, total: int):
412
- pull_status[image] = {"status": "pulling", "line": "Starting..."}
407
+ console.print(f"[bold]Preparing Docker environment ({len(images)} images)...[/bold]")
408
+
409
+ # Track layer progress per image: {layer_id: (status, downloaded_mb, total_mb)}
410
+ image_layers: dict[str, dict[str, tuple[str, float, float]]] = {}
411
+
412
+ def parse_size(size_str: str) -> float:
413
+ """Parse size string like '1.2MB' or '500kB' to MB."""
414
+ size_str = size_str.strip().upper()
415
+ if "GB" in size_str:
416
+ return float(size_str.replace("GB", "").strip()) * 1024
417
+ elif "MB" in size_str:
418
+ return float(size_str.replace("MB", "").strip())
419
+ elif "KB" in size_str:
420
+ return float(size_str.replace("KB", "").strip()) / 1024
421
+ elif "B" in size_str:
422
+ return float(size_str.replace("B", "").strip()) / (1024 * 1024)
423
+ return 0
424
+
425
+ def parse_docker_line(line: str) -> tuple[str | None, str, float, float]:
426
+ """Parse Docker pull output to extract layer ID, status, and sizes.
427
+
428
+ Returns: (layer_id, status, downloaded_mb, total_mb)
429
+ """
430
+ # Format: "79f742de2855: Downloading [==>] 1.2MB/50MB"
431
+ # Or: "79f742de2855: Pull complete"
432
+ match = re.match(r"([a-f0-9]+): (.+)", line)
433
+ if not match:
434
+ return None, "", 0, 0
435
+
436
+ layer_id = match.group(1)
437
+ status_part = match.group(2)
438
+
439
+ # Try to extract size info from "Downloading [==>] 1.2MB/50MB"
440
+ size_match = re.search(r"([\d.]+[kKmMgG]?[bB]?)/([\d.]+[kKmMgG]?[bB])", status_part)
441
+ if size_match:
442
+ downloaded = parse_size(size_match.group(1))
443
+ total = parse_size(size_match.group(2))
444
+ return layer_id, status_part, downloaded, total
445
+
446
+ return layer_id, status_part, 0, 0
447
+
448
+ def get_progress_summary(image: str) -> str:
449
+ """Get a human-readable progress summary for an image with MB counts."""
450
+ if image not in image_layers or not image_layers[image]:
451
+ return "Starting..."
452
+
453
+ layers = image_layers[image]
454
+ total_layers = len(layers)
455
+ complete = sum(1 for s, _, _ in layers.values() if "complete" in s.lower())
456
+
457
+ # Sum up download progress
458
+ total_downloaded_mb = 0
459
+ total_size_mb = 0
460
+ for status, downloaded, total in layers.values():
461
+ if "complete" in status.lower():
462
+ # Completed layers: use total as both downloaded and total
463
+ total_downloaded_mb += total
464
+ total_size_mb += total
465
+ elif total > 0:
466
+ total_downloaded_mb += downloaded
467
+ total_size_mb += total
468
+
469
+ if complete == total_layers and total_layers > 0:
470
+ if total_size_mb > 0:
471
+ return f"Done ({total_size_mb:.0f}MB)"
472
+ return f"Done ({total_layers} layers)"
473
+ elif total_size_mb > 0:
474
+ return f"{total_downloaded_mb:.0f}MB / {total_size_mb:.0f}MB"
475
+ elif complete > 0:
476
+ return f"Pulling ({complete}/{total_layers} layers)"
477
+ else:
478
+ return f"Preparing ({total_layers} layers)"
413
479
 
414
480
  def on_image_progress(image: str, line: str):
415
- # Update the status line (truncate long lines)
416
- display_line = line[:60] + "..." if len(line) > 60 else line
417
- pull_status[image] = {"status": "pulling", "line": display_line}
418
-
419
- def on_image_complete(image: str, success: bool, reason: str):
420
- if reason == "cached":
421
- pull_status[image] = {"status": "cached", "line": "Using cached image"}
422
- elif success:
423
- pull_status[image] = {"status": "done", "line": "Pull complete"}
481
+ """Track layer-level progress with size info."""
482
+ if image not in image_layers:
483
+ image_layers[image] = {}
484
+
485
+ layer_id, status, downloaded, total = parse_docker_line(line)
486
+ if layer_id:
487
+ image_layers[image][layer_id] = (status, downloaded, total)
488
+
489
+ # Process images sequentially with clear output
490
+ results = []
491
+ for idx, img in enumerate(images, 1):
492
+ short_name = img.split("/")[-1] if "/" in img else img
493
+
494
+ # Check if cached first
495
+ from hte_cli.runner import check_image_exists_locally, pull_image_with_progress
496
+
497
+ if check_image_exists_locally(img):
498
+ console.print(f" [green]✓[/green] {short_name} [dim](cached)[/dim]")
499
+ results.append((img, True, "cached"))
500
+ continue
501
+
502
+ # Need to pull - show live progress
503
+ console.print(f" [yellow]↓[/yellow] {short_name} [dim]pulling...[/dim]", end="")
504
+
505
+ # Clear the line and show progress updates
506
+ image_layers[img] = {}
507
+ last_summary = ""
508
+
509
+ def show_progress(image: str, line: str):
510
+ nonlocal last_summary
511
+ on_image_progress(image, line)
512
+ summary = get_progress_summary(image)
513
+ if summary != last_summary:
514
+ # Clear line and rewrite
515
+ console.print(f"\r [yellow]↓[/yellow] {short_name} [dim]{summary}[/dim]" + " " * 20, end="")
516
+ last_summary = summary
517
+
518
+ success = pull_image_with_progress(img, on_progress=show_progress)
519
+
520
+ # Final status
521
+ if success:
522
+ console.print(f"\r [green]✓[/green] {short_name} [dim](downloaded)[/dim]" + " " * 30)
523
+ results.append((img, True, "pulled"))
424
524
  else:
425
- pull_status[image] = {"status": "failed", "line": "Pull failed"}
426
-
427
- # Show progress for each image
428
- with Progress(
429
- SpinnerColumn(),
430
- TextColumn("[progress.description]{task.description}"),
431
- TextColumn("[dim]{task.fields[status]}[/dim]"),
432
- console=console,
433
- transient=False,
434
- ) as progress:
435
- # Create tasks for each image
436
- image_tasks = {}
437
- for img in images:
438
- short_name = img.split("/")[-1] if "/" in img else img
439
- image_tasks[img] = progress.add_task(
440
- f"[cyan]{short_name}[/cyan]",
441
- total=None,
442
- status="checking...",
443
- )
444
-
445
- pulled, failed = prepull_compose_images(
446
- compose_yaml,
447
- on_image_start=lambda img, idx, total: progress.update(
448
- image_tasks[img], status="pulling..."
449
- ),
450
- on_image_progress=lambda img, line: progress.update(
451
- image_tasks[img],
452
- status=line[:50] + "..." if len(line) > 50 else line,
453
- ),
454
- on_image_complete=lambda img, ok, reason: progress.update(
455
- image_tasks[img],
456
- status=(
457
- "[green]cached[/green]"
458
- if reason == "cached"
459
- else "[green]ready[/green]"
460
- if ok
461
- else "[red]failed[/red]"
462
- ),
463
- completed=True,
464
- ),
465
- )
525
+ console.print(f"\r [red]✗[/red] {short_name} [dim](failed)[/dim]" + " " * 30)
526
+ results.append((img, False, "failed"))
466
527
 
528
+ failed = sum(1 for _, ok, _ in results if not ok)
467
529
  if failed > 0:
468
530
  console.print(
469
531
  f"[yellow]Warning: {failed} image(s) failed to pull. "
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes