hte-cli 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
hte_cli/cli.py CHANGED
@@ -274,6 +274,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
274
274
  "task": {
275
275
  "instructions": session_info.get("instructions", ""),
276
276
  "metadata": session_info.get("metadata", {}),
277
+ "target": session_info.get("target", ""),
277
278
  "scorer_type": session_info.get("scorer_type"),
278
279
  "intermediate_scoring": session_info.get("intermediate_scoring", False),
279
280
  },
hte_cli/runner.py CHANGED
@@ -271,12 +271,22 @@ class TaskRunner:
271
271
  file_dest_base = _get_file_dest_from_compose(compose_path)
272
272
 
273
273
  files_to_mount: dict[str, str] = {}
274
- excluded_files = {"compose.yaml", "README.md", "instructions.txt"}
275
- for file_path in task_dir.iterdir():
276
- if file_path.is_file() and file_path.name not in excluded_files:
277
- dest_path = f"{file_dest_base}/{file_path.name}"
278
- files_to_mount[dest_path] = str(file_path)
279
- logger.info(f"Will mount file: {file_path.name} -> {dest_path}")
274
+ excluded_files = {"compose.yaml", "README.md", "instructions.txt", "logs"}
275
+
276
+ # Walk directory tree recursively to handle nested file structures
277
+ # (e.g., cybench tasks with blog/, nginx/, rcache/ subdirectories)
278
+ for file_path in task_dir.rglob("*"):
279
+ if not file_path.is_file():
280
+ continue
281
+ # Get relative path from task_dir
282
+ rel_path = file_path.relative_to(task_dir)
283
+ # Skip excluded files (check both filename and first directory component)
284
+ if rel_path.name in excluded_files or rel_path.parts[0] in excluded_files:
285
+ continue
286
+ # Build destination path preserving directory structure
287
+ dest_path = f"{file_dest_base}/{rel_path}"
288
+ files_to_mount[dest_path] = str(file_path)
289
+ logger.info(f"Will mount file: {rel_path} -> {dest_path}")
280
290
 
281
291
  # Create the Inspect task
282
292
  inspect_task = self.create_inspect_task(
@@ -364,16 +374,31 @@ class TaskRunner:
364
374
  task_id = assignment["task_id"]
365
375
  task_data = assignment.get("task", {})
366
376
  instructions = task_data.get("instructions", "")
367
- # Target can be at task level, in metadata, or in dataset_task_metadata.flag
368
- target = task_data.get("target", "") or task_data.get("metadata", {}).get("target", "")
369
- # Fallback to dataset_task_metadata.flag for CTF benchmarks (nyuctf, cybench, etc.)
377
+ # Target comes from backend (preferred) or can be extracted from metadata
378
+ target = task_data.get("target", "")
370
379
  if not target or target == "?":
371
- dataset_meta = task_data.get("dataset_task_metadata", {})
372
- target = dataset_meta.get("flag", "") or dataset_meta.get("answer", "")
380
+ # Fallback to metadata fields for various benchmarks
381
+ meta = task_data.get("metadata", {})
382
+ target = (
383
+ meta.get("target", "")
384
+ or meta.get("solution_flag", "") # intercode-ctf
385
+ or meta.get("flag", "") # cybench, nyuctf
386
+ or meta.get("answer", "") # generic
387
+ )
373
388
 
374
- # Extract scoring configuration from backend
375
- scorer_type = task_data["scorer_type"]
376
- intermediate_scoring = task_data["intermediate_scoring"]
389
+ # Extract scoring configuration from backend (required fields)
390
+ scorer_type = task_data.get("scorer_type")
391
+ if not scorer_type:
392
+ raise ValueError(
393
+ f"Backend did not return scorer_type for task {task_id}. "
394
+ "This is a backend configuration error."
395
+ )
396
+ intermediate_scoring = task_data.get("intermediate_scoring")
397
+ if intermediate_scoring is None:
398
+ raise ValueError(
399
+ f"Backend did not return intermediate_scoring for task {task_id}. "
400
+ "This is a backend configuration error."
401
+ )
377
402
 
378
403
  return self.run(
379
404
  task_id=task_id,
hte_cli/scorers.py CHANGED
@@ -137,16 +137,25 @@ def get_scorer(scorer_type: ScorerType | str, target: str) -> Scorer | None:
137
137
  Get the appropriate scorer based on type.
138
138
 
139
139
  Matches *_human.py implementations for human/AI parity.
140
+ Raises ValueError for invalid configurations.
140
141
  """
141
142
  # Handle string input
142
143
  if isinstance(scorer_type, str):
143
144
  try:
144
145
  scorer_type = ScorerType(scorer_type)
145
146
  except ValueError:
146
- return None
147
+ raise ValueError(
148
+ f"Unknown scorer type: {scorer_type}. "
149
+ f"Valid types: {[s.value for s in ScorerType]}"
150
+ )
147
151
 
148
152
  if scorer_type == ScorerType.FLAG_INCLUDES:
149
- return includes() if target else None
153
+ if not target:
154
+ raise ValueError(
155
+ "FLAG_INCLUDES scorer requires a target flag but none was provided. "
156
+ "Backend should return 'target' in session info."
157
+ )
158
+ return includes()
150
159
  elif scorer_type == ScorerType.LLM_BASH:
151
160
  # LLM-based scoring happens server-side, no client scorer
152
161
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.17
3
+ Version: 0.2.19
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -25,6 +25,7 @@ Requires-Dist: pyyaml>=6.0
25
25
  Requires-Dist: rich>=13.0
26
26
  Provides-Extra: dev
27
27
  Requires-Dist: pexpect>=4.8; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
28
29
  Requires-Dist: pytest>=7.0; extra == 'dev'
29
30
  Requires-Dist: requests>=2.28; extra == 'dev'
30
31
  Description-Content-Type: text/markdown
@@ -1,15 +1,15 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
3
  hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
- hte_cli/cli.py,sha256=lyCuhnSiViTKpWYAeVQ1C4D5idg4FRh_q39PXat7HXg,42261
4
+ hte_cli/cli.py,sha256=cJ4g9UgBXHfmcNe4mu9imL8DSKkYzVDp8sR785z8h8M,42315
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
7
  hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
8
8
  hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
9
- hte_cli/runner.py,sha256=DhC8FMjHwfLR193iP4thLDRZrNssYA9KH1WYKU2JKeg,13535
10
- hte_cli/scorers.py,sha256=sFoPJePRt-K191-Ga4cVmrldruJclYXTOLkU_C9nCDI,6025
9
+ hte_cli/runner.py,sha256=SWl9FF4X3e9eBbZyL0ujhmmSL5OK8J6st-Ty0jD5AWM,14550
10
+ hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.2.17.dist-info/METADATA,sha256=tVT3em2qTUXNMZeWKRk1vHp2fuO5SV3kl43dzPyCqEE,3768
13
- hte_cli-0.2.17.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.2.17.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.2.17.dist-info/RECORD,,
12
+ hte_cli-0.2.19.dist-info/METADATA,sha256=-JBgXEkarLFsDv82zZ36XVsEoOgMiIQR5UFnEmJ8ssI,3820
13
+ hte_cli-0.2.19.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.19.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.19.dist-info/RECORD,,