hte-cli 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
hte_cli/cli.py CHANGED
@@ -274,6 +274,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
274
274
  "task": {
275
275
  "instructions": session_info.get("instructions", ""),
276
276
  "metadata": session_info.get("metadata", {}),
277
+ "target": session_info.get("target", ""),
277
278
  "scorer_type": session_info.get("scorer_type"),
278
279
  "intermediate_scoring": session_info.get("intermediate_scoring", False),
279
280
  },
hte_cli/runner.py CHANGED
@@ -364,16 +364,31 @@ class TaskRunner:
364
364
  task_id = assignment["task_id"]
365
365
  task_data = assignment.get("task", {})
366
366
  instructions = task_data.get("instructions", "")
367
- # Target can be at task level, in metadata, or in dataset_task_metadata.flag
368
- target = task_data.get("target", "") or task_data.get("metadata", {}).get("target", "")
369
- # Fallback to dataset_task_metadata.flag for CTF benchmarks (nyuctf, cybench, etc.)
367
+ # Target comes from backend (preferred) or can be extracted from metadata
368
+ target = task_data.get("target", "")
370
369
  if not target or target == "?":
371
- dataset_meta = task_data.get("dataset_task_metadata", {})
372
- target = dataset_meta.get("flag", "") or dataset_meta.get("answer", "")
370
+ # Fallback to metadata fields for various benchmarks
371
+ meta = task_data.get("metadata", {})
372
+ target = (
373
+ meta.get("target", "")
374
+ or meta.get("solution_flag", "") # intercode-ctf
375
+ or meta.get("flag", "") # cybench, nyuctf
376
+ or meta.get("answer", "") # generic
377
+ )
373
378
 
374
- # Extract scoring configuration from backend
375
- scorer_type = task_data["scorer_type"]
376
- intermediate_scoring = task_data["intermediate_scoring"]
379
+ # Extract scoring configuration from backend (required fields)
380
+ scorer_type = task_data.get("scorer_type")
381
+ if not scorer_type:
382
+ raise ValueError(
383
+ f"Backend did not return scorer_type for task {task_id}. "
384
+ "This is a backend configuration error."
385
+ )
386
+ intermediate_scoring = task_data.get("intermediate_scoring")
387
+ if intermediate_scoring is None:
388
+ raise ValueError(
389
+ f"Backend did not return intermediate_scoring for task {task_id}. "
390
+ "This is a backend configuration error."
391
+ )
377
392
 
378
393
  return self.run(
379
394
  task_id=task_id,
hte_cli/scorers.py CHANGED
@@ -137,16 +137,25 @@ def get_scorer(scorer_type: ScorerType | str, target: str) -> Scorer | None:
137
137
  Get the appropriate scorer based on type.
138
138
 
139
139
  Matches *_human.py implementations for human/AI parity.
140
+ Raises ValueError for invalid configurations.
140
141
  """
141
142
  # Handle string input
142
143
  if isinstance(scorer_type, str):
143
144
  try:
144
145
  scorer_type = ScorerType(scorer_type)
145
146
  except ValueError:
146
- return None
147
+ raise ValueError(
148
+ f"Unknown scorer type: {scorer_type}. "
149
+ f"Valid types: {[s.value for s in ScorerType]}"
150
+ )
147
151
 
148
152
  if scorer_type == ScorerType.FLAG_INCLUDES:
149
- return includes() if target else None
153
+ if not target:
154
+ raise ValueError(
155
+ "FLAG_INCLUDES scorer requires a target flag but none was provided. "
156
+ "Backend should return 'target' in session info."
157
+ )
158
+ return includes()
150
159
  elif scorer_type == ScorerType.LLM_BASH:
151
160
  # LLM-based scoring happens server-side, no client scorer
152
161
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hte-cli
3
- Version: 0.2.17
3
+ Version: 0.2.18
4
4
  Summary: Human Time-to-Completion Evaluation CLI
5
5
  Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
6
6
  Author: Lyptus Research
@@ -1,15 +1,15 @@
1
1
  hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
2
2
  hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
3
3
  hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
4
- hte_cli/cli.py,sha256=lyCuhnSiViTKpWYAeVQ1C4D5idg4FRh_q39PXat7HXg,42261
4
+ hte_cli/cli.py,sha256=cJ4g9UgBXHfmcNe4mu9imL8DSKkYzVDp8sR785z8h8M,42315
5
5
  hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
6
6
  hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
7
7
  hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
8
8
  hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
9
- hte_cli/runner.py,sha256=DhC8FMjHwfLR193iP4thLDRZrNssYA9KH1WYKU2JKeg,13535
10
- hte_cli/scorers.py,sha256=sFoPJePRt-K191-Ga4cVmrldruJclYXTOLkU_C9nCDI,6025
9
+ hte_cli/runner.py,sha256=dTlro9AJa6YUxdiyJInOpudHPw5S207LTasiJ2NL_nQ,14059
10
+ hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
11
11
  hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
12
- hte_cli-0.2.17.dist-info/METADATA,sha256=tVT3em2qTUXNMZeWKRk1vHp2fuO5SV3kl43dzPyCqEE,3768
13
- hte_cli-0.2.17.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
- hte_cli-0.2.17.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
- hte_cli-0.2.17.dist-info/RECORD,,
12
+ hte_cli-0.2.18.dist-info/METADATA,sha256=RTwT4AZ8rEWsQNv-jfwTlQqgHy4kRehIR7Ll7PPvEJM,3768
13
+ hte_cli-0.2.18.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
14
+ hte_cli-0.2.18.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
15
+ hte_cli-0.2.18.dist-info/RECORD,,