hte-cli 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- hte_cli/cli.py +1 -0
- hte_cli/runner.py +23 -8
- hte_cli/scorers.py +11 -2
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/METADATA +1 -1
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/RECORD +7 -7
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/WHEEL +0 -0
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/entry_points.txt +0 -0
hte_cli/cli.py
CHANGED
|
@@ -274,6 +274,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
274
274
|
"task": {
|
|
275
275
|
"instructions": session_info.get("instructions", ""),
|
|
276
276
|
"metadata": session_info.get("metadata", {}),
|
|
277
|
+
"target": session_info.get("target", ""),
|
|
277
278
|
"scorer_type": session_info.get("scorer_type"),
|
|
278
279
|
"intermediate_scoring": session_info.get("intermediate_scoring", False),
|
|
279
280
|
},
|
hte_cli/runner.py
CHANGED
|
@@ -364,16 +364,31 @@ class TaskRunner:
|
|
|
364
364
|
task_id = assignment["task_id"]
|
|
365
365
|
task_data = assignment.get("task", {})
|
|
366
366
|
instructions = task_data.get("instructions", "")
|
|
367
|
-
# Target
|
|
368
|
-
target = task_data.get("target", "")
|
|
369
|
-
# Fallback to dataset_task_metadata.flag for CTF benchmarks (nyuctf, cybench, etc.)
|
|
367
|
+
# Target comes from backend (preferred) or can be extracted from metadata
|
|
368
|
+
target = task_data.get("target", "")
|
|
370
369
|
if not target or target == "?":
|
|
371
|
-
|
|
372
|
-
|
|
370
|
+
# Fallback to metadata fields for various benchmarks
|
|
371
|
+
meta = task_data.get("metadata", {})
|
|
372
|
+
target = (
|
|
373
|
+
meta.get("target", "")
|
|
374
|
+
or meta.get("solution_flag", "") # intercode-ctf
|
|
375
|
+
or meta.get("flag", "") # cybench, nyuctf
|
|
376
|
+
or meta.get("answer", "") # generic
|
|
377
|
+
)
|
|
373
378
|
|
|
374
|
-
# Extract scoring configuration from backend
|
|
375
|
-
scorer_type = task_data
|
|
376
|
-
|
|
379
|
+
# Extract scoring configuration from backend (required fields)
|
|
380
|
+
scorer_type = task_data.get("scorer_type")
|
|
381
|
+
if not scorer_type:
|
|
382
|
+
raise ValueError(
|
|
383
|
+
f"Backend did not return scorer_type for task {task_id}. "
|
|
384
|
+
"This is a backend configuration error."
|
|
385
|
+
)
|
|
386
|
+
intermediate_scoring = task_data.get("intermediate_scoring")
|
|
387
|
+
if intermediate_scoring is None:
|
|
388
|
+
raise ValueError(
|
|
389
|
+
f"Backend did not return intermediate_scoring for task {task_id}. "
|
|
390
|
+
"This is a backend configuration error."
|
|
391
|
+
)
|
|
377
392
|
|
|
378
393
|
return self.run(
|
|
379
394
|
task_id=task_id,
|
hte_cli/scorers.py
CHANGED
|
@@ -137,16 +137,25 @@ def get_scorer(scorer_type: ScorerType | str, target: str) -> Scorer | None:
|
|
|
137
137
|
Get the appropriate scorer based on type.
|
|
138
138
|
|
|
139
139
|
Matches *_human.py implementations for human/AI parity.
|
|
140
|
+
Raises ValueError for invalid configurations.
|
|
140
141
|
"""
|
|
141
142
|
# Handle string input
|
|
142
143
|
if isinstance(scorer_type, str):
|
|
143
144
|
try:
|
|
144
145
|
scorer_type = ScorerType(scorer_type)
|
|
145
146
|
except ValueError:
|
|
146
|
-
|
|
147
|
+
raise ValueError(
|
|
148
|
+
f"Unknown scorer type: {scorer_type}. "
|
|
149
|
+
f"Valid types: {[s.value for s in ScorerType]}"
|
|
150
|
+
)
|
|
147
151
|
|
|
148
152
|
if scorer_type == ScorerType.FLAG_INCLUDES:
|
|
149
|
-
|
|
153
|
+
if not target:
|
|
154
|
+
raise ValueError(
|
|
155
|
+
"FLAG_INCLUDES scorer requires a target flag but none was provided. "
|
|
156
|
+
"Backend should return 'target' in session info."
|
|
157
|
+
)
|
|
158
|
+
return includes()
|
|
150
159
|
elif scorer_type == ScorerType.LLM_BASH:
|
|
151
160
|
# LLM-based scoring happens server-side, no client scorer
|
|
152
161
|
return None
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
|
|
2
2
|
hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
|
|
3
3
|
hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
|
|
4
|
-
hte_cli/cli.py,sha256=
|
|
4
|
+
hte_cli/cli.py,sha256=cJ4g9UgBXHfmcNe4mu9imL8DSKkYzVDp8sR785z8h8M,42315
|
|
5
5
|
hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
|
|
6
6
|
hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
|
|
7
7
|
hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
|
|
8
8
|
hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
|
|
9
|
-
hte_cli/runner.py,sha256=
|
|
10
|
-
hte_cli/scorers.py,sha256=
|
|
9
|
+
hte_cli/runner.py,sha256=dTlro9AJa6YUxdiyJInOpudHPw5S207LTasiJ2NL_nQ,14059
|
|
10
|
+
hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
|
|
11
11
|
hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
|
|
12
|
-
hte_cli-0.2.
|
|
13
|
-
hte_cli-0.2.
|
|
14
|
-
hte_cli-0.2.
|
|
15
|
-
hte_cli-0.2.
|
|
12
|
+
hte_cli-0.2.18.dist-info/METADATA,sha256=RTwT4AZ8rEWsQNv-jfwTlQqgHy4kRehIR7Ll7PPvEJM,3768
|
|
13
|
+
hte_cli-0.2.18.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
+
hte_cli-0.2.18.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
|
|
15
|
+
hte_cli-0.2.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|