hte-cli 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hte_cli/cli.py +1 -0
- hte_cli/runner.py +39 -14
- hte_cli/scorers.py +11 -2
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.19.dist-info}/METADATA +2 -1
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.19.dist-info}/RECORD +7 -7
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.19.dist-info}/WHEEL +0 -0
- {hte_cli-0.2.17.dist-info → hte_cli-0.2.19.dist-info}/entry_points.txt +0 -0
hte_cli/cli.py
CHANGED
|
@@ -274,6 +274,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
|
|
|
274
274
|
"task": {
|
|
275
275
|
"instructions": session_info.get("instructions", ""),
|
|
276
276
|
"metadata": session_info.get("metadata", {}),
|
|
277
|
+
"target": session_info.get("target", ""),
|
|
277
278
|
"scorer_type": session_info.get("scorer_type"),
|
|
278
279
|
"intermediate_scoring": session_info.get("intermediate_scoring", False),
|
|
279
280
|
},
|
hte_cli/runner.py
CHANGED
|
@@ -271,12 +271,22 @@ class TaskRunner:
|
|
|
271
271
|
file_dest_base = _get_file_dest_from_compose(compose_path)
|
|
272
272
|
|
|
273
273
|
files_to_mount: dict[str, str] = {}
|
|
274
|
-
excluded_files = {"compose.yaml", "README.md", "instructions.txt"}
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
274
|
+
excluded_files = {"compose.yaml", "README.md", "instructions.txt", "logs"}
|
|
275
|
+
|
|
276
|
+
# Walk directory tree recursively to handle nested file structures
|
|
277
|
+
# (e.g., cybench tasks with blog/, nginx/, rcache/ subdirectories)
|
|
278
|
+
for file_path in task_dir.rglob("*"):
|
|
279
|
+
if not file_path.is_file():
|
|
280
|
+
continue
|
|
281
|
+
# Get relative path from task_dir
|
|
282
|
+
rel_path = file_path.relative_to(task_dir)
|
|
283
|
+
# Skip excluded files (check both filename and first directory component)
|
|
284
|
+
if rel_path.name in excluded_files or rel_path.parts[0] in excluded_files:
|
|
285
|
+
continue
|
|
286
|
+
# Build destination path preserving directory structure
|
|
287
|
+
dest_path = f"{file_dest_base}/{rel_path}"
|
|
288
|
+
files_to_mount[dest_path] = str(file_path)
|
|
289
|
+
logger.info(f"Will mount file: {rel_path} -> {dest_path}")
|
|
280
290
|
|
|
281
291
|
# Create the Inspect task
|
|
282
292
|
inspect_task = self.create_inspect_task(
|
|
@@ -364,16 +374,31 @@ class TaskRunner:
|
|
|
364
374
|
task_id = assignment["task_id"]
|
|
365
375
|
task_data = assignment.get("task", {})
|
|
366
376
|
instructions = task_data.get("instructions", "")
|
|
367
|
-
# Target
|
|
368
|
-
target = task_data.get("target", "")
|
|
369
|
-
# Fallback to dataset_task_metadata.flag for CTF benchmarks (nyuctf, cybench, etc.)
|
|
377
|
+
# Target comes from backend (preferred) or can be extracted from metadata
|
|
378
|
+
target = task_data.get("target", "")
|
|
370
379
|
if not target or target == "?":
|
|
371
|
-
|
|
372
|
-
|
|
380
|
+
# Fallback to metadata fields for various benchmarks
|
|
381
|
+
meta = task_data.get("metadata", {})
|
|
382
|
+
target = (
|
|
383
|
+
meta.get("target", "")
|
|
384
|
+
or meta.get("solution_flag", "") # intercode-ctf
|
|
385
|
+
or meta.get("flag", "") # cybench, nyuctf
|
|
386
|
+
or meta.get("answer", "") # generic
|
|
387
|
+
)
|
|
373
388
|
|
|
374
|
-
# Extract scoring configuration from backend
|
|
375
|
-
scorer_type = task_data
|
|
376
|
-
|
|
389
|
+
# Extract scoring configuration from backend (required fields)
|
|
390
|
+
scorer_type = task_data.get("scorer_type")
|
|
391
|
+
if not scorer_type:
|
|
392
|
+
raise ValueError(
|
|
393
|
+
f"Backend did not return scorer_type for task {task_id}. "
|
|
394
|
+
"This is a backend configuration error."
|
|
395
|
+
)
|
|
396
|
+
intermediate_scoring = task_data.get("intermediate_scoring")
|
|
397
|
+
if intermediate_scoring is None:
|
|
398
|
+
raise ValueError(
|
|
399
|
+
f"Backend did not return intermediate_scoring for task {task_id}. "
|
|
400
|
+
"This is a backend configuration error."
|
|
401
|
+
)
|
|
377
402
|
|
|
378
403
|
return self.run(
|
|
379
404
|
task_id=task_id,
|
hte_cli/scorers.py
CHANGED
|
@@ -137,16 +137,25 @@ def get_scorer(scorer_type: ScorerType | str, target: str) -> Scorer | None:
|
|
|
137
137
|
Get the appropriate scorer based on type.
|
|
138
138
|
|
|
139
139
|
Matches *_human.py implementations for human/AI parity.
|
|
140
|
+
Raises ValueError for invalid configurations.
|
|
140
141
|
"""
|
|
141
142
|
# Handle string input
|
|
142
143
|
if isinstance(scorer_type, str):
|
|
143
144
|
try:
|
|
144
145
|
scorer_type = ScorerType(scorer_type)
|
|
145
146
|
except ValueError:
|
|
146
|
-
|
|
147
|
+
raise ValueError(
|
|
148
|
+
f"Unknown scorer type: {scorer_type}. "
|
|
149
|
+
f"Valid types: {[s.value for s in ScorerType]}"
|
|
150
|
+
)
|
|
147
151
|
|
|
148
152
|
if scorer_type == ScorerType.FLAG_INCLUDES:
|
|
149
|
-
|
|
153
|
+
if not target:
|
|
154
|
+
raise ValueError(
|
|
155
|
+
"FLAG_INCLUDES scorer requires a target flag but none was provided. "
|
|
156
|
+
"Backend should return 'target' in session info."
|
|
157
|
+
)
|
|
158
|
+
return includes()
|
|
150
159
|
elif scorer_type == ScorerType.LLM_BASH:
|
|
151
160
|
# LLM-based scoring happens server-side, no client scorer
|
|
152
161
|
return None
|
|
{hte_cli-0.2.17.dist-info → hte_cli-0.2.19.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hte-cli
|
|
3
|
-
Version: 0.2.17
|
|
3
|
+
Version: 0.2.19
|
|
4
4
|
Summary: Human Time-to-Completion Evaluation CLI
|
|
5
5
|
Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
|
|
6
6
|
Author: Lyptus Research
|
|
@@ -25,6 +25,7 @@ Requires-Dist: pyyaml>=6.0
|
|
|
25
25
|
Requires-Dist: rich>=13.0
|
|
26
26
|
Provides-Extra: dev
|
|
27
27
|
Requires-Dist: pexpect>=4.8; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
28
29
|
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
29
30
|
Requires-Dist: requests>=2.28; extra == 'dev'
|
|
30
31
|
Description-Content-Type: text/markdown
|
|
{hte_cli-0.2.17.dist-info → hte_cli-0.2.19.dist-info}/RECORD
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
|
|
2
2
|
hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
|
|
3
3
|
hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
|
|
4
|
-
hte_cli/cli.py,sha256=
|
|
4
|
+
hte_cli/cli.py,sha256=cJ4g9UgBXHfmcNe4mu9imL8DSKkYzVDp8sR785z8h8M,42315
|
|
5
5
|
hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
|
|
6
6
|
hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
|
|
7
7
|
hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
|
|
8
8
|
hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
|
|
9
|
-
hte_cli/runner.py,sha256=
|
|
10
|
-
hte_cli/scorers.py,sha256=
|
|
9
|
+
hte_cli/runner.py,sha256=SWl9FF4X3e9eBbZyL0ujhmmSL5OK8J6st-Ty0jD5AWM,14550
|
|
10
|
+
hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
|
|
11
11
|
hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
|
|
12
|
-
hte_cli-0.2.17.dist-info/METADATA,sha256=
|
|
13
|
-
hte_cli-0.2.17.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
-
hte_cli-0.2.17.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
|
|
15
|
-
hte_cli-0.2.17.dist-info/RECORD,,
|
|
12
|
+
hte_cli-0.2.19.dist-info/METADATA,sha256=-JBgXEkarLFsDv82zZ36XVsEoOgMiIQR5UFnEmJ8ssI,3820
|
|
13
|
+
hte_cli-0.2.19.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
+
hte_cli-0.2.19.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
|
|
15
|
+
hte_cli-0.2.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|