PyPI - hte-cli - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl - Mend

hte-cli 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

hte_cli/cli.py +1 -0
hte_cli/runner.py +23 -8
hte_cli/scorers.py +11 -2
{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/METADATA +1 -1
{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/RECORD +7 -7
{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/WHEEL +0 -0
{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/entry_points.txt +0 -0

hte_cli/cli.py CHANGED Viewed

@@ -274,6 +274,7 @@ def session_join(ctx, session_id: str, force_setup: bool):
         "task": {
             "instructions": session_info.get("instructions", ""),
             "metadata": session_info.get("metadata", {}),
+            "target": session_info.get("target", ""),
             "scorer_type": session_info.get("scorer_type"),
             "intermediate_scoring": session_info.get("intermediate_scoring", False),
         },

hte_cli/runner.py CHANGED Viewed

@@ -364,16 +364,31 @@ class TaskRunner:
         task_id = assignment["task_id"]
         task_data = assignment.get("task", {})
         instructions = task_data.get("instructions", "")
-        # Target can be at task level, in metadata, or in dataset_task_metadata.flag
-        target = task_data.get("target", "") or task_data.get("metadata", {}).get("target", "")
-        # Fallback to dataset_task_metadata.flag for CTF benchmarks (nyuctf, cybench, etc.)
+        # Target comes from backend (preferred) or can be extracted from metadata
+        target = task_data.get("target", "")
         if not target or target == "?":
-            dataset_meta = task_data.get("dataset_task_metadata", {})
-            target = dataset_meta.get("flag", "") or dataset_meta.get("answer", "")
+            # Fallback to metadata fields for various benchmarks
+            meta = task_data.get("metadata", {})
+            target = (
+                meta.get("target", "")
+                or meta.get("solution_flag", "")  # intercode-ctf
+                or meta.get("flag", "")  # cybench, nyuctf
+                or meta.get("answer", "")  # generic
+            )
-        # Extract scoring configuration from backend
-        scorer_type = task_data["scorer_type"]
-        intermediate_scoring = task_data["intermediate_scoring"]
+        # Extract scoring configuration from backend (required fields)
+        scorer_type = task_data.get("scorer_type")
+        if not scorer_type:
+            raise ValueError(
+                f"Backend did not return scorer_type for task {task_id}. "
+                "This is a backend configuration error."
+            )
+        intermediate_scoring = task_data.get("intermediate_scoring")
+        if intermediate_scoring is None:
+            raise ValueError(
+                f"Backend did not return intermediate_scoring for task {task_id}. "
+                "This is a backend configuration error."
+            )
         return self.run(
             task_id=task_id,

hte_cli/scorers.py CHANGED Viewed

@@ -137,16 +137,25 @@ def get_scorer(scorer_type: ScorerType | str, target: str) -> Scorer | None:
     Get the appropriate scorer based on type.
     Matches *_human.py implementations for human/AI parity.
+    Raises ValueError for invalid configurations.
     """
     # Handle string input
     if isinstance(scorer_type, str):
         try:
             scorer_type = ScorerType(scorer_type)
         except ValueError:
-            return None
+            raise ValueError(
+                f"Unknown scorer type: {scorer_type}. "
+                f"Valid types: {[s.value for s in ScorerType]}"
+            )
     if scorer_type == ScorerType.FLAG_INCLUDES:
-        return includes() if target else None
+        if not target:
+            raise ValueError(
+                "FLAG_INCLUDES scorer requires a target flag but none was provided. "
+                "Backend should return 'target' in session info."
+            )
+        return includes()
     elif scorer_type == ScorerType.LLM_BASH:
         # LLM-based scoring happens server-side, no client scorer
         return None

{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hte-cli
-Version: 0.2.17
+Version: 0.2.18
 Summary: Human Time-to-Completion Evaluation CLI
 Project-URL: Homepage, https://github.com/sean-peters-au/lyptus-mono
 Author: Lyptus Research

{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
 hte_cli/__init__.py,sha256=fDGXp-r8bIoLtlQnn5xJ_CpwMhonvk9bGjZQsjA2mDI,914
 hte_cli/__main__.py,sha256=63n0gNGfskidWDU0aAIF2N8lylVCLYKVIkrN9QiORoo,107
 hte_cli/api_client.py,sha256=m42kfFZS72Nu_VuDwxRsLNy4ziCcvgk7KNWBh9gwqy0,9257
-hte_cli/cli.py,sha256=lyCuhnSiViTKpWYAeVQ1C4D5idg4FRh_q39PXat7HXg,42261
+hte_cli/cli.py,sha256=cJ4g9UgBXHfmcNe4mu9imL8DSKkYzVDp8sR785z8h8M,42315
 hte_cli/config.py,sha256=42Xv__YMSeRLs2zhGukJkIXFKtnBtYCHnONfViGyt2g,3387
 hte_cli/errors.py,sha256=1J5PpxcUKBu6XjigMMCPOq4Zc12tnv8LhAsiaVFWLQM,2762
 hte_cli/events.py,sha256=Zn-mroqaLHNzdT4DFf8st1Qclglshihdc09dBfCN070,5522
 hte_cli/image_utils.py,sha256=TLwJdswUQrSD2bQcAXW03R8j8WG2pbHzd12TWcE7zy4,6418
-hte_cli/runner.py,sha256=DhC8FMjHwfLR193iP4thLDRZrNssYA9KH1WYKU2JKeg,13535
-hte_cli/scorers.py,sha256=sFoPJePRt-K191-Ga4cVmrldruJclYXTOLkU_C9nCDI,6025
+hte_cli/runner.py,sha256=dTlro9AJa6YUxdiyJInOpudHPw5S207LTasiJ2NL_nQ,14059
+hte_cli/scorers.py,sha256=NZWMlS2h2Hczm-bldH35wRhL3RYzGhQgCCp3rP9zhJo,6414
 hte_cli/version_check.py,sha256=WVZyGy2XfAghQYdd2N9-0Qfg-7pgp9gt4761-PnmacI,1708
-hte_cli-0.2.17.dist-info/METADATA,sha256=tVT3em2qTUXNMZeWKRk1vHp2fuO5SV3kl43dzPyCqEE,3768
-hte_cli-0.2.17.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-hte_cli-0.2.17.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
-hte_cli-0.2.17.dist-info/RECORD,,
+hte_cli-0.2.18.dist-info/METADATA,sha256=RTwT4AZ8rEWsQNv-jfwTlQqgHy4kRehIR7Ll7PPvEJM,3768
+hte_cli-0.2.18.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+hte_cli-0.2.18.dist-info/entry_points.txt,sha256=XbyEEi1H14DFAt0Kdl22e_IRVEGzimSzYSh5HlhKlFA,41
+hte_cli-0.2.18.dist-info/RECORD,,

{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/WHEEL RENAMED Viewed

File without changes

{hte_cli-0.2.17.dist-info → hte_cli-0.2.18.dist-info}/entry_points.txt RENAMED Viewed

File without changes

hte-cli 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

hte-cli 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl