benchmax 0.1.2.dev27__py3-none-any.whl → 0.1.2.dev28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmax/envs/example_id.py +21 -19
- benchmax/envs/telestich/example.py +668 -0
- benchmax/envs/telestich/telestich_env.py +1107 -0
- benchmax/envs/types.py +99 -1
- benchmax/platform/__init__.py +3 -0
- benchmax/platform/client.py +13 -0
- benchmax/platform/credentials.py +35 -0
- benchmax/platform/training_run.py +24 -0
- benchmax/platform/validation.py +274 -61
- benchmax/rag/corpus/chroma/search.py +63 -6
- benchmax/rewards/__init__.py +0 -0
- benchmax/rewards/diversity.py +305 -0
- benchmax/rubrics/_utils.py +3 -2
- benchmax/rubrics/adaptive.py +4 -2
- benchmax/rubrics/rubric.py +127 -68
- benchmax/traces/__init__.py +6 -1
- benchmax/traces/adapter.py +113 -53
- benchmax/traces/braintrust/message_extraction.py +6 -79
- benchmax/traces/processing.py +16 -16
- benchmax-0.1.2.dev28.dist-info/METADATA +75 -0
- {benchmax-0.1.2.dev27.dist-info → benchmax-0.1.2.dev28.dist-info}/RECORD +24 -20
- benchmax-0.1.2.dev27.dist-info/METADATA +0 -188
- {benchmax-0.1.2.dev27.dist-info → benchmax-0.1.2.dev28.dist-info}/WHEEL +0 -0
- {benchmax-0.1.2.dev27.dist-info → benchmax-0.1.2.dev28.dist-info}/licenses/LICENSE +0 -0
- {benchmax-0.1.2.dev27.dist-info → benchmax-0.1.2.dev28.dist-info}/top_level.txt +0 -0
benchmax/envs/example_id.py
CHANGED
|
@@ -1,24 +1,23 @@
|
|
|
1
1
|
"""Canonical example identity.
|
|
2
2
|
|
|
3
3
|
``canonical_example_id(prompt_messages, task)`` returns a SHA-256 hex digest
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
v:2 bump went together with the ``seed_messages`` → ``prompt_messages``
|
|
20
|
-
field rename in 2026-05; v:1 hashes are obsolete.
|
|
4
|
+
stable across processes. Identity is computed only here, in Python — both the
|
|
5
|
+
trainer and rollout-service hash via this module.
|
|
6
|
+
|
|
7
|
+
Normalization keeps the digest loader-independent:
|
|
8
|
+
- integer-valued floats → int, -0.0 → 0; NaN/Inf rejected.
|
|
9
|
+
- dict keys whose value is ``None`` are dropped, so a key absent in one loader
|
|
10
|
+
and present-but-null in another (Arrow schema-unification) hashes the same;
|
|
11
|
+
nulls *inside lists* are kept (length/order are identity).
|
|
12
|
+
- ambiguous values rejected: non-str dict keys, ints beyond
|
|
13
|
+
``Number.MAX_SAFE_INTEGER``, byte strings, lone surrogates, unknown types.
|
|
14
|
+
- canonical JSON: sorted keys, no whitespace, no ASCII escaping.
|
|
15
|
+
|
|
16
|
+
Payload tag ``v:3``. History: v:1→v:2 = the 2026-05 ``seed_messages`` →
|
|
17
|
+
``prompt_messages`` rename; v:2→v:3 = drop null-valued dict keys (loader skew).
|
|
18
|
+
Older hashes are obsolete.
|
|
21
19
|
"""
|
|
20
|
+
|
|
22
21
|
from __future__ import annotations
|
|
23
22
|
|
|
24
23
|
import hashlib
|
|
@@ -78,7 +77,10 @@ def _normalize(v: Any) -> Any:
|
|
|
78
77
|
raise ValueError(
|
|
79
78
|
f"dict keys must be str for canonical hashing; got {type(k).__name__}"
|
|
80
79
|
)
|
|
81
|
-
|
|
80
|
+
nx = _normalize(x)
|
|
81
|
+
if nx is None:
|
|
82
|
+
continue
|
|
83
|
+
out[k] = nx
|
|
82
84
|
return out
|
|
83
85
|
raise ValueError(
|
|
84
86
|
f"type {type(v).__name__} is not JSON-canonicalizable; "
|
|
@@ -90,7 +92,7 @@ def canonical_example_id(
|
|
|
90
92
|
prompt_messages: Messages,
|
|
91
93
|
task: dict[str, Any] | None,
|
|
92
94
|
) -> str:
|
|
93
|
-
payload = {"v": 2, "prompt_messages": prompt_messages, "task": task}
|
|
95
|
+
payload = {"v": 3, "prompt_messages": prompt_messages, "task": task}
|
|
94
96
|
serialized = json.dumps(
|
|
95
97
|
_normalize(payload),
|
|
96
98
|
sort_keys=True,
|