benchmax 0.1.2.dev31__py3-none-any.whl → 0.1.2.dev33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmax/envs/postgres_search/search_env.py +1 -7
- benchmax/envs/reward_helpers.py +3 -12
- benchmax/rag/corpus/chroma/client.py +30 -0
- benchmax/rag/corpus/chroma/search.py +23 -6
- benchmax/rag/corpus/chroma/source.py +22 -14
- benchmax/rag/corpus/search_schema/search_exceptions.py +18 -0
- {benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/METADATA +1 -1
- {benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/RECORD +12 -12
- {benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/WHEEL +0 -0
- {benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/entry_points.txt +0 -0
- {benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/licenses/LICENSE +0 -0
- {benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/top_level.txt +0 -0
|
benchmax/envs/postgres_search/search_env.py
CHANGED
|
@@ -285,14 +285,8 @@ tags. Cite your sources inline using [Source: <source_id>] next to each claim.
|
|
|
285
285
|
if not text.strip():
|
|
286
286
|
return zeros
|
|
287
287
|
|
|
288
|
-
# No final <answer> block → no answer to score. Return all-zero
|
|
289
|
-
# rewards so conciseness / citations / efficiency can't accrue
|
|
290
|
-
# from reasoning or tool-call text alone.
|
|
291
|
-
answer = extract_answer_block(text)
|
|
292
|
-
if not answer:
|
|
293
|
-
return zeros
|
|
294
|
-
|
|
295
288
|
t = task or {}
|
|
289
|
+
answer = extract_answer_block(text)
|
|
296
290
|
prompt = str(t.get("question") or t.get("prompt") or "")
|
|
297
291
|
gt_str = str(t.get("ground_truth") or "")
|
|
298
292
|
reference_chunks = t.get("reference_chunks", [])
|
benchmax/envs/reward_helpers.py
CHANGED
|
@@ -82,16 +82,9 @@ def extract_completion_text(completion: str | list[dict[str, Any]]) -> str:
|
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
def extract_answer_block(text: str) -> str:
|
|
85
|
-
"""Extract content from
|
|
86
|
-
|
|
87
|
-
Returns the (stripped) tag contents when an ``<answer>…</answer>`` block
|
|
88
|
-
is present, otherwise ``""``. A missing answer block is treated as "no
|
|
89
|
-
final answer" rather than silently falling back to the full completion —
|
|
90
|
-
consumers can gate rewards on a non-empty result. ``<answer></answer>``
|
|
91
|
-
likewise yields ``""``.
|
|
92
|
-
"""
|
|
85
|
+
"""Extract content from <answer> tags, or return full text."""
|
|
93
86
|
match = _ANSWER_TAG_RE.search(text or "")
|
|
94
|
-
return match.group(1).strip() if match else ""
|
|
87
|
+
return (match.group(1) if match else text).strip()
|
|
95
88
|
|
|
96
89
|
|
|
97
90
|
def clip01(value: Any) -> float:
|
|
@@ -169,10 +162,8 @@ def citation_score(
|
|
|
169
162
|
ref_ids.add(norm_sid)
|
|
170
163
|
break
|
|
171
164
|
|
|
172
|
-
if not cited:
|
|
165
|
+
if not cited or not ref_ids:
|
|
173
166
|
return {"precision": 0.0, "recall": 0.0}
|
|
174
|
-
if not ref_ids:
|
|
175
|
-
return {"precision": 1.0, "recall": 0.0}
|
|
176
167
|
|
|
177
168
|
precision = len(cited & ref_ids) / len(cited)
|
|
178
169
|
recall = len(cited & ref_ids) / len(ref_ids)
|
|
benchmax/rag/corpus/chroma/client.py
CHANGED
|
@@ -16,6 +16,13 @@ from typing import Any
|
|
|
16
16
|
# Sparse-key name used when setting up BM25 schema
|
|
17
17
|
BM25_KEY = "bm25_embedding"
|
|
18
18
|
|
|
19
|
+
# Embedding functions that run server-side on Chroma Cloud (embed.trychroma.com)
|
|
20
|
+
# — querying a collection that uses one never downloads a model. Everything else
|
|
21
|
+
# (default all-MiniLM, sentence-transformers / HF / Ollama / ONNX locals,
|
|
22
|
+
# third-party API EFs, or no EF) is treated as unsafe. Add hosted names here as
|
|
23
|
+
# they are verified server-side.
|
|
24
|
+
_SERVER_SIDE_EF_NAMES = frozenset({"chroma-cloud-qwen"})
|
|
25
|
+
|
|
19
26
|
|
|
20
27
|
def has_search_api() -> bool:
|
|
21
28
|
"""Return True when the chromadb package exposes the Search API."""
|
|
@@ -176,6 +183,29 @@ class ChromaClient:
|
|
|
176
183
|
|
|
177
184
|
return self._collection
|
|
178
185
|
|
|
186
|
+
def dense_embed_is_safe(self) -> bool:
|
|
187
|
+
"""True when a dense (vector) query embeds WITHOUT downloading a model.
|
|
188
|
+
|
|
189
|
+
Safe only when we can produce vectors without a client-side model
|
|
190
|
+
download: either a caller-supplied ``embed_fn``, or a Chroma-hosted
|
|
191
|
+
server-side embedding function (embeds at embed.trychroma.com). Every
|
|
192
|
+
other embedder — chromadb's default all-MiniLM, sentence-transformers /
|
|
193
|
+
HuggingFace / Ollama / ONNX locals, third-party API EFs we lack keys
|
|
194
|
+
for, or no EF at all — is treated as UNSAFE, so callers refuse the dense
|
|
195
|
+
path rather than trigger a model download. Conservative by design: an
|
|
196
|
+
unknown embedder is unsafe.
|
|
197
|
+
"""
|
|
198
|
+
if self.embed_fn is not None:
|
|
199
|
+
return True
|
|
200
|
+
col = self._collection
|
|
201
|
+
if col is None:
|
|
202
|
+
return False
|
|
203
|
+
try:
|
|
204
|
+
ef = (col._model.configuration_json or {}).get("embedding_function") or {}
|
|
205
|
+
except Exception:
|
|
206
|
+
return False
|
|
207
|
+
return ef.get("name") in _SERVER_SIDE_EF_NAMES
|
|
208
|
+
|
|
179
209
|
@staticmethod
|
|
180
210
|
def _repair_cloud_embedding_function(collection: Any) -> None:
|
|
181
211
|
"""Attach a working EF when chromadb can't rebuild a Cloud hosted one.
|
|
benchmax/rag/corpus/chroma/search.py
CHANGED
|
@@ -10,6 +10,9 @@ from collections.abc import Callable
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
12
|
from benchmax.platform.credentials import TokenProvider, as_token_provider, env_token
|
|
13
|
+
from benchmax.rag.corpus.search_schema.search_exceptions import (
|
|
14
|
+
LocalEmbeddingDownloadDisallowedError,
|
|
15
|
+
)
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
class ChromaSearch:
|
|
@@ -113,19 +116,33 @@ class ChromaSearch:
|
|
|
113
116
|
) -> list[dict[str, Any]]:
|
|
114
117
|
"""Search and return structured results."""
|
|
115
118
|
client = self._get_client()
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
+
# Initialize the collection first so capabilities reflect the real index
|
|
120
|
+
# (BM25 downgrade) and the embedder config is readable below.
|
|
121
|
+
client.get_collection()
|
|
122
|
+
modes = client.modes
|
|
123
|
+
has_lexical = "lexical" in modes
|
|
124
|
+
|
|
125
|
+
# Never download a client-side embedding model at inference/rollout time.
|
|
126
|
+
# When a dense embed isn't safe — no embed_fn and no Chroma-hosted
|
|
127
|
+
# server-side embedding function — use the BM25 lexical index if the
|
|
128
|
+
# collection has one, otherwise refuse rather than fetch all-MiniLM.
|
|
129
|
+
if not client.dense_embed_is_safe():
|
|
130
|
+
if not has_lexical:
|
|
131
|
+
raise LocalEmbeddingDownloadDisallowedError(
|
|
132
|
+
"chroma", self._collection_name
|
|
133
|
+
)
|
|
134
|
+
mode = "lexical"
|
|
135
|
+
elif mode == "auto":
|
|
119
136
|
if "hybrid" in modes:
|
|
120
137
|
mode = "hybrid"
|
|
121
|
-
elif
|
|
138
|
+
elif has_lexical:
|
|
122
139
|
mode = "lexical"
|
|
123
140
|
else:
|
|
124
141
|
mode = "vector"
|
|
125
|
-
elif mode not in
|
|
142
|
+
elif mode not in modes:
|
|
126
143
|
raise ValueError(
|
|
127
144
|
f"ChromaSearch does not support mode '{mode}'. "
|
|
128
|
-
f"Available modes: {sorted(
|
|
145
|
+
f"Available modes: {sorted(modes)}"
|
|
129
146
|
)
|
|
130
147
|
|
|
131
148
|
if client.search_api and mode in ("lexical", "hybrid"):
|
|
benchmax/rag/corpus/chroma/source.py
CHANGED
|
@@ -17,6 +17,7 @@ from tqdm.auto import tqdm
|
|
|
17
17
|
from benchmax.rag.chunkers.models import Chunk, ChunkCollection
|
|
18
18
|
from benchmax.rag.corpus.search_schema.search_exceptions import (
|
|
19
19
|
InvalidSearchSpecError,
|
|
20
|
+
LocalEmbeddingDownloadDisallowedError,
|
|
20
21
|
UnsupportedSearchModeError,
|
|
21
22
|
)
|
|
22
23
|
from benchmax.rag.corpus.search_schema.search_types import (
|
|
@@ -642,23 +643,30 @@ class ChromaChunkSource:
|
|
|
642
643
|
# lack a BM25 index, in which case modes was downgraded to vector-only.
|
|
643
644
|
modes = self._current_modes()
|
|
644
645
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
#
|
|
649
|
-
#
|
|
650
|
-
#
|
|
651
|
-
#
|
|
652
|
-
#
|
|
653
|
-
#
|
|
654
|
-
if
|
|
655
|
-
|
|
646
|
+
has_lexical = "lexical" in modes
|
|
647
|
+
has_hybrid = "hybrid" in modes
|
|
648
|
+
|
|
649
|
+
# Hard rule: never let chromadb embed a query with a client-side model
|
|
650
|
+
# (it downloads all-MiniLM and crawls in constrained executors). When a
|
|
651
|
+
# dense embed isn't safe — no embed_fn and no Chroma-hosted server-side
|
|
652
|
+
# embedding function — use the BM25 lexical index if the collection has
|
|
653
|
+
# one, otherwise refuse. This covers every requested mode, including the
|
|
654
|
+
# linker's "inference" preference for vector.
|
|
655
|
+
if not self._chroma.dense_embed_is_safe():
|
|
656
|
+
if not has_lexical:
|
|
657
|
+
raise LocalEmbeddingDownloadDisallowedError(
|
|
658
|
+
"chroma", self._chroma.collection_name
|
|
659
|
+
)
|
|
660
|
+
use_hybrid = False
|
|
661
|
+
use_lexical = True
|
|
656
662
|
elif mode == "lexical":
|
|
657
663
|
use_hybrid = False
|
|
658
|
-
use_lexical =
|
|
664
|
+
use_lexical = has_lexical
|
|
665
|
+
elif mode == "vector":
|
|
666
|
+
use_hybrid = use_lexical = False
|
|
659
667
|
else: # "hybrid", None, or unrecognized -> best available
|
|
660
|
-
use_hybrid =
|
|
661
|
-
use_lexical =
|
|
668
|
+
use_hybrid = has_hybrid
|
|
669
|
+
use_lexical = has_lexical
|
|
662
670
|
|
|
663
671
|
# Batch-embed all queries when embed_fn available and vectors needed
|
|
664
672
|
vectors: list[list[float]] | None = None
|
|
benchmax/rag/corpus/search_schema/search_exceptions.py
CHANGED
|
@@ -43,3 +43,21 @@ class UnsupportedSearchModeError(ValueError):
|
|
|
43
43
|
f"[{backend}] unsupported search mode '{mode}'. "
|
|
44
44
|
f"Supported modes: {sorted(supported_modes)}"
|
|
45
45
|
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class LocalEmbeddingDownloadDisallowedError(RuntimeError):
|
|
49
|
+
"""Raised when serving a search would download a client-side embedding model.
|
|
50
|
+
|
|
51
|
+
The collection has no server-side (hosted) embedding function and no BM25
|
|
52
|
+
index, and the caller supplied no ``embed_fn`` — so embedding a text query
|
|
53
|
+
would make chromadb download and run a local model (e.g. all-MiniLM). We
|
|
54
|
+
refuse rather than trigger that download.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, backend: str, collection: str):
|
|
58
|
+
super().__init__(
|
|
59
|
+
f"[{backend}] collection {collection!r} has no server-side embedding "
|
|
60
|
+
"function and no BM25 index, so search would download a local "
|
|
61
|
+
"embedding model. Re-ingest the corpus with a hosted embedder "
|
|
62
|
+
"(chroma-cloud-qwen) or a BM25 index, or supply an embed_fn."
|
|
63
|
+
)
|
|
{benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/RECORD
CHANGED
|
@@ -5,7 +5,7 @@ benchmax/envs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
5
5
|
benchmax/envs/base_env.py,sha256=FoUgWsNGeNpTHeOop0bklRjLfHA90Yi7MW8zCaNh_V0,8976
|
|
6
6
|
benchmax/envs/example_id.py,sha256=WU967Pt2kFvn-W4k5BC6BvKyrTEqioLr7IyWZ3RjGgU,5685
|
|
7
7
|
benchmax/envs/logging.py,sha256=QnXADCp0vWoV_-MK91yX5OFu6GwgIE98dvhaQTPawqQ,5053
|
|
8
|
-
benchmax/envs/reward_helpers.py,sha256
|
|
8
|
+
benchmax/envs/reward_helpers.py,sha256=-pDqYBazvum8cc8KX7Q_Z0C-Daf3_4TVZuWt-ywhqyY,7364
|
|
9
9
|
benchmax/envs/types.py,sha256=sGKKibQJZQj9RYkFpB3vaUY75tdoHet8yUmdzpZ0SVk,4389
|
|
10
10
|
benchmax/envs/crm/crm_env.py,sha256=ltUtpA45YB_A_hYEpjFTp0nZKwkUvvLSLOAVkaUNz9E,4707
|
|
11
11
|
benchmax/envs/crm/workdir/reward_fn.py,sha256=RY_iy347j79xX4gyCGI7WS0qPmut8Th2rqOiErVbDro,5439
|
|
@@ -32,7 +32,7 @@ benchmax/envs/mcp/provisioners/skypilot_provisioner.py,sha256=ACHnzNZE7GfL1WIWf7
|
|
|
32
32
|
benchmax/envs/mcp/provisioners/utils.py,sha256=ORWJKtPzeS-IdD35p8aZyLMG2RxiB9BAFmU-0pVqiWw,3467
|
|
33
33
|
benchmax/envs/postgres_search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
benchmax/envs/postgres_search/linker_env.py,sha256=B3cn0TpiqgrYL5NvOQYW3Yxy5DdxPw1kmIgqDs-8Buo,8535
|
|
35
|
-
benchmax/envs/postgres_search/search_env.py,sha256=
|
|
35
|
+
benchmax/envs/postgres_search/search_env.py,sha256=IWpqbFr4hjaN_DzdeRchvwvf9qVj5Ut5D-eOsGPyWKQ,19917
|
|
36
36
|
benchmax/envs/telestich/example.py,sha256=cqHIBjD8g7H4-nmspWSKRB2rxeKPOIwkLn136Y04KfQ,28680
|
|
37
37
|
benchmax/envs/telestich/telestich_env.py,sha256=6p6GeyV-9ZIXrAX8zssMFjJgevkV5PfDLMZlslqO8js,61966
|
|
38
38
|
benchmax/envs/wikipedia/utils.py,sha256=YDlxpMfwiVpfMpiZet4kWoeKqNbgTBxeWVEYg5QY3Qs,2879
|
|
@@ -64,11 +64,11 @@ benchmax/rag/corpus/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKV
|
|
|
64
64
|
benchmax/rag/corpus/search_client.py,sha256=171IqQriU6kuQqvSCDgNwOT8SR5pxUPMfCifarrgrFg,1859
|
|
65
65
|
benchmax/rag/corpus/source.py,sha256=dnmReLC8mccHDkg8ZytfXa4AFXrRMCg9v8E2UuVxt8E,4183
|
|
66
66
|
benchmax/rag/corpus/chroma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
-
benchmax/rag/corpus/chroma/client.py,sha256=
|
|
67
|
+
benchmax/rag/corpus/chroma/client.py,sha256=cYZZKQG09u_VfyjsP6UdCBh-RRNGKa9XisBN4OEejQk,20839
|
|
68
68
|
benchmax/rag/corpus/chroma/files.py,sha256=hSP-J2osPNBAvMZHOWipMVXaWN4tila_tsQaTEPNzgc,5567
|
|
69
69
|
benchmax/rag/corpus/chroma/filter_mapper.py,sha256=Y1FzDwDDg15LZ0-Uh1jzOVcSORiVUy5f1qiaVky3pJI,5074
|
|
70
|
-
benchmax/rag/corpus/chroma/search.py,sha256=
|
|
71
|
-
benchmax/rag/corpus/chroma/source.py,sha256=
|
|
70
|
+
benchmax/rag/corpus/chroma/search.py,sha256=iO8fBPk50vG3NmkCmAJ2tKnjP_wKnymV3fbfLjkIAJ8,7688
|
|
71
|
+
benchmax/rag/corpus/chroma/source.py,sha256=0azMLUvZS9g4jvxv_KxsPa3-ArQW5WHCq77CQh-qmqY,30440
|
|
72
72
|
benchmax/rag/corpus/pinecone/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
73
|
benchmax/rag/corpus/pinecone/files.py,sha256=lhas7-mQ622Ku36QvOavXguBweJyYl78wXIeb_LNqig,5728
|
|
74
74
|
benchmax/rag/corpus/pinecone/filter_mapper.py,sha256=exJ3G34QKeQo1rQ8Pu-iGL0XDXVxCW5dc3q0QoYfCo0,6454
|
|
@@ -85,7 +85,7 @@ benchmax/rag/corpus/postgres/source.py,sha256=6ptGHatOscYih42MZ9Wt8MQOrcIEQiJ1X5
|
|
|
85
85
|
benchmax/rag/corpus/search_schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
86
86
|
benchmax/rag/corpus/search_schema/builders.py,sha256=qAMiEOGOLR7xSXWFf12KqzYlrwBZchU_78vkRcOKa8k,1764
|
|
87
87
|
benchmax/rag/corpus/search_schema/dsl_parser.py,sha256=vMijm_nRKztIrsVQP-0OySuCKnrBsbUzet_pwwlU1T8,1586
|
|
88
|
-
benchmax/rag/corpus/search_schema/search_exceptions.py,sha256=
|
|
88
|
+
benchmax/rag/corpus/search_schema/search_exceptions.py,sha256=1ccbLnDAuSMxUnjtyBt-5iXwoKjI3xaZvk9xplCyNFw,2413
|
|
89
89
|
benchmax/rag/corpus/search_schema/search_types.py,sha256=UTkteugSx5OigDRZ8Xqe6itxLUXj2sVeIVxtYbnXGSg,5831
|
|
90
90
|
benchmax/rag/corpus/turbopuffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
91
|
benchmax/rag/corpus/turbopuffer/files.py,sha256=DP80-3NmdyOD34fyQxlzovpLRs_UU1ezQ7PItpY2Nlk,5807
|
|
@@ -160,9 +160,9 @@ benchmax/traces/braintrust/adapter.py,sha256=KTeN9qKLwZJJ8TY-KtSudd4J3_nySz1bRts
|
|
|
160
160
|
benchmax/traces/braintrust/message_extraction.py,sha256=seh3eM_qd9FUPmGOEMChUq_UAMtaIQHYSYDttMgY1go,8409
|
|
161
161
|
benchmax/utils/__init__.py,sha256=FWJVm6jt0m57HS-84bgrb2M-c_EFhf60rWayioUGges,402
|
|
162
162
|
benchmax/utils/checkpoint.py,sha256=htIw9iYjUUHpJqLLZ0y6K4_UYYAkZIx3vdQVY7juKDw,3148
|
|
163
|
-
benchmax-0.1.2.
|
|
164
|
-
benchmax-0.1.2.
|
|
165
|
-
benchmax-0.1.2.
|
|
166
|
-
benchmax-0.1.2.
|
|
167
|
-
benchmax-0.1.2.
|
|
168
|
-
benchmax-0.1.2.
|
|
163
|
+
benchmax-0.1.2.dev33.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
164
|
+
benchmax-0.1.2.dev33.dist-info/METADATA,sha256=X5P1IBK9INVKaO8xzBqoW8CQYQ2VIVD9IkaQV4tVjFQ,2775
|
|
165
|
+
benchmax-0.1.2.dev33.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
166
|
+
benchmax-0.1.2.dev33.dist-info/entry_points.txt,sha256=qtjqAQsHIwRIaLzwAhGTiRvI91CynwcUO5G95uQuDR4,47
|
|
167
|
+
benchmax-0.1.2.dev33.dist-info/top_level.txt,sha256=ryj4zoahvAKL3BnxOpfJNfyIzhvlED9KJ3Q3k4bb9jc,9
|
|
168
|
+
benchmax-0.1.2.dev33.dist-info/RECORD,,
|
|
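{benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/WHEEL
File without changes
|
|
{benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/entry_points.txt
File without changes
|
|
{benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/licenses/LICENSE
File without changes
|
|
{benchmax-0.1.2.dev31.dist-info → benchmax-0.1.2.dev33.dist-info}/top_level.txt
File without changes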
|