benchmax 0.1.2.dev30__py3-none-any.whl → 0.1.2.dev33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmax/bundle.py +74 -0
- benchmax/envs/postgres_search/search_env.py +1 -7
- benchmax/envs/reward_helpers.py +3 -12
- benchmax/envs/telestich/example.py +18 -11
- benchmax/platform/client.py +6 -2
- benchmax/platform/validation.py +43 -1
- benchmax/rag/corpus/chroma/client.py +30 -0
- benchmax/rag/corpus/chroma/search.py +23 -6
- benchmax/rag/corpus/chroma/source.py +22 -14
- benchmax/rag/corpus/pinecone/index_client.py +78 -5
- benchmax/rag/corpus/pinecone/search.py +5 -0
- benchmax/rag/corpus/pinecone/source.py +52 -26
- benchmax/rag/corpus/search_schema/search_exceptions.py +18 -0
- benchmax/rag/corpus/turbopuffer/namespace.py +21 -0
- benchmax/rag/corpus/turbopuffer/search.py +15 -3
- benchmax/rag/corpus/turbopuffer/source.py +14 -8
- {benchmax-0.1.2.dev30.dist-info → benchmax-0.1.2.dev33.dist-info}/METADATA +1 -1
- {benchmax-0.1.2.dev30.dist-info → benchmax-0.1.2.dev33.dist-info}/RECORD +22 -22
- {benchmax-0.1.2.dev30.dist-info → benchmax-0.1.2.dev33.dist-info}/WHEEL +0 -0
- {benchmax-0.1.2.dev30.dist-info → benchmax-0.1.2.dev33.dist-info}/entry_points.txt +0 -0
- {benchmax-0.1.2.dev30.dist-info → benchmax-0.1.2.dev33.dist-info}/licenses/LICENSE +0 -0
- {benchmax-0.1.2.dev30.dist-info → benchmax-0.1.2.dev33.dist-info}/top_level.txt +0 -0
benchmax/bundle.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import importlib
|
|
3
4
|
import inspect
|
|
4
5
|
import io
|
|
5
6
|
import json
|
|
@@ -76,6 +77,7 @@ def dump_bundle(
|
|
|
76
77
|
pip_dependencies: list[str] | None = None,
|
|
77
78
|
local_modules: list[ModuleType] | None = None,
|
|
78
79
|
env_class_source: str | None = None,
|
|
80
|
+
auto_local_modules: bool = True,
|
|
79
81
|
) -> Bundle:
|
|
80
82
|
"""Pickle ``(env_class, constructor_args)`` and stamp metadata.
|
|
81
83
|
|
|
@@ -90,6 +92,10 @@ def dump_bundle(
|
|
|
90
92
|
recover it — e.g. a class produced by ``exec()`` into an in-memory
|
|
91
93
|
namespace, which has no source file on disk. When ``None``
|
|
92
94
|
(default), source is introspected from ``env_class``.
|
|
95
|
+
auto_local_modules: When True (default), any local module the pickle
|
|
96
|
+
references but that wasn't passed in ``local_modules`` is imported
|
|
97
|
+
and pickled by value automatically (a warning names them). When
|
|
98
|
+
False, such a reference raises ``BundlingError`` instead.
|
|
93
99
|
|
|
94
100
|
Raises:
|
|
95
101
|
BundlingError: bad env_class, cloudpickle failure, or pickle references
|
|
@@ -124,6 +130,46 @@ def dump_bundle(
|
|
|
124
130
|
except Exception:
|
|
125
131
|
pass
|
|
126
132
|
|
|
133
|
+
if auto_local_modules and _unregistered_local_refs(pickled):
|
|
134
|
+
# Import each referenced local module and re-dump with it pickled by
|
|
135
|
+
# value. Loop because a by-value module can surface further local refs;
|
|
136
|
+
# registrations accumulate (and are torn down once at the end) so an
|
|
137
|
+
# earlier module stays by-value while we resolve the ones it pulled in.
|
|
138
|
+
seen: set[str] = {m.__name__ for m in local_modules}
|
|
139
|
+
registered: list[ModuleType] = []
|
|
140
|
+
with _BUNDLE_LOCK:
|
|
141
|
+
try:
|
|
142
|
+
for _ in range(10):
|
|
143
|
+
pending = [
|
|
144
|
+
m for m in _unregistered_local_refs(pickled) if m not in seen
|
|
145
|
+
]
|
|
146
|
+
if not pending:
|
|
147
|
+
break
|
|
148
|
+
new_mods: list[ModuleType] = []
|
|
149
|
+
for name in pending:
|
|
150
|
+
seen.add(name) # unimportable names fall through to the guard
|
|
151
|
+
try:
|
|
152
|
+
new_mods.append(importlib.import_module(name))
|
|
153
|
+
except Exception:
|
|
154
|
+
pass
|
|
155
|
+
if not new_mods:
|
|
156
|
+
break
|
|
157
|
+
logger.warning(
|
|
158
|
+
"[bundle] %s: auto-bundling local module(s): %s ",
|
|
159
|
+
env_class.__name__,
|
|
160
|
+
", ".join(sorted(m.__name__ for m in new_mods)),
|
|
161
|
+
)
|
|
162
|
+
for mod in new_mods:
|
|
163
|
+
cloudpickle.register_pickle_by_value(mod)
|
|
164
|
+
registered.append(mod)
|
|
165
|
+
pickled = cloudpickle.dumps((env_class, constructor_args))
|
|
166
|
+
finally:
|
|
167
|
+
for mod in registered:
|
|
168
|
+
try:
|
|
169
|
+
cloudpickle.unregister_pickle_by_value(mod)
|
|
170
|
+
except Exception:
|
|
171
|
+
pass
|
|
172
|
+
|
|
127
173
|
risky = _unregistered_local_refs(pickled)
|
|
128
174
|
if risky:
|
|
129
175
|
msg = (
|
|
@@ -259,6 +305,15 @@ def _referenced_modules(pickled: bytes) -> set[str]:
|
|
|
259
305
|
# Hooks find_class so we see every (module, name) the unpickler would import —
|
|
260
306
|
# i.e. exactly what'd raise ModuleNotFoundError on a fresh interpreter. The stub
|
|
261
307
|
# lets unpickling proceed past missing classes so we collect every ref.
|
|
308
|
+
#
|
|
309
|
+
# find_class alone has a blind spot: a bare ``import foo`` that leaves a
|
|
310
|
+
# module *object* in the env's globals is pickled as
|
|
311
|
+
# ``cloudpickle.subimport("foo")`` — the module name is a REDUCE argument,
|
|
312
|
+
# not a find_class path, so we'd only see ``cloudpickle.cloudpickle`` (which
|
|
313
|
+
# looks installed) and miss ``foo``. We shim subimport to record its arg and
|
|
314
|
+
# return a stub instead of importing, so a missing module is captured rather
|
|
315
|
+
# than aborting the whole load early. (``dynamic_subimport`` is by-value /
|
|
316
|
+
# self-contained — leave it to the real find_class so we don't flag it.)
|
|
262
317
|
refs: set[str] = set()
|
|
263
318
|
|
|
264
319
|
class _Stub:
|
|
@@ -271,9 +326,28 @@ def _referenced_modules(pickled: bytes) -> set[str]:
|
|
|
271
326
|
def __reduce__(self) -> tuple:
|
|
272
327
|
return (type(self), ())
|
|
273
328
|
|
|
329
|
+
def _recording_subimport(name: str, *a: Any, **kw: Any) -> ModuleType:
|
|
330
|
+
refs.add(name)
|
|
331
|
+
return ModuleType(str(name))
|
|
332
|
+
|
|
333
|
+
def _noop_setstate(obj: Any, *a: Any, **kw: Any) -> Any:
|
|
334
|
+
# cloudpickle's _make_skeleton_class resolves the class_tracker_id back
|
|
335
|
+
# to the *live* class (it was tracked when env_class was dumped), so the
|
|
336
|
+
# real ``_class_setstate``/``_function_setstate`` would setattr the
|
|
337
|
+
# reconstructed (stub-globals) members onto the live class/function —
|
|
338
|
+
# mutating the caller's class mid-bundle and poisoning any later dump.
|
|
339
|
+
# We only need the refs from ``state``, which are already recorded while
|
|
340
|
+
# it's unpickled; the setter itself is a no-op here.
|
|
341
|
+
return obj
|
|
342
|
+
|
|
274
343
|
class _Recorder(pickle.Unpickler):
|
|
275
344
|
def find_class(self, module: str, name: str) -> Any:
|
|
276
345
|
refs.add(module)
|
|
346
|
+
if module.startswith("cloudpickle"):
|
|
347
|
+
if name == "subimport":
|
|
348
|
+
return _recording_subimport
|
|
349
|
+
if name in ("_class_setstate", "_function_setstate"):
|
|
350
|
+
return _noop_setstate
|
|
277
351
|
try:
|
|
278
352
|
return super().find_class(module, name)
|
|
279
353
|
except Exception:
|
|
@@ -285,14 +285,8 @@ tags. Cite your sources inline using [Source: <source_id>] next to each claim.
|
|
|
285
285
|
if not text.strip():
|
|
286
286
|
return zeros
|
|
287
287
|
|
|
288
|
-
# No final <answer> block → no answer to score. Return all-zero
|
|
289
|
-
# rewards so conciseness / citations / efficiency can't accrue
|
|
290
|
-
# from reasoning or tool-call text alone.
|
|
291
|
-
answer = extract_answer_block(text)
|
|
292
|
-
if not answer:
|
|
293
|
-
return zeros
|
|
294
|
-
|
|
295
288
|
t = task or {}
|
|
289
|
+
answer = extract_answer_block(text)
|
|
296
290
|
prompt = str(t.get("question") or t.get("prompt") or "")
|
|
297
291
|
gt_str = str(t.get("ground_truth") or "")
|
|
298
292
|
reference_chunks = t.get("reference_chunks", [])
|
benchmax/envs/reward_helpers.py
CHANGED
|
@@ -82,16 +82,9 @@ def extract_completion_text(completion: str | list[dict[str, Any]]) -> str:
|
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
def extract_answer_block(text: str) -> str:
|
|
85
|
-
"""Extract content from
|
|
86
|
-
|
|
87
|
-
Returns the (stripped) tag contents when an ``<answer>…</answer>`` block
|
|
88
|
-
is present, otherwise ``""``. A missing answer block is treated as "no
|
|
89
|
-
final answer" rather than silently falling back to the full completion —
|
|
90
|
-
consumers can gate rewards on a non-empty result. ``<answer></answer>``
|
|
91
|
-
likewise yields ``""``.
|
|
92
|
-
"""
|
|
85
|
+
"""Extract content from <answer> tags, or return full text."""
|
|
93
86
|
match = _ANSWER_TAG_RE.search(text or "")
|
|
94
|
-
return match.group(1)
|
|
87
|
+
return (match.group(1) if match else text).strip()
|
|
95
88
|
|
|
96
89
|
|
|
97
90
|
def clip01(value: Any) -> float:
|
|
@@ -169,10 +162,8 @@ def citation_score(
|
|
|
169
162
|
ref_ids.add(norm_sid)
|
|
170
163
|
break
|
|
171
164
|
|
|
172
|
-
if not cited:
|
|
165
|
+
if not cited or not ref_ids:
|
|
173
166
|
return {"precision": 0.0, "recall": 0.0}
|
|
174
|
-
if not ref_ids:
|
|
175
|
-
return {"precision": 1.0, "recall": 0.0}
|
|
176
167
|
|
|
177
168
|
precision = len(cited & ref_ids) / len(cited)
|
|
178
169
|
recall = len(cited & ref_ids) / len(ref_ids)
|
|
@@ -12,10 +12,12 @@ Run it from the benchmax project root (the ``telestich`` extra pulls in the
|
|
|
12
12
|
env's word-list / rhyme dependencies):
|
|
13
13
|
|
|
14
14
|
cd core/benchmax
|
|
15
|
-
|
|
16
|
-
uv run --extra telestich python -m benchmax.envs.telestich.example
|
|
15
|
+
uv run --extra telestich python -m benchmax.envs.telestich.example
|
|
17
16
|
|
|
18
|
-
(``
|
|
17
|
+
Auth is the device-auth session (``ensure_session()`` opens a browser login if
|
|
18
|
+
``~/.castform`` has no valid session) — no API key needed. ``CASTFORM_API_KEY``
|
|
19
|
+
/ ``CASTFORM_LLM_API_KEY`` are only consulted by the offline dataset-generation
|
|
20
|
+
helpers, not the launch path.
|
|
19
21
|
|
|
20
22
|
This launches a real training run on the full committed seed dataset
|
|
21
23
|
(~90/10 train/eval split).
|
|
@@ -63,6 +65,8 @@ CONCURRENCY = 15
|
|
|
63
65
|
# pool) server-side. Supported: "Qwen/Qwen3.5-4B" (gpu4) or "Qwen/Qwen3.5-35B-A3B"
|
|
64
66
|
# (gpu8). Override via TELESTICH_MODEL.
|
|
65
67
|
MODEL = os.environ.get("TELESTICH_MODEL", "Qwen/Qwen3.5-4B")
|
|
68
|
+
# Run name — defaults to a unique telestich-full-<uuid>. Override via TELESTICH_RUN_NAME.
|
|
69
|
+
RUN_NAME = os.environ.get("TELESTICH_RUN_NAME", "")
|
|
66
70
|
|
|
67
71
|
# (model, weight). Weights reflect observed reliability on our checks:
|
|
68
72
|
# - Both grok models leak banned example words and rubber-stamp the CoT self-check.
|
|
@@ -558,12 +562,15 @@ def get_dataset():
|
|
|
558
562
|
if __name__ == "__main__":
|
|
559
563
|
import uuid
|
|
560
564
|
|
|
565
|
+
from benchmax.platform import ensure_session
|
|
561
566
|
from benchmax.platform.client import TrainerClient
|
|
562
567
|
from benchmax.platform.training_run import upload_training_run
|
|
563
568
|
from benchmax.platform.validation import validate_env
|
|
564
569
|
|
|
565
|
-
if
|
|
566
|
-
|
|
570
|
+
# Device-auth session bootstrap: browser login if no credential resolves.
|
|
571
|
+
# After this the platform bearer comes from ~/.castform — no API key needed,
|
|
572
|
+
# so we pass api_key="" to the platform calls below (resolves via the seam).
|
|
573
|
+
ensure_session()
|
|
567
574
|
|
|
568
575
|
print(f"Platform URL: {BASE_URL}")
|
|
569
576
|
print(f"LLM URL: {LLM_BASE_URL}\n")
|
|
@@ -603,7 +610,7 @@ if __name__ == "__main__":
|
|
|
603
610
|
eval_dataset=eval_data[:2],
|
|
604
611
|
local_modules=local_modules,
|
|
605
612
|
pip_dependencies=pip_dependencies,
|
|
606
|
-
api_key=
|
|
613
|
+
api_key="", # session bearer via ensure_session()
|
|
607
614
|
base_url=BASE_URL,
|
|
608
615
|
llm_base_url=LLM_BASE_URL,
|
|
609
616
|
llm_api_key="",
|
|
@@ -614,14 +621,14 @@ if __name__ == "__main__":
|
|
|
614
621
|
)
|
|
615
622
|
|
|
616
623
|
# 3. Bundle the env class and upload everything to platform storage.
|
|
617
|
-
run_name = f"telestich-full-{uuid.uuid4().hex[:8]}"
|
|
624
|
+
run_name = RUN_NAME or f"telestich-full-{uuid.uuid4().hex[:8]}"
|
|
618
625
|
print(f"\nUploading bundle + datasets as {run_name!r} ...")
|
|
619
626
|
uploaded = upload_training_run(
|
|
620
627
|
env_class=TelestichEnv,
|
|
621
628
|
train_dataset=train_data,
|
|
622
629
|
eval_dataset=eval_data,
|
|
623
630
|
run_name=run_name,
|
|
624
|
-
api_key=
|
|
631
|
+
api_key="", # session bearer via ensure_session()
|
|
625
632
|
base_url=BASE_URL,
|
|
626
633
|
local_modules=local_modules,
|
|
627
634
|
constructor_args=constructor_args,
|
|
@@ -638,7 +645,7 @@ if __name__ == "__main__":
|
|
|
638
645
|
# 4. Launch the training run. training_run_type="simple" + the `model` arg select
|
|
639
646
|
# the trainer YAML/pool server-side (Qwen3.5-4B→gpu4, Qwen3.5-35B-A3B→gpu8).
|
|
640
647
|
print(f"\nLaunching training run (model={MODEL}) ...")
|
|
641
|
-
with TrainerClient(api_key=
|
|
648
|
+
with TrainerClient(api_key="", base_url=BASE_URL) as trainer:
|
|
642
649
|
run_id = trainer.launch_training_run(
|
|
643
650
|
training_run_type="simple",
|
|
644
651
|
env_cls_path=uploaded.env_cls_path,
|
|
@@ -647,10 +654,10 @@ if __name__ == "__main__":
|
|
|
647
654
|
eval_dataset_path=uploaded.eval_dataset_path,
|
|
648
655
|
name=run_name,
|
|
649
656
|
# num_epochs: passes over the train set (platform default is 5).
|
|
650
|
-
#
|
|
657
|
+
# max_rollout_len 3000: a brief reason + 1-2 tool rounds + poem fits well
|
|
651
658
|
# under this; lowered from 4000 to cut off in-head enumeration rambles
|
|
652
659
|
# sooner (they truncate to a 0-reward anyway).
|
|
653
|
-
launcher_args={"model": MODEL, "
|
|
660
|
+
launcher_args={"model": MODEL, "max_rollout_len": 3000, "num_epochs": 10},
|
|
654
661
|
)
|
|
655
662
|
|
|
656
663
|
print(f"\n✓ Launched run_id={run_id}")
|
benchmax/platform/client.py
CHANGED
|
@@ -7,6 +7,7 @@ import hashlib
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
9
|
import textwrap
|
|
10
|
+
import warnings
|
|
10
11
|
from collections.abc import Iterator
|
|
11
12
|
from dataclasses import dataclass, field
|
|
12
13
|
from pathlib import Path
|
|
@@ -404,7 +405,7 @@ class TrainerClient:
|
|
|
404
405
|
eval_dataset_path: Path to the evaluation dataset
|
|
405
406
|
name: Optional name for the training run
|
|
406
407
|
launcher_args: Extra launcher args forwarded to the server
|
|
407
|
-
(e.g. {"
|
|
408
|
+
(e.g. {"max_rollout_len": 4000}). The 4 required paths
|
|
408
409
|
above always take precedence.
|
|
409
410
|
|
|
410
411
|
Returns:
|
|
@@ -431,8 +432,11 @@ class TrainerClient:
|
|
|
431
432
|
)
|
|
432
433
|
self._handle_response_errors(response)
|
|
433
434
|
body = response.json()
|
|
435
|
+
# Surface soft-cap / OOM-risk warnings via the warnings module (shown by
|
|
436
|
+
# default in notebooks/REPL) — a bare logger.warning is swallowed unless
|
|
437
|
+
# the caller configured logging.
|
|
434
438
|
for warning in body.get("warnings", []) or []:
|
|
435
|
-
|
|
439
|
+
warnings.warn(f"launch warning: {warning}", stacklevel=2)
|
|
436
440
|
return body["runId"]
|
|
437
441
|
|
|
438
442
|
def list_launch_args(self) -> list[LaunchArgSpec]:
|
benchmax/platform/validation.py
CHANGED
|
@@ -7,6 +7,7 @@ the env class contract matches what the trainer expects.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import asyncio
|
|
10
|
+
import importlib
|
|
10
11
|
import json
|
|
11
12
|
import math
|
|
12
13
|
import tempfile
|
|
@@ -578,6 +579,41 @@ def _run_local_checks(
|
|
|
578
579
|
from benchmax.bundle import unregistered_local_refs
|
|
579
580
|
|
|
580
581
|
risky = unregistered_local_refs(cloudpickle.dumps(env_class))
|
|
582
|
+
# Mirror dump_bundle's auto_local_modules: import + pickle-by-value
|
|
583
|
+
# any local refs the user didn't list, so validation reflects what
|
|
584
|
+
# the bundle will actually contain. Only genuinely unimportable refs
|
|
585
|
+
# (which the trainer also couldn't load) remain to be flagged.
|
|
586
|
+
auto: list[ModuleType] = []
|
|
587
|
+
if risky:
|
|
588
|
+
seen: set[str] = set()
|
|
589
|
+
try:
|
|
590
|
+
for _ in range(10):
|
|
591
|
+
pending = [
|
|
592
|
+
m
|
|
593
|
+
for m in unregistered_local_refs(cloudpickle.dumps(env_class))
|
|
594
|
+
if m not in seen
|
|
595
|
+
]
|
|
596
|
+
if not pending:
|
|
597
|
+
break
|
|
598
|
+
new_mods: list[ModuleType] = []
|
|
599
|
+
for name in pending:
|
|
600
|
+
seen.add(name)
|
|
601
|
+
try:
|
|
602
|
+
new_mods.append(importlib.import_module(name))
|
|
603
|
+
except Exception:
|
|
604
|
+
pass
|
|
605
|
+
if not new_mods:
|
|
606
|
+
break
|
|
607
|
+
for mod in new_mods:
|
|
608
|
+
cloudpickle.register_pickle_by_value(mod)
|
|
609
|
+
auto.append(mod)
|
|
610
|
+
risky = unregistered_local_refs(cloudpickle.dumps(env_class))
|
|
611
|
+
finally:
|
|
612
|
+
for mod in auto:
|
|
613
|
+
try:
|
|
614
|
+
cloudpickle.unregister_pickle_by_value(mod)
|
|
615
|
+
except Exception:
|
|
616
|
+
pass
|
|
581
617
|
if risky:
|
|
582
618
|
print(
|
|
583
619
|
f" \u2717 {env_class.__name__}: missing "
|
|
@@ -589,7 +625,13 @@ def _run_local_checks(
|
|
|
589
625
|
)
|
|
590
626
|
failed += 1
|
|
591
627
|
else:
|
|
592
|
-
|
|
628
|
+
if auto:
|
|
629
|
+
names = ", ".join(sorted(m.__name__ for m in auto))
|
|
630
|
+
print(
|
|
631
|
+
f" \u2713 auto-bundled local module(s): {names} "
|
|
632
|
+
)
|
|
633
|
+
else:
|
|
634
|
+
print(" \u2713 no unregistered local-module references")
|
|
593
635
|
passed += 1
|
|
594
636
|
except Exception as exc:
|
|
595
637
|
print(f" \u2717 local-modules check failed: {type(exc).__name__}: {exc}")
|
|
@@ -16,6 +16,13 @@ from typing import Any
|
|
|
16
16
|
# Sparse-key name used when setting up BM25 schema
|
|
17
17
|
BM25_KEY = "bm25_embedding"
|
|
18
18
|
|
|
19
|
+
# Embedding functions that run server-side on Chroma Cloud (embed.trychroma.com)
|
|
20
|
+
# — querying a collection that uses one never downloads a model. Everything else
|
|
21
|
+
# (default all-MiniLM, sentence-transformers / HF / Ollama / ONNX locals,
|
|
22
|
+
# third-party API EFs, or no EF) is treated as unsafe. Add hosted names here as
|
|
23
|
+
# they are verified server-side.
|
|
24
|
+
_SERVER_SIDE_EF_NAMES = frozenset({"chroma-cloud-qwen"})
|
|
25
|
+
|
|
19
26
|
|
|
20
27
|
def has_search_api() -> bool:
|
|
21
28
|
"""Return True when the chromadb package exposes the Search API."""
|
|
@@ -176,6 +183,29 @@ class ChromaClient:
|
|
|
176
183
|
|
|
177
184
|
return self._collection
|
|
178
185
|
|
|
186
|
+
def dense_embed_is_safe(self) -> bool:
|
|
187
|
+
"""True when a dense (vector) query embeds WITHOUT downloading a model.
|
|
188
|
+
|
|
189
|
+
Safe only when we can produce vectors without a client-side model
|
|
190
|
+
download: either a caller-supplied ``embed_fn``, or a Chroma-hosted
|
|
191
|
+
server-side embedding function (embeds at embed.trychroma.com). Every
|
|
192
|
+
other embedder — chromadb's default all-MiniLM, sentence-transformers /
|
|
193
|
+
HuggingFace / Ollama / ONNX locals, third-party API EFs we lack keys
|
|
194
|
+
for, or no EF at all — is treated as UNSAFE, so callers refuse the dense
|
|
195
|
+
path rather than trigger a model download. Conservative by design: an
|
|
196
|
+
unknown embedder is unsafe.
|
|
197
|
+
"""
|
|
198
|
+
if self.embed_fn is not None:
|
|
199
|
+
return True
|
|
200
|
+
col = self._collection
|
|
201
|
+
if col is None:
|
|
202
|
+
return False
|
|
203
|
+
try:
|
|
204
|
+
ef = (col._model.configuration_json or {}).get("embedding_function") or {}
|
|
205
|
+
except Exception:
|
|
206
|
+
return False
|
|
207
|
+
return ef.get("name") in _SERVER_SIDE_EF_NAMES
|
|
208
|
+
|
|
179
209
|
@staticmethod
|
|
180
210
|
def _repair_cloud_embedding_function(collection: Any) -> None:
|
|
181
211
|
"""Attach a working EF when chromadb can't rebuild a Cloud hosted one.
|
|
@@ -10,6 +10,9 @@ from collections.abc import Callable
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
12
|
from benchmax.platform.credentials import TokenProvider, as_token_provider, env_token
|
|
13
|
+
from benchmax.rag.corpus.search_schema.search_exceptions import (
|
|
14
|
+
LocalEmbeddingDownloadDisallowedError,
|
|
15
|
+
)
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
class ChromaSearch:
|
|
@@ -113,19 +116,33 @@ class ChromaSearch:
|
|
|
113
116
|
) -> list[dict[str, Any]]:
|
|
114
117
|
"""Search and return structured results."""
|
|
115
118
|
client = self._get_client()
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
+
# Initialize the collection first so capabilities reflect the real index
|
|
120
|
+
# (BM25 downgrade) and the embedder config is readable below.
|
|
121
|
+
client.get_collection()
|
|
122
|
+
modes = client.modes
|
|
123
|
+
has_lexical = "lexical" in modes
|
|
124
|
+
|
|
125
|
+
# Never download a client-side embedding model at inference/rollout time.
|
|
126
|
+
# When a dense embed isn't safe — no embed_fn and no Chroma-hosted
|
|
127
|
+
# server-side embedding function — use the BM25 lexical index if the
|
|
128
|
+
# collection has one, otherwise refuse rather than fetch all-MiniLM.
|
|
129
|
+
if not client.dense_embed_is_safe():
|
|
130
|
+
if not has_lexical:
|
|
131
|
+
raise LocalEmbeddingDownloadDisallowedError(
|
|
132
|
+
"chroma", self._collection_name
|
|
133
|
+
)
|
|
134
|
+
mode = "lexical"
|
|
135
|
+
elif mode == "auto":
|
|
119
136
|
if "hybrid" in modes:
|
|
120
137
|
mode = "hybrid"
|
|
121
|
-
elif
|
|
138
|
+
elif has_lexical:
|
|
122
139
|
mode = "lexical"
|
|
123
140
|
else:
|
|
124
141
|
mode = "vector"
|
|
125
|
-
elif mode not in
|
|
142
|
+
elif mode not in modes:
|
|
126
143
|
raise ValueError(
|
|
127
144
|
f"ChromaSearch does not support mode '{mode}'. "
|
|
128
|
-
f"Available modes: {sorted(
|
|
145
|
+
f"Available modes: {sorted(modes)}"
|
|
129
146
|
)
|
|
130
147
|
|
|
131
148
|
if client.search_api and mode in ("lexical", "hybrid"):
|
|
@@ -17,6 +17,7 @@ from tqdm.auto import tqdm
|
|
|
17
17
|
from benchmax.rag.chunkers.models import Chunk, ChunkCollection
|
|
18
18
|
from benchmax.rag.corpus.search_schema.search_exceptions import (
|
|
19
19
|
InvalidSearchSpecError,
|
|
20
|
+
LocalEmbeddingDownloadDisallowedError,
|
|
20
21
|
UnsupportedSearchModeError,
|
|
21
22
|
)
|
|
22
23
|
from benchmax.rag.corpus.search_schema.search_types import (
|
|
@@ -642,23 +643,30 @@ class ChromaChunkSource:
|
|
|
642
643
|
# lack a BM25 index, in which case modes was downgraded to vector-only.
|
|
643
644
|
modes = self._current_modes()
|
|
644
645
|
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
#
|
|
649
|
-
#
|
|
650
|
-
#
|
|
651
|
-
#
|
|
652
|
-
#
|
|
653
|
-
#
|
|
654
|
-
if
|
|
655
|
-
|
|
646
|
+
has_lexical = "lexical" in modes
|
|
647
|
+
has_hybrid = "hybrid" in modes
|
|
648
|
+
|
|
649
|
+
# Hard rule: never let chromadb embed a query with a client-side model
|
|
650
|
+
# (it downloads all-MiniLM and crawls in constrained executors). When a
|
|
651
|
+
# dense embed isn't safe — no embed_fn and no Chroma-hosted server-side
|
|
652
|
+
# embedding function — use the BM25 lexical index if the collection has
|
|
653
|
+
# one, otherwise refuse. This covers every requested mode, including the
|
|
654
|
+
# linker's "inference" preference for vector.
|
|
655
|
+
if not self._chroma.dense_embed_is_safe():
|
|
656
|
+
if not has_lexical:
|
|
657
|
+
raise LocalEmbeddingDownloadDisallowedError(
|
|
658
|
+
"chroma", self._chroma.collection_name
|
|
659
|
+
)
|
|
660
|
+
use_hybrid = False
|
|
661
|
+
use_lexical = True
|
|
656
662
|
elif mode == "lexical":
|
|
657
663
|
use_hybrid = False
|
|
658
|
-
use_lexical =
|
|
664
|
+
use_lexical = has_lexical
|
|
665
|
+
elif mode == "vector":
|
|
666
|
+
use_hybrid = use_lexical = False
|
|
659
667
|
else: # "hybrid", None, or unrecognized -> best available
|
|
660
|
-
use_hybrid =
|
|
661
|
-
use_lexical =
|
|
668
|
+
use_hybrid = has_hybrid
|
|
669
|
+
use_lexical = has_lexical
|
|
662
670
|
|
|
663
671
|
# Batch-embed all queries when embed_fn available and vectors needed
|
|
664
672
|
vectors: list[list[float]] | None = None
|
|
@@ -60,9 +60,17 @@ class PineconeIndexClient:
|
|
|
60
60
|
embed_model: Pinecone hosted embedding model name. Ignored when
|
|
61
61
|
``embed_fn`` is provided. Defaults to
|
|
62
62
|
``"multilingual-e5-large"``.
|
|
63
|
-
field_mapping:
|
|
64
|
-
field names
|
|
65
|
-
|
|
63
|
+
field_mapping: Low-level escape hatch — maps *Pinecone metadata
|
|
64
|
+
field names* → *internal field names* for schemas that also
|
|
65
|
+
relocate structural fields (``file_path``, ``chunk_index``,
|
|
66
|
+
headers). For the common "my text is under a different key"
|
|
67
|
+
case, prefer ``content_field``.
|
|
68
|
+
content_field: Pinecone metadata key holding the chunk text, for
|
|
69
|
+
"bring your own index" schemas that don't use ``content`` (e.g.
|
|
70
|
+
``"summary"`` or ``"passage"``). The canonical way to point at
|
|
71
|
+
your text column. Empty / None means the default ``content``
|
|
72
|
+
key. Raises if ``field_mapping`` already maps a *different*
|
|
73
|
+
key to ``content``.
|
|
66
74
|
"""
|
|
67
75
|
|
|
68
76
|
def __init__(
|
|
@@ -75,15 +83,35 @@ class PineconeIndexClient:
|
|
|
75
83
|
embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
|
|
76
84
|
embed_model: str = "multilingual-e5-large",
|
|
77
85
|
field_mapping: dict[str, str] | None = None,
|
|
86
|
+
content_field: str | None = None,
|
|
78
87
|
) -> None:
|
|
79
88
|
# Store config for lazy init / pickle safety.
|
|
80
89
|
self._api_key = api_key
|
|
81
90
|
self._index_name = index_name
|
|
82
91
|
self._index_host = index_host
|
|
83
|
-
|
|
92
|
+
# Platform codegen may pass None for an unset namespace; Pinecone's
|
|
93
|
+
# default namespace is "".
|
|
94
|
+
self._namespace = namespace or ""
|
|
84
95
|
self._embed_model = embed_model
|
|
85
96
|
self.embed_fn = embed_fn or self._build_pinecone_embed_fn()
|
|
86
|
-
|
|
97
|
+
mapping = dict(field_mapping) if field_mapping else dict(DEFAULT_FIELD_MAPPING)
|
|
98
|
+
if content_field and content_field != "content":
|
|
99
|
+
conflicting = [
|
|
100
|
+
k
|
|
101
|
+
for k, v in mapping.items()
|
|
102
|
+
if v == "content" and k not in ("content", content_field)
|
|
103
|
+
]
|
|
104
|
+
if field_mapping and conflicting:
|
|
105
|
+
raise ValueError(
|
|
106
|
+
f"content_field={content_field!r} conflicts with field_mapping "
|
|
107
|
+
f"entries {conflicting} that already map to 'content'. "
|
|
108
|
+
"Specify the text column one way or the other."
|
|
109
|
+
)
|
|
110
|
+
# Drop the default content→content entry so the reverse mapping
|
|
111
|
+
# resolves "content" to the custom key unambiguously.
|
|
112
|
+
mapping.pop("content", None)
|
|
113
|
+
mapping[content_field] = "content"
|
|
114
|
+
self._field_mapping = mapping
|
|
87
115
|
# Reverse mapping: internal name → pinecone metadata key
|
|
88
116
|
self._reverse_mapping = {v: k for k, v in self._field_mapping.items()}
|
|
89
117
|
self._index: Any | None = None
|
|
@@ -91,6 +119,8 @@ class PineconeIndexClient:
|
|
|
91
119
|
self._known_ids: list[str] | None = None
|
|
92
120
|
# Cached vector dimension (detected on first embed or describe_index).
|
|
93
121
|
self._vector_dim: int | None = None
|
|
122
|
+
# Cached index vector type ("dense" | "sparse"), probed lazily.
|
|
123
|
+
self._vector_type: str | None = None
|
|
94
124
|
|
|
95
125
|
def _build_pinecone_embed_fn(self) -> Callable[[list[str]], list[list[float]]]:
|
|
96
126
|
"""Build an embed_fn using Pinecone's hosted Inference API.
|
|
@@ -157,6 +187,35 @@ class PineconeIndexClient:
|
|
|
157
187
|
self._index = pc.Index(self._index_name)
|
|
158
188
|
return self._index
|
|
159
189
|
|
|
190
|
+
def vector_type(self) -> str:
|
|
191
|
+
"""Return the index vector type, ``"dense"`` or ``"sparse"``.
|
|
192
|
+
|
|
193
|
+
Probes the index via ``describe_index_stats`` on first call and
|
|
194
|
+
caches the result.
|
|
195
|
+
"""
|
|
196
|
+
if self._vector_type is None:
|
|
197
|
+
index = self._get_index()
|
|
198
|
+
stats = index.describe_index_stats()
|
|
199
|
+
self._vector_type = getattr(stats, "vector_type", None) or "dense"
|
|
200
|
+
return self._vector_type
|
|
201
|
+
|
|
202
|
+
def namespace_vector_count(self) -> int:
|
|
203
|
+
"""Return the vector count for this client's namespace.
|
|
204
|
+
|
|
205
|
+
Scoped to the namespace, NOT the index-wide total — an index-wide
|
|
206
|
+
count would disagree with what list/fetch/query in this namespace
|
|
207
|
+
can actually see. The SDK keys the default namespace as
|
|
208
|
+
``"__default__"`` (the REST API uses ``""``).
|
|
209
|
+
"""
|
|
210
|
+
stats = self._get_index().describe_index_stats()
|
|
211
|
+
namespaces = getattr(stats, "namespaces", None) or {}
|
|
212
|
+
ns_stats = namespaces.get(self._namespace or "__default__")
|
|
213
|
+
if ns_stats is None and not self._namespace:
|
|
214
|
+
ns_stats = namespaces.get("")
|
|
215
|
+
if ns_stats is None:
|
|
216
|
+
return 0
|
|
217
|
+
return int(getattr(ns_stats, "vector_count", 0) or 0)
|
|
218
|
+
|
|
160
219
|
def zero_vector(self) -> list[float]:
|
|
161
220
|
"""Return a zero-vector with the correct dimension for this index.
|
|
162
221
|
|
|
@@ -168,6 +227,12 @@ class PineconeIndexClient:
|
|
|
168
227
|
index = self._get_index()
|
|
169
228
|
stats = index.describe_index_stats()
|
|
170
229
|
self._vector_dim = stats.dimension
|
|
230
|
+
if self._vector_dim is None:
|
|
231
|
+
# Sparse indexes have no fixed dimension.
|
|
232
|
+
raise ValueError(
|
|
233
|
+
f"Pinecone index '{self._index_name}' has no dimension — it is "
|
|
234
|
+
"a sparse index, which has no dense zero-vector."
|
|
235
|
+
)
|
|
171
236
|
return [0.0] * self._vector_dim
|
|
172
237
|
|
|
173
238
|
# ------------------------------------------------------------------
|
|
@@ -305,6 +370,14 @@ class PineconeIndexClient:
|
|
|
305
370
|
include_metadata: bool = True,
|
|
306
371
|
) -> Any:
|
|
307
372
|
"""Run a vector query against the index."""
|
|
373
|
+
if self.vector_type() == "sparse":
|
|
374
|
+
# A dense query vector against a sparse index is rejected by
|
|
375
|
+
# Pinecone with an opaque error; fail with an actionable one.
|
|
376
|
+
raise ValueError(
|
|
377
|
+
f"Pinecone index '{self._index_name}' is a sparse index — "
|
|
378
|
+
"search against sparse indexes is not supported yet. "
|
|
379
|
+
"Use a dense index."
|
|
380
|
+
)
|
|
308
381
|
index = self._get_index()
|
|
309
382
|
kwargs: dict[str, Any] = {
|
|
310
383
|
"vector": vector,
|
|
@@ -36,6 +36,8 @@ class PineconeSearch:
|
|
|
36
36
|
embed_model: Pinecone hosted embedding model name. Ignored
|
|
37
37
|
when ``embed_fn`` is provided.
|
|
38
38
|
field_mapping: Maps Pinecone metadata keys to internal names.
|
|
39
|
+
content_field: Pinecone metadata key holding the chunk text — sugar
|
|
40
|
+
over ``field_mapping`` for BYO indexes that don't use ``content``.
|
|
39
41
|
token_provider: Optional override — a callable resolving the key per
|
|
40
42
|
call, or a literal key (string sugar). Defaults to reading
|
|
41
43
|
``PINECONE_API_KEY``.
|
|
@@ -50,6 +52,7 @@ class PineconeSearch:
|
|
|
50
52
|
embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
|
|
51
53
|
embed_model: str = "multilingual-e5-large",
|
|
52
54
|
field_mapping: dict[str, str] | None = None,
|
|
55
|
+
content_field: str | None = None,
|
|
53
56
|
token_provider: str | TokenProvider | None = None,
|
|
54
57
|
) -> None:
|
|
55
58
|
self._index_name = index_name
|
|
@@ -58,6 +61,7 @@ class PineconeSearch:
|
|
|
58
61
|
self._embed_fn = embed_fn
|
|
59
62
|
self._embed_model = embed_model
|
|
60
63
|
self._field_mapping = field_mapping
|
|
64
|
+
self._content_field = content_field
|
|
61
65
|
self._token_provider = as_token_provider(
|
|
62
66
|
token_provider, env_token("PINECONE_API_KEY")
|
|
63
67
|
)
|
|
@@ -75,6 +79,7 @@ class PineconeSearch:
|
|
|
75
79
|
embed_fn=self._embed_fn,
|
|
76
80
|
embed_model=self._embed_model,
|
|
77
81
|
field_mapping=self._field_mapping,
|
|
82
|
+
content_field=self._content_field,
|
|
78
83
|
)
|
|
79
84
|
return self._client
|
|
80
85
|
|
|
@@ -26,6 +26,9 @@ from .index_client import PineconeIndexClient
|
|
|
26
26
|
|
|
27
27
|
logger = logging.getLogger(__name__)
|
|
28
28
|
|
|
29
|
+
#: Max IDs per vectors/fetch call — Pinecone caps fetch batches at 100.
|
|
30
|
+
_FETCH_BATCH_SIZE = 100
|
|
31
|
+
|
|
29
32
|
|
|
30
33
|
def _raw_to_chunk(raw: dict[str, Any]) -> Chunk:
|
|
31
34
|
"""Convert a raw dict from PineconeIndexClient to a Chunk."""
|
|
@@ -64,8 +67,13 @@ class PineconeChunkSource:
|
|
|
64
67
|
embed_model: Pinecone hosted embedding model name. Ignored when
|
|
65
68
|
``embed_fn`` is provided. Defaults to
|
|
66
69
|
``"multilingual-e5-large"``.
|
|
67
|
-
field_mapping:
|
|
68
|
-
|
|
70
|
+
field_mapping: Low-level escape hatch — maps Pinecone metadata field
|
|
71
|
+
names to internal names when structural fields (``file_path``,
|
|
72
|
+
``chunk_index``, headers) are also relocated. For the common
|
|
73
|
+
case, prefer ``content_field``.
|
|
74
|
+
content_field: Pinecone metadata key holding the chunk text — the
|
|
75
|
+
canonical way to point at your text column for pre-existing
|
|
76
|
+
indexes that don't use ``content``.
|
|
69
77
|
|
|
70
78
|
Example:
|
|
71
79
|
>>> # Using Pinecone's built-in embeddings (simplest)
|
|
@@ -82,12 +90,12 @@ class PineconeChunkSource:
|
|
|
82
90
|
... embed_fn=my_embed_fn,
|
|
83
91
|
... )
|
|
84
92
|
|
|
85
|
-
>>> # Pre-existing index
|
|
93
|
+
>>> # Pre-existing index whose text lives under another key
|
|
86
94
|
>>> source = PineconeChunkSource(
|
|
87
95
|
... api_key="pcsk_...",
|
|
88
96
|
... index_name="product-catalog",
|
|
89
97
|
... embed_model="llama-text-embed-v2",
|
|
90
|
-
...
|
|
98
|
+
... content_field="description",
|
|
91
99
|
... )
|
|
92
100
|
"""
|
|
93
101
|
|
|
@@ -101,6 +109,7 @@ class PineconeChunkSource:
|
|
|
101
109
|
embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
|
|
102
110
|
embed_model: str = "multilingual-e5-large",
|
|
103
111
|
field_mapping: dict[str, str] | None = None,
|
|
112
|
+
content_field: str | None = None,
|
|
104
113
|
) -> None:
|
|
105
114
|
self._client = PineconeIndexClient(
|
|
106
115
|
api_key=api_key,
|
|
@@ -110,6 +119,7 @@ class PineconeChunkSource:
|
|
|
110
119
|
embed_fn=embed_fn,
|
|
111
120
|
embed_model=embed_model,
|
|
112
121
|
field_mapping=field_mapping,
|
|
122
|
+
content_field=content_field,
|
|
113
123
|
)
|
|
114
124
|
self._files = FileAwareness(self._client)
|
|
115
125
|
|
|
@@ -237,40 +247,56 @@ class PineconeChunkSource:
|
|
|
237
247
|
# ------------------------------------------------------------------
|
|
238
248
|
|
|
239
249
|
def get_chunk_count(self) -> int:
|
|
240
|
-
"""Return the
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
250
|
+
"""Return the number of vectors in the configured namespace.
|
|
251
|
+
|
|
252
|
+
Scoped to the namespace this source reads from — an index-wide
|
|
253
|
+
total would disagree with what sampling/search can actually see.
|
|
254
|
+
"""
|
|
255
|
+
return self._client.namespace_vector_count()
|
|
244
256
|
|
|
245
257
|
def sample_chunks(self, n: int, min_chars: int = 0) -> list[Chunk]:
|
|
246
258
|
"""Return n randomly sampled chunks, optionally filtered by
|
|
247
259
|
minimum length.
|
|
248
260
|
|
|
249
|
-
|
|
250
|
-
|
|
261
|
+
Samples uniformly from the paginated ID listing and hydrates the
|
|
262
|
+
sample via fetch — no query vector involved, so the draw is
|
|
263
|
+
genuinely uniform (not nearest-to-a-random-point) and works for
|
|
264
|
+
dense and sparse indexes alike.
|
|
251
265
|
"""
|
|
252
|
-
#
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
# Fetch more than needed to allow for min_chars filtering
|
|
257
|
-
fetch_k = min(n * 3, 10000) if min_chars > 0 else min(n, 10000)
|
|
258
|
-
result = self._client.query(
|
|
259
|
-
vector=rand_vec,
|
|
260
|
-
top_k=fetch_k,
|
|
261
|
-
include_metadata=True,
|
|
262
|
-
)
|
|
263
|
-
|
|
264
|
-
matches = result.matches or []
|
|
265
|
-
if not matches:
|
|
266
|
+
# Oversample when a length filter will discard part of the draw
|
|
267
|
+
fetch_n = min(n * 3, 10000) if min_chars > 0 else min(n, 10000)
|
|
268
|
+
ids = self._client.sample_ids(fetch_n)
|
|
269
|
+
if not ids:
|
|
266
270
|
return []
|
|
267
271
|
|
|
268
|
-
|
|
272
|
+
raws: list[dict[str, Any]] = []
|
|
273
|
+
for batch_start in range(0, len(ids), _FETCH_BATCH_SIZE):
|
|
274
|
+
raws.extend(
|
|
275
|
+
self._client.fetch_by_ids_raw(
|
|
276
|
+
ids[batch_start : batch_start + _FETCH_BATCH_SIZE]
|
|
277
|
+
)
|
|
278
|
+
)
|
|
279
|
+
chunks = [_raw_to_chunk(r) for r in raws]
|
|
280
|
+
|
|
281
|
+
# Every fetched record decoding to empty content means the text key
|
|
282
|
+
# is wrong (BYO index whose schema doesn't use the configured field),
|
|
283
|
+
# not that the corpus is empty. Without this, the pipeline dies later
|
|
284
|
+
# with an unactionable "No eligible chunks were found".
|
|
285
|
+
if chunks and all(not c.content for c in chunks):
|
|
286
|
+
content_key = self._client._pc_field("content")
|
|
287
|
+
seen_keys = sorted(
|
|
288
|
+
{k for r in raws for k in r.get("metadata", {}) if not k.startswith("_")}
|
|
289
|
+
)
|
|
290
|
+
raise ValueError(
|
|
291
|
+
f"No text found under metadata field '{content_key}' in any "
|
|
292
|
+
f"sampled record. This index's metadata fields are: "
|
|
293
|
+
f"{seen_keys}. Set content_field to the one holding the "
|
|
294
|
+
f"chunk text."
|
|
295
|
+
)
|
|
269
296
|
|
|
270
297
|
if min_chars > 0:
|
|
271
298
|
chunks = [c for c in chunks if len(c.content) >= min_chars]
|
|
272
299
|
|
|
273
|
-
# Shuffle to avoid bias from similarity ordering
|
|
274
300
|
random.shuffle(chunks)
|
|
275
301
|
return chunks[:n]
|
|
276
302
|
|
|
@@ -43,3 +43,21 @@ class UnsupportedSearchModeError(ValueError):
|
|
|
43
43
|
f"[{backend}] unsupported search mode '{mode}'. "
|
|
44
44
|
f"Supported modes: {sorted(supported_modes)}"
|
|
45
45
|
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class LocalEmbeddingDownloadDisallowedError(RuntimeError):
|
|
49
|
+
"""Raised when serving a search would download a client-side embedding model.
|
|
50
|
+
|
|
51
|
+
The collection has no server-side (hosted) embedding function and no BM25
|
|
52
|
+
index, and the caller supplied no ``embed_fn`` — so embedding a text query
|
|
53
|
+
would make chromadb download and run a local model (e.g. all-MiniLM). We
|
|
54
|
+
refuse rather than trigger that download.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
def __init__(self, backend: str, collection: str):
|
|
58
|
+
super().__init__(
|
|
59
|
+
f"[{backend}] collection {collection!r} has no server-side embedding "
|
|
60
|
+
"function and no BM25 index, so search would download a local "
|
|
61
|
+
"embedding model. Re-ingest the corpus with a hosted embedder "
|
|
62
|
+
"(chroma-cloud-qwen) or a BM25 index, or supply an embed_fn."
|
|
63
|
+
)
|
|
@@ -19,6 +19,27 @@ from benchmax.rag.corpus.search_schema.search_types import (
|
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
def resolve_content_attr(
|
|
23
|
+
content_attr: list[str] | None, content_field: str | None
|
|
24
|
+
) -> list[str] | None:
|
|
25
|
+
"""Resolve the ``content_field`` sugar against an explicit ``content_attr``.
|
|
26
|
+
|
|
27
|
+
``content_field`` is the canonical single-column param; ``content_attr``
|
|
28
|
+
is the low-level multi-field escape hatch. Specifying the text column
|
|
29
|
+
both ways with different values raises instead of silently picking a
|
|
30
|
+
winner.
|
|
31
|
+
"""
|
|
32
|
+
if not content_field:
|
|
33
|
+
return content_attr
|
|
34
|
+
if content_attr is not None and content_attr != [content_field]:
|
|
35
|
+
raise ValueError(
|
|
36
|
+
f"content_field={content_field!r} conflicts with "
|
|
37
|
+
f"content_attr={content_attr!r}. Specify the text column one way "
|
|
38
|
+
"or the other."
|
|
39
|
+
)
|
|
40
|
+
return [content_field]
|
|
41
|
+
|
|
42
|
+
|
|
22
43
|
class TpufNamespace:
|
|
23
44
|
"""Thin wrapper around a Turbopuffer namespace.
|
|
24
45
|
|
|
@@ -30,7 +30,12 @@ class TpufSearch:
|
|
|
30
30
|
Args:
|
|
31
31
|
namespace: Turbopuffer namespace name.
|
|
32
32
|
region: Turbopuffer region (default ``"aws-us-east-1"``).
|
|
33
|
-
content_attr:
|
|
33
|
+
content_attr: Low-level escape hatch — list of BM25-indexed content
|
|
34
|
+
fields for multi-field schemas. Prefer ``content_field``.
|
|
35
|
+
content_field: Turbopuffer attribute holding the chunk text — the
|
|
36
|
+
canonical single-column param. Must be BM25-indexed for lexical
|
|
37
|
+
search. Raises if ``content_attr`` is also supplied with a
|
|
38
|
+
different value.
|
|
34
39
|
embed_fn: Custom embedding function. Required for vector/hybrid.
|
|
35
40
|
vector_attr: Vector attribute name (default ``"vector"``).
|
|
36
41
|
distance_metric: Distance metric (default ``"cosine_distance"``).
|
|
@@ -48,11 +53,14 @@ class TpufSearch:
|
|
|
48
53
|
embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
|
|
49
54
|
vector_attr: str = "vector",
|
|
50
55
|
distance_metric: str = "cosine_distance",
|
|
56
|
+
content_field: str | None = None,
|
|
51
57
|
token_provider: str | TokenProvider | None = None,
|
|
52
58
|
) -> None:
|
|
59
|
+
from .namespace import resolve_content_attr
|
|
60
|
+
|
|
53
61
|
self._namespace = namespace
|
|
54
62
|
self._region = region
|
|
55
|
-
self._content_attr = content_attr
|
|
63
|
+
self._content_attr = resolve_content_attr(content_attr, content_field)
|
|
56
64
|
self._embed_fn = embed_fn
|
|
57
65
|
self._vector_attr = vector_attr
|
|
58
66
|
self._distance_metric = distance_metric
|
|
@@ -92,7 +100,6 @@ class TpufSearch:
|
|
|
92
100
|
top_k: int = 10,
|
|
93
101
|
) -> list[dict[str, Any]]:
|
|
94
102
|
"""Search and return structured results."""
|
|
95
|
-
ns = self._get_client()
|
|
96
103
|
modes = self.available_modes
|
|
97
104
|
content_fields = self._content_attr or ["content"]
|
|
98
105
|
|
|
@@ -111,6 +118,11 @@ class TpufSearch:
|
|
|
111
118
|
f"{'Provide embed_fn for vector/hybrid.' if mode in ('vector', 'hybrid') else ''}"
|
|
112
119
|
)
|
|
113
120
|
|
|
121
|
+
# Validate the request before constructing the client — an invalid
|
|
122
|
+
# mode should fail as such, not as a missing-credential error from
|
|
123
|
+
# the token provider.
|
|
124
|
+
ns = self._get_client()
|
|
125
|
+
|
|
114
126
|
if mode == "lexical":
|
|
115
127
|
rank_by = [content_fields[0], "BM25", query]
|
|
116
128
|
result = ns.query(rank_by=rank_by, top_k=top_k, include_attributes=True)
|
|
@@ -23,7 +23,7 @@ from benchmax.rag.corpus.search_schema.search_types import (
|
|
|
23
23
|
|
|
24
24
|
from .files import FileAwareness
|
|
25
25
|
from .filter_mapper import to_turbopuffer_filters
|
|
26
|
-
from .namespace import TpufNamespace
|
|
26
|
+
from .namespace import TpufNamespace, resolve_content_attr
|
|
27
27
|
|
|
28
28
|
_DEFAULT_RELATED_SEARCH_MODE: SearchMode = "lexical"
|
|
29
29
|
_HYBRID_FUSION_RRF_K = 60.0
|
|
@@ -50,10 +50,15 @@ class TpufChunkSource:
|
|
|
50
50
|
api_key: Turbopuffer API key
|
|
51
51
|
namespace: Turbopuffer namespace name
|
|
52
52
|
region: Turbopuffer region (default "aws-us-east-1")
|
|
53
|
-
content_attr:
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
content_attr: Low-level escape hatch — list of Turbopuffer attribute
|
|
54
|
+
names to use as the chunk's searchable text content (multi-field
|
|
55
|
+
schemas, e.g. ["title", "content"]). For the common single-column
|
|
56
|
+
case, prefer ``content_field``. Defaults to ["content"].
|
|
57
|
+
content_field: Turbopuffer attribute holding the chunk text — the
|
|
58
|
+
canonical way to point at your text column for pre-existing
|
|
59
|
+
namespaces that don't use ``content``. Must be BM25-indexed for
|
|
60
|
+
lexical search. Raises if ``content_attr`` is also supplied with
|
|
61
|
+
a different value.
|
|
57
62
|
vector_attr: Name of the vector attribute in the namespace. Defaults to
|
|
58
63
|
"vector". Set this if your namespace stores embeddings under a
|
|
59
64
|
different attribute name.
|
|
@@ -64,11 +69,11 @@ class TpufChunkSource:
|
|
|
64
69
|
>>> source.populate_from_folder("./docs", embed_fn=my_embed_fn)
|
|
65
70
|
>>> chunks = source.sample_chunks(n=10, min_chars=400)
|
|
66
71
|
|
|
67
|
-
>>> # Pre-existing namespace
|
|
72
|
+
>>> # Pre-existing namespace whose text lives under another key
|
|
68
73
|
>>> source = TpufChunkSource(
|
|
69
74
|
... api_key="tpuf_...",
|
|
70
75
|
... namespace="product-catalog",
|
|
71
|
-
...
|
|
76
|
+
... content_field="description",
|
|
72
77
|
... )
|
|
73
78
|
"""
|
|
74
79
|
|
|
@@ -81,12 +86,13 @@ class TpufChunkSource:
|
|
|
81
86
|
embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
|
|
82
87
|
vector_attr: str = "vector",
|
|
83
88
|
distance_metric: str = "cosine_distance",
|
|
89
|
+
content_field: str | None = None,
|
|
84
90
|
) -> None:
|
|
85
91
|
self._client = TpufNamespace(
|
|
86
92
|
api_key=api_key,
|
|
87
93
|
namespace=namespace,
|
|
88
94
|
region=region,
|
|
89
|
-
content_attr=content_attr,
|
|
95
|
+
content_attr=resolve_content_attr(content_attr, content_field),
|
|
90
96
|
embed_fn=embed_fn,
|
|
91
97
|
vector_attr=vector_attr,
|
|
92
98
|
distance_metric=distance_metric,
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
benchmax/bundle.py,sha256=
|
|
1
|
+
benchmax/bundle.py,sha256=HJ0ZCojI6DRSGqgF6uMNazDsQJM6o5rqsfoTKV0kZAU,13879
|
|
2
2
|
benchmax/cli.py,sha256=N9gC_ilTutbF7nNplWo7-e-hw674PbBsw_iuCtt0xyA,2366
|
|
3
3
|
benchmax/config.py,sha256=qTtr8-VO0XjjxKfXh0jE58bmpuw0UnirYI_8aH2gb3g,2112
|
|
4
4
|
benchmax/envs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
benchmax/envs/base_env.py,sha256=FoUgWsNGeNpTHeOop0bklRjLfHA90Yi7MW8zCaNh_V0,8976
|
|
6
6
|
benchmax/envs/example_id.py,sha256=WU967Pt2kFvn-W4k5BC6BvKyrTEqioLr7IyWZ3RjGgU,5685
|
|
7
7
|
benchmax/envs/logging.py,sha256=QnXADCp0vWoV_-MK91yX5OFu6GwgIE98dvhaQTPawqQ,5053
|
|
8
|
-
benchmax/envs/reward_helpers.py,sha256
|
|
8
|
+
benchmax/envs/reward_helpers.py,sha256=-pDqYBazvum8cc8KX7Q_Z0C-Daf3_4TVZuWt-ywhqyY,7364
|
|
9
9
|
benchmax/envs/types.py,sha256=sGKKibQJZQj9RYkFpB3vaUY75tdoHet8yUmdzpZ0SVk,4389
|
|
10
10
|
benchmax/envs/crm/crm_env.py,sha256=ltUtpA45YB_A_hYEpjFTp0nZKwkUvvLSLOAVkaUNz9E,4707
|
|
11
11
|
benchmax/envs/crm/workdir/reward_fn.py,sha256=RY_iy347j79xX4gyCGI7WS0qPmut8Th2rqOiErVbDro,5439
|
|
@@ -32,8 +32,8 @@ benchmax/envs/mcp/provisioners/skypilot_provisioner.py,sha256=ACHnzNZE7GfL1WIWf7
|
|
|
32
32
|
benchmax/envs/mcp/provisioners/utils.py,sha256=ORWJKtPzeS-IdD35p8aZyLMG2RxiB9BAFmU-0pVqiWw,3467
|
|
33
33
|
benchmax/envs/postgres_search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
benchmax/envs/postgres_search/linker_env.py,sha256=B3cn0TpiqgrYL5NvOQYW3Yxy5DdxPw1kmIgqDs-8Buo,8535
|
|
35
|
-
benchmax/envs/postgres_search/search_env.py,sha256=
|
|
36
|
-
benchmax/envs/telestich/example.py,sha256=
|
|
35
|
+
benchmax/envs/postgres_search/search_env.py,sha256=IWpqbFr4hjaN_DzdeRchvwvf9qVj5Ut5D-eOsGPyWKQ,19917
|
|
36
|
+
benchmax/envs/telestich/example.py,sha256=cqHIBjD8g7H4-nmspWSKRB2rxeKPOIwkLn136Y04KfQ,28680
|
|
37
37
|
benchmax/envs/telestich/telestich_env.py,sha256=6p6GeyV-9ZIXrAX8zssMFjJgevkV5PfDLMZlslqO8js,61966
|
|
38
38
|
benchmax/envs/wikipedia/utils.py,sha256=YDlxpMfwiVpfMpiZet4kWoeKqNbgTBxeWVEYg5QY3Qs,2879
|
|
39
39
|
benchmax/envs/wikipedia/wiki_env.py,sha256=FigVZ0P0WVJG66CUZHOXq8tbSHWz8gNFr9cdeDePqfI,9288
|
|
@@ -45,13 +45,13 @@ benchmax/multi_model/inspector.py,sha256=j730w35YpZ4tGpzoVHza763GkUjyRxmqzRTwXqI
|
|
|
45
45
|
benchmax/multi_model/models.py,sha256=bYLBJ0uybsB_tg2jkWHQGhyqakb21bHgstnZZCcAq58,3218
|
|
46
46
|
benchmax/multi_model/pricing.py,sha256=x6Gz9dET7hBvQJb2SvQ8IVPvH-xenmoHrqp3Wpa4dI8,2122
|
|
47
47
|
benchmax/platform/__init__.py,sha256=GI4U-qPyU-lPCQDxA1mw6Lnqj58gP5PptxSts_h7uPo,926
|
|
48
|
-
benchmax/platform/client.py,sha256=
|
|
48
|
+
benchmax/platform/client.py,sha256=mA0bQiIPrkimaBkW4Zv1byPJyEPnc_O7-E2dossWy4k,54919
|
|
49
49
|
benchmax/platform/credentials.py,sha256=ABn44ChybWT9UQNd9_sc4yvfTdkw93521u9ZRI3H4Kk,15151
|
|
50
50
|
benchmax/platform/device_auth.py,sha256=OCWCzTdQLNeB57cBFNpqDea2YV846r_ukDDIWRDDER4,2670
|
|
51
51
|
benchmax/platform/exceptions.py,sha256=rkGrsSL2efqr3k15LOVv4k8mVfTXxjIDbWnKBKKz7Hk,1018
|
|
52
52
|
benchmax/platform/login.py,sha256=aQJ9_QJir4pq_pWl85_LoyMKtOAs3ZVjE7TtulwEQbg,3471
|
|
53
53
|
benchmax/platform/training_run.py,sha256=lzcUaigASRheASLN48BueCDu2fyESrWZdUejd0ZueIs,7000
|
|
54
|
-
benchmax/platform/validation.py,sha256=
|
|
54
|
+
benchmax/platform/validation.py,sha256=ZmQkhEyeBuAKT_ViCycQnyfaA2fAgmzQFxjIDzVRK3U,36736
|
|
55
55
|
benchmax/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
benchmax/prompts/tools.py,sha256=dhUkLfqNuFhwsZ3etNc_xiOIn_7CC8HhZatr657Rmq4,2978
|
|
57
57
|
benchmax/rag/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -64,17 +64,17 @@ benchmax/rag/corpus/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKV
|
|
|
64
64
|
benchmax/rag/corpus/search_client.py,sha256=171IqQriU6kuQqvSCDgNwOT8SR5pxUPMfCifarrgrFg,1859
|
|
65
65
|
benchmax/rag/corpus/source.py,sha256=dnmReLC8mccHDkg8ZytfXa4AFXrRMCg9v8E2UuVxt8E,4183
|
|
66
66
|
benchmax/rag/corpus/chroma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
-
benchmax/rag/corpus/chroma/client.py,sha256=
|
|
67
|
+
benchmax/rag/corpus/chroma/client.py,sha256=cYZZKQG09u_VfyjsP6UdCBh-RRNGKa9XisBN4OEejQk,20839
|
|
68
68
|
benchmax/rag/corpus/chroma/files.py,sha256=hSP-J2osPNBAvMZHOWipMVXaWN4tila_tsQaTEPNzgc,5567
|
|
69
69
|
benchmax/rag/corpus/chroma/filter_mapper.py,sha256=Y1FzDwDDg15LZ0-Uh1jzOVcSORiVUy5f1qiaVky3pJI,5074
|
|
70
|
-
benchmax/rag/corpus/chroma/search.py,sha256=
|
|
71
|
-
benchmax/rag/corpus/chroma/source.py,sha256=
|
|
70
|
+
benchmax/rag/corpus/chroma/search.py,sha256=iO8fBPk50vG3NmkCmAJ2tKnjP_wKnymV3fbfLjkIAJ8,7688
|
|
71
|
+
benchmax/rag/corpus/chroma/source.py,sha256=0azMLUvZS9g4jvxv_KxsPa3-ArQW5WHCq77CQh-qmqY,30440
|
|
72
72
|
benchmax/rag/corpus/pinecone/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
73
|
benchmax/rag/corpus/pinecone/files.py,sha256=lhas7-mQ622Ku36QvOavXguBweJyYl78wXIeb_LNqig,5728
|
|
74
74
|
benchmax/rag/corpus/pinecone/filter_mapper.py,sha256=exJ3G34QKeQo1rQ8Pu-iGL0XDXVxCW5dc3q0QoYfCo0,6454
|
|
75
|
-
benchmax/rag/corpus/pinecone/index_client.py,sha256=
|
|
76
|
-
benchmax/rag/corpus/pinecone/search.py,sha256=
|
|
77
|
-
benchmax/rag/corpus/pinecone/source.py,sha256=
|
|
75
|
+
benchmax/rag/corpus/pinecone/index_client.py,sha256=eZ6LzBg82X9HCvs9HUrgLRuuHPVtM_hoICYYvznC2dg,18045
|
|
76
|
+
benchmax/rag/corpus/pinecone/search.py,sha256=XoDKouj5Y-THW0cG00zUIgcpbJxRKv1y5mM5z2a-ZdA,4928
|
|
77
|
+
benchmax/rag/corpus/pinecone/source.py,sha256=aJey4d5Pz_FB-G9MXusODAnszun57HAztFVzu8RWC_8,21305
|
|
78
78
|
benchmax/rag/corpus/postgres/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
79
79
|
benchmax/rag/corpus/postgres/client.py,sha256=JbRUelHpXlZhDlXdEUWooF7UpqrHmf0uAjWoweJ-Dio,19821
|
|
80
80
|
benchmax/rag/corpus/postgres/exceptions.py,sha256=tykCt_4H9ewe5Qh_qzIg_PoSmuJpY-aox1QCku9PVmI,1572
|
|
@@ -85,14 +85,14 @@ benchmax/rag/corpus/postgres/source.py,sha256=6ptGHatOscYih42MZ9Wt8MQOrcIEQiJ1X5
|
|
|
85
85
|
benchmax/rag/corpus/search_schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
86
86
|
benchmax/rag/corpus/search_schema/builders.py,sha256=qAMiEOGOLR7xSXWFf12KqzYlrwBZchU_78vkRcOKa8k,1764
|
|
87
87
|
benchmax/rag/corpus/search_schema/dsl_parser.py,sha256=vMijm_nRKztIrsVQP-0OySuCKnrBsbUzet_pwwlU1T8,1586
|
|
88
|
-
benchmax/rag/corpus/search_schema/search_exceptions.py,sha256=
|
|
88
|
+
benchmax/rag/corpus/search_schema/search_exceptions.py,sha256=1ccbLnDAuSMxUnjtyBt-5iXwoKjI3xaZvk9xplCyNFw,2413
|
|
89
89
|
benchmax/rag/corpus/search_schema/search_types.py,sha256=UTkteugSx5OigDRZ8Xqe6itxLUXj2sVeIVxtYbnXGSg,5831
|
|
90
90
|
benchmax/rag/corpus/turbopuffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
91
|
benchmax/rag/corpus/turbopuffer/files.py,sha256=DP80-3NmdyOD34fyQxlzovpLRs_UU1ezQ7PItpY2Nlk,5807
|
|
92
92
|
benchmax/rag/corpus/turbopuffer/filter_mapper.py,sha256=r9YRn1A3XfoFUaD6KDRtUr-ufvMhGBA6VoR-YTuCvcY,4676
|
|
93
|
-
benchmax/rag/corpus/turbopuffer/namespace.py,sha256=
|
|
94
|
-
benchmax/rag/corpus/turbopuffer/search.py,sha256=
|
|
95
|
-
benchmax/rag/corpus/turbopuffer/source.py,sha256=
|
|
93
|
+
benchmax/rag/corpus/turbopuffer/namespace.py,sha256=LP0Gpwv91ZzgRhHaUBI0ITvutmS-er0W4o07QNyqSU8,12303
|
|
94
|
+
benchmax/rag/corpus/turbopuffer/search.py,sha256=MF0E3kWzAQf5C2fjWV3TyyFWpy_-3DwOBM0XBIkgZqM,8293
|
|
95
|
+
benchmax/rag/corpus/turbopuffer/source.py,sha256=f0G3RzJkdCxM0TSoyI_eB_JH-wFhHiPgl8SmINj3XbQ,28577
|
|
96
96
|
benchmax/rag/preprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
97
|
benchmax/rag/preprocess/email/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
98
98
|
benchmax/rag/preprocess/email/clean_bodies.py,sha256=OQ1fwsB3Dfy9iyzjX5ZpaHweB33Cs6hfwsULm8xeBZk,18097
|
|
@@ -160,9 +160,9 @@ benchmax/traces/braintrust/adapter.py,sha256=KTeN9qKLwZJJ8TY-KtSudd4J3_nySz1bRts
|
|
|
160
160
|
benchmax/traces/braintrust/message_extraction.py,sha256=seh3eM_qd9FUPmGOEMChUq_UAMtaIQHYSYDttMgY1go,8409
|
|
161
161
|
benchmax/utils/__init__.py,sha256=FWJVm6jt0m57HS-84bgrb2M-c_EFhf60rWayioUGges,402
|
|
162
162
|
benchmax/utils/checkpoint.py,sha256=htIw9iYjUUHpJqLLZ0y6K4_UYYAkZIx3vdQVY7juKDw,3148
|
|
163
|
-
benchmax-0.1.2.
|
|
164
|
-
benchmax-0.1.2.
|
|
165
|
-
benchmax-0.1.2.
|
|
166
|
-
benchmax-0.1.2.
|
|
167
|
-
benchmax-0.1.2.
|
|
168
|
-
benchmax-0.1.2.
|
|
163
|
+
benchmax-0.1.2.dev33.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
164
|
+
benchmax-0.1.2.dev33.dist-info/METADATA,sha256=X5P1IBK9INVKaO8xzBqoW8CQYQ2VIVD9IkaQV4tVjFQ,2775
|
|
165
|
+
benchmax-0.1.2.dev33.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
166
|
+
benchmax-0.1.2.dev33.dist-info/entry_points.txt,sha256=qtjqAQsHIwRIaLzwAhGTiRvI91CynwcUO5G95uQuDR4,47
|
|
167
|
+
benchmax-0.1.2.dev33.dist-info/top_level.txt,sha256=ryj4zoahvAKL3BnxOpfJNfyIzhvlED9KJ3Q3k4bb9jc,9
|
|
168
|
+
benchmax-0.1.2.dev33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|