benchmax 0.1.2.dev30__py3-none-any.whl → 0.1.2.dev33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
benchmax/bundle.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
3
4
  import inspect
4
5
  import io
5
6
  import json
@@ -76,6 +77,7 @@ def dump_bundle(
76
77
  pip_dependencies: list[str] | None = None,
77
78
  local_modules: list[ModuleType] | None = None,
78
79
  env_class_source: str | None = None,
80
+ auto_local_modules: bool = True,
79
81
  ) -> Bundle:
80
82
  """Pickle ``(env_class, constructor_args)`` and stamp metadata.
81
83
 
@@ -90,6 +92,10 @@ def dump_bundle(
90
92
  recover it — e.g. a class produced by ``exec()`` into an in-memory
91
93
  namespace, which has no source file on disk. When ``None``
92
94
  (default), source is introspected from ``env_class``.
95
+ auto_local_modules: When True (default), any local module the pickle
96
+ references but that wasn't passed in ``local_modules`` is imported
97
+ and pickled by value automatically (a warning names them). When
98
+ False, such a reference raises ``BundlingError`` instead.
93
99
 
94
100
  Raises:
95
101
  BundlingError: bad env_class, cloudpickle failure, or pickle references
@@ -124,6 +130,46 @@ def dump_bundle(
124
130
  except Exception:
125
131
  pass
126
132
 
133
+ if auto_local_modules and _unregistered_local_refs(pickled):
134
+ # Import each referenced local module and re-dump with it pickled by
135
+ # value. Loop because a by-value module can surface further local refs;
136
+ # registrations accumulate (and are torn down once at the end) so an
137
+ # earlier module stays by-value while we resolve the ones it pulled in.
138
+ seen: set[str] = {m.__name__ for m in local_modules}
139
+ registered: list[ModuleType] = []
140
+ with _BUNDLE_LOCK:
141
+ try:
142
+ for _ in range(10):
143
+ pending = [
144
+ m for m in _unregistered_local_refs(pickled) if m not in seen
145
+ ]
146
+ if not pending:
147
+ break
148
+ new_mods: list[ModuleType] = []
149
+ for name in pending:
150
+ seen.add(name) # unimportable names fall through to the guard
151
+ try:
152
+ new_mods.append(importlib.import_module(name))
153
+ except Exception:
154
+ pass
155
+ if not new_mods:
156
+ break
157
+ logger.warning(
158
+ "[bundle] %s: auto-bundling local module(s): %s ",
159
+ env_class.__name__,
160
+ ", ".join(sorted(m.__name__ for m in new_mods)),
161
+ )
162
+ for mod in new_mods:
163
+ cloudpickle.register_pickle_by_value(mod)
164
+ registered.append(mod)
165
+ pickled = cloudpickle.dumps((env_class, constructor_args))
166
+ finally:
167
+ for mod in registered:
168
+ try:
169
+ cloudpickle.unregister_pickle_by_value(mod)
170
+ except Exception:
171
+ pass
172
+
127
173
  risky = _unregistered_local_refs(pickled)
128
174
  if risky:
129
175
  msg = (
@@ -259,6 +305,15 @@ def _referenced_modules(pickled: bytes) -> set[str]:
259
305
  # Hooks find_class so we see every (module, name) the unpickler would import —
260
306
  # i.e. exactly what'd raise ModuleNotFoundError on a fresh interpreter. The stub
261
307
  # lets unpickling proceed past missing classes so we collect every ref.
308
+ #
309
+ # find_class alone has a blind spot: a bare ``import foo`` that leaves a
310
+ # module *object* in the env's globals is pickled as
311
+ # ``cloudpickle.subimport("foo")`` — the module name is a REDUCE argument,
312
+ # not a find_class path, so we'd only see ``cloudpickle.cloudpickle`` (which
313
+ # looks installed) and miss ``foo``. We shim subimport to record its arg and
314
+ # return a stub instead of importing, so a missing module is captured rather
315
+ # than aborting the whole load early. (``dynamic_subimport`` is by-value /
316
+ # self-contained — leave it to the real find_class so we don't flag it.)
262
317
  refs: set[str] = set()
263
318
 
264
319
  class _Stub:
@@ -271,9 +326,28 @@ def _referenced_modules(pickled: bytes) -> set[str]:
271
326
  def __reduce__(self) -> tuple:
272
327
  return (type(self), ())
273
328
 
329
+ def _recording_subimport(name: str, *a: Any, **kw: Any) -> ModuleType:
330
+ refs.add(name)
331
+ return ModuleType(str(name))
332
+
333
+ def _noop_setstate(obj: Any, *a: Any, **kw: Any) -> Any:
334
+ # cloudpickle's _make_skeleton_class resolves the class_tracker_id back
335
+ # to the *live* class (it was tracked when env_class was dumped), so the
336
+ # real ``_class_setstate``/``_function_setstate`` would setattr the
337
+ # reconstructed (stub-globals) members onto the live class/function —
338
+ # mutating the caller's class mid-bundle and poisoning any later dump.
339
+ # We only need the refs from ``state``, which are already recorded while
340
+ # it's unpickled; the setter itself is a no-op here.
341
+ return obj
342
+
274
343
  class _Recorder(pickle.Unpickler):
275
344
  def find_class(self, module: str, name: str) -> Any:
276
345
  refs.add(module)
346
+ if module.startswith("cloudpickle"):
347
+ if name == "subimport":
348
+ return _recording_subimport
349
+ if name in ("_class_setstate", "_function_setstate"):
350
+ return _noop_setstate
277
351
  try:
278
352
  return super().find_class(module, name)
279
353
  except Exception:
@@ -285,14 +285,8 @@ tags. Cite your sources inline using [Source: <source_id>] next to each claim.
285
285
  if not text.strip():
286
286
  return zeros
287
287
 
288
- # No final <answer> block → no answer to score. Return all-zero
289
- # rewards so conciseness / citations / efficiency can't accrue
290
- # from reasoning or tool-call text alone.
291
- answer = extract_answer_block(text)
292
- if not answer:
293
- return zeros
294
-
295
288
  t = task or {}
289
+ answer = extract_answer_block(text)
296
290
  prompt = str(t.get("question") or t.get("prompt") or "")
297
291
  gt_str = str(t.get("ground_truth") or "")
298
292
  reference_chunks = t.get("reference_chunks", [])
@@ -82,16 +82,9 @@ def extract_completion_text(completion: str | list[dict[str, Any]]) -> str:
82
82
 
83
83
 
84
84
  def extract_answer_block(text: str) -> str:
85
- """Extract content from ``<answer>`` tags.
86
-
87
- Returns the (stripped) tag contents when an ``<answer>…</answer>`` block
88
- is present, otherwise ``""``. A missing answer block is treated as "no
89
- final answer" rather than silently falling back to the full completion —
90
- consumers can gate rewards on a non-empty result. ``<answer></answer>``
91
- likewise yields ``""``.
92
- """
85
+ """Extract content from <answer> tags, or return full text."""
93
86
  match = _ANSWER_TAG_RE.search(text or "")
94
- return match.group(1).strip() if match else ""
87
+ return (match.group(1) if match else text).strip()
95
88
 
96
89
 
97
90
  def clip01(value: Any) -> float:
@@ -169,10 +162,8 @@ def citation_score(
169
162
  ref_ids.add(norm_sid)
170
163
  break
171
164
 
172
- if not cited:
165
+ if not cited or not ref_ids:
173
166
  return {"precision": 0.0, "recall": 0.0}
174
- if not ref_ids:
175
- return {"precision": 1.0, "recall": 0.0}
176
167
 
177
168
  precision = len(cited & ref_ids) / len(cited)
178
169
  recall = len(cited & ref_ids) / len(ref_ids)
@@ -12,10 +12,12 @@ Run it from the benchmax project root (the ``telestich`` extra pulls in the
12
12
  env's word-list / rhyme dependencies):
13
13
 
14
14
  cd core/benchmax
15
- CASTFORM_API_KEY=sk_... \
16
- uv run --extra telestich python -m benchmax.envs.telestich.example
15
+ uv run --extra telestich python -m benchmax.envs.telestich.example
17
16
 
18
- (``CASTFORM_LLM_API_KEY`` is optional — it defaults to ``CASTFORM_API_KEY``.)
17
+ Auth is the device-auth session (``ensure_session()`` opens a browser login if
18
+ ``~/.castform`` has no valid session) — no API key needed. ``CASTFORM_API_KEY``
19
+ / ``CASTFORM_LLM_API_KEY`` are only consulted by the offline dataset-generation
20
+ helpers, not the launch path.
19
21
 
20
22
  This launches a real training run on the full committed seed dataset
21
23
  (~90/10 train/eval split).
@@ -63,6 +65,8 @@ CONCURRENCY = 15
63
65
  # pool) server-side. Supported: "Qwen/Qwen3.5-4B" (gpu4) or "Qwen/Qwen3.5-35B-A3B"
64
66
  # (gpu8). Override via TELESTICH_MODEL.
65
67
  MODEL = os.environ.get("TELESTICH_MODEL", "Qwen/Qwen3.5-4B")
68
+ # Run name — defaults to a unique telestich-full-<uuid>. Override via TELESTICH_RUN_NAME.
69
+ RUN_NAME = os.environ.get("TELESTICH_RUN_NAME", "")
66
70
 
67
71
  # (model, weight). Weights reflect observed reliability on our checks:
68
72
  # - Both grok models leak banned example words and rubber-stamp the CoT self-check.
@@ -558,12 +562,15 @@ def get_dataset():
558
562
  if __name__ == "__main__":
559
563
  import uuid
560
564
 
565
+ from benchmax.platform import ensure_session
561
566
  from benchmax.platform.client import TrainerClient
562
567
  from benchmax.platform.training_run import upload_training_run
563
568
  from benchmax.platform.validation import validate_env
564
569
 
565
- if not API_KEY:
566
- raise SystemExit("Set CASTFORM_API_KEY before running this example.")
570
+ # Device-auth session bootstrap: browser login if no credential resolves.
571
+ # After this the platform bearer comes from ~/.castform — no API key needed,
572
+ # so we pass api_key="" to the platform calls below (resolves via the seam).
573
+ ensure_session()
567
574
 
568
575
  print(f"Platform URL: {BASE_URL}")
569
576
  print(f"LLM URL: {LLM_BASE_URL}\n")
@@ -603,7 +610,7 @@ if __name__ == "__main__":
603
610
  eval_dataset=eval_data[:2],
604
611
  local_modules=local_modules,
605
612
  pip_dependencies=pip_dependencies,
606
- api_key=API_KEY,
613
+ api_key="", # session bearer via ensure_session()
607
614
  base_url=BASE_URL,
608
615
  llm_base_url=LLM_BASE_URL,
609
616
  llm_api_key="",
@@ -614,14 +621,14 @@ if __name__ == "__main__":
614
621
  )
615
622
 
616
623
  # 3. Bundle the env class and upload everything to platform storage.
617
- run_name = f"telestich-full-{uuid.uuid4().hex[:8]}"
624
+ run_name = RUN_NAME or f"telestich-full-{uuid.uuid4().hex[:8]}"
618
625
  print(f"\nUploading bundle + datasets as {run_name!r} ...")
619
626
  uploaded = upload_training_run(
620
627
  env_class=TelestichEnv,
621
628
  train_dataset=train_data,
622
629
  eval_dataset=eval_data,
623
630
  run_name=run_name,
624
- api_key=API_KEY,
631
+ api_key="", # session bearer via ensure_session()
625
632
  base_url=BASE_URL,
626
633
  local_modules=local_modules,
627
634
  constructor_args=constructor_args,
@@ -638,7 +645,7 @@ if __name__ == "__main__":
638
645
  # 4. Launch the training run. training_run_type="simple" + the `model` arg select
639
646
  # the trainer YAML/pool server-side (Qwen3.5-4B→gpu4, Qwen3.5-35B-A3B→gpu8).
640
647
  print(f"\nLaunching training run (model={MODEL}) ...")
641
- with TrainerClient(api_key=API_KEY, base_url=BASE_URL) as trainer:
648
+ with TrainerClient(api_key="", base_url=BASE_URL) as trainer:
642
649
  run_id = trainer.launch_training_run(
643
650
  training_run_type="simple",
644
651
  env_cls_path=uploaded.env_cls_path,
@@ -647,10 +654,10 @@ if __name__ == "__main__":
647
654
  eval_dataset_path=uploaded.eval_dataset_path,
648
655
  name=run_name,
649
656
  # num_epochs: passes over the train set (platform default is 5).
650
- # max_response_len 3000: a brief reason + 1-2 tool rounds + poem fits well
657
+ # max_rollout_len 3000: a brief reason + 1-2 tool rounds + poem fits well
651
658
  # under this; lowered from 4000 to cut off in-head enumeration rambles
652
659
  # sooner (they truncate to a 0-reward anyway).
653
- launcher_args={"model": MODEL, "max_response_len": 3000, "num_epochs": 10},
660
+ launcher_args={"model": MODEL, "max_rollout_len": 3000, "num_epochs": 10},
654
661
  )
655
662
 
656
663
  print(f"\n✓ Launched run_id={run_id}")
@@ -7,6 +7,7 @@ import hashlib
7
7
  import json
8
8
  import logging
9
9
  import textwrap
10
+ import warnings
10
11
  from collections.abc import Iterator
11
12
  from dataclasses import dataclass, field
12
13
  from pathlib import Path
@@ -404,7 +405,7 @@ class TrainerClient:
404
405
  eval_dataset_path: Path to the evaluation dataset
405
406
  name: Optional name for the training run
406
407
  launcher_args: Extra launcher args forwarded to the server
407
- (e.g. {"max_response_len": 4000}). The 4 required paths
408
+ (e.g. {"max_rollout_len": 4000}). The 4 required paths
408
409
  above always take precedence.
409
410
 
410
411
  Returns:
@@ -431,8 +432,11 @@ class TrainerClient:
431
432
  )
432
433
  self._handle_response_errors(response)
433
434
  body = response.json()
435
+ # Surface soft-cap / OOM-risk warnings via the warnings module (shown by
436
+ # default in notebooks/REPL) — a bare logger.warning is swallowed unless
437
+ # the caller configured logging.
434
438
  for warning in body.get("warnings", []) or []:
435
- logger.warning("launch warning: %s", warning)
439
+ warnings.warn(f"launch warning: {warning}", stacklevel=2)
436
440
  return body["runId"]
437
441
 
438
442
  def list_launch_args(self) -> list[LaunchArgSpec]:
@@ -7,6 +7,7 @@ the env class contract matches what the trainer expects.
7
7
  from __future__ import annotations
8
8
 
9
9
  import asyncio
10
+ import importlib
10
11
  import json
11
12
  import math
12
13
  import tempfile
@@ -578,6 +579,41 @@ def _run_local_checks(
578
579
  from benchmax.bundle import unregistered_local_refs
579
580
 
580
581
  risky = unregistered_local_refs(cloudpickle.dumps(env_class))
582
+ # Mirror dump_bundle's auto_local_modules: import + pickle-by-value
583
+ # any local refs the user didn't list, so validation reflects what
584
+ # the bundle will actually contain. Only genuinely unimportable refs
585
+ # (which the trainer also couldn't load) remain to be flagged.
586
+ auto: list[ModuleType] = []
587
+ if risky:
588
+ seen: set[str] = set()
589
+ try:
590
+ for _ in range(10):
591
+ pending = [
592
+ m
593
+ for m in unregistered_local_refs(cloudpickle.dumps(env_class))
594
+ if m not in seen
595
+ ]
596
+ if not pending:
597
+ break
598
+ new_mods: list[ModuleType] = []
599
+ for name in pending:
600
+ seen.add(name)
601
+ try:
602
+ new_mods.append(importlib.import_module(name))
603
+ except Exception:
604
+ pass
605
+ if not new_mods:
606
+ break
607
+ for mod in new_mods:
608
+ cloudpickle.register_pickle_by_value(mod)
609
+ auto.append(mod)
610
+ risky = unregistered_local_refs(cloudpickle.dumps(env_class))
611
+ finally:
612
+ for mod in auto:
613
+ try:
614
+ cloudpickle.unregister_pickle_by_value(mod)
615
+ except Exception:
616
+ pass
581
617
  if risky:
582
618
  print(
583
619
  f" \u2717 {env_class.__name__}: missing "
@@ -589,7 +625,13 @@ def _run_local_checks(
589
625
  )
590
626
  failed += 1
591
627
  else:
592
- print(" \u2713 no unregistered local-module references")
628
+ if auto:
629
+ names = ", ".join(sorted(m.__name__ for m in auto))
630
+ print(
631
+ f" \u2713 auto-bundled local module(s): {names} "
632
+ )
633
+ else:
634
+ print(" \u2713 no unregistered local-module references")
593
635
  passed += 1
594
636
  except Exception as exc:
595
637
  print(f" \u2717 local-modules check failed: {type(exc).__name__}: {exc}")
@@ -16,6 +16,13 @@ from typing import Any
16
16
  # Sparse-key name used when setting up BM25 schema
17
17
  BM25_KEY = "bm25_embedding"
18
18
 
19
+ # Embedding functions that run server-side on Chroma Cloud (embed.trychroma.com)
20
+ # — querying a collection that uses one never downloads a model. Everything else
21
+ # (default all-MiniLM, sentence-transformers / HF / Ollama / ONNX locals,
22
+ # third-party API EFs, or no EF) is treated as unsafe. Add hosted names here as
23
+ # they are verified server-side.
24
+ _SERVER_SIDE_EF_NAMES = frozenset({"chroma-cloud-qwen"})
25
+
19
26
 
20
27
  def has_search_api() -> bool:
21
28
  """Return True when the chromadb package exposes the Search API."""
@@ -176,6 +183,29 @@ class ChromaClient:
176
183
 
177
184
  return self._collection
178
185
 
186
+ def dense_embed_is_safe(self) -> bool:
187
+ """True when a dense (vector) query embeds WITHOUT downloading a model.
188
+
189
+ Safe only when we can produce vectors without a client-side model
190
+ download: either a caller-supplied ``embed_fn``, or a Chroma-hosted
191
+ server-side embedding function (embeds at embed.trychroma.com). Every
192
+ other embedder — chromadb's default all-MiniLM, sentence-transformers /
193
+ HuggingFace / Ollama / ONNX locals, third-party API EFs we lack keys
194
+ for, or no EF at all — is treated as UNSAFE, so callers refuse the dense
195
+ path rather than trigger a model download. Conservative by design: an
196
+ unknown embedder is unsafe.
197
+ """
198
+ if self.embed_fn is not None:
199
+ return True
200
+ col = self._collection
201
+ if col is None:
202
+ return False
203
+ try:
204
+ ef = (col._model.configuration_json or {}).get("embedding_function") or {}
205
+ except Exception:
206
+ return False
207
+ return ef.get("name") in _SERVER_SIDE_EF_NAMES
208
+
179
209
  @staticmethod
180
210
  def _repair_cloud_embedding_function(collection: Any) -> None:
181
211
  """Attach a working EF when chromadb can't rebuild a Cloud hosted one.
@@ -10,6 +10,9 @@ from collections.abc import Callable
10
10
  from typing import Any
11
11
 
12
12
  from benchmax.platform.credentials import TokenProvider, as_token_provider, env_token
13
+ from benchmax.rag.corpus.search_schema.search_exceptions import (
14
+ LocalEmbeddingDownloadDisallowedError,
15
+ )
13
16
 
14
17
 
15
18
  class ChromaSearch:
@@ -113,19 +116,33 @@ class ChromaSearch:
113
116
  ) -> list[dict[str, Any]]:
114
117
  """Search and return structured results."""
115
118
  client = self._get_client()
116
-
117
- if mode == "auto":
118
- modes = client.modes
119
+ # Initialize the collection first so capabilities reflect the real index
120
+ # (BM25 downgrade) and the embedder config is readable below.
121
+ client.get_collection()
122
+ modes = client.modes
123
+ has_lexical = "lexical" in modes
124
+
125
+ # Never download a client-side embedding model at inference/rollout time.
126
+ # When a dense embed isn't safe — no embed_fn and no Chroma-hosted
127
+ # server-side embedding function — use the BM25 lexical index if the
128
+ # collection has one, otherwise refuse rather than fetch all-MiniLM.
129
+ if not client.dense_embed_is_safe():
130
+ if not has_lexical:
131
+ raise LocalEmbeddingDownloadDisallowedError(
132
+ "chroma", self._collection_name
133
+ )
134
+ mode = "lexical"
135
+ elif mode == "auto":
119
136
  if "hybrid" in modes:
120
137
  mode = "hybrid"
121
- elif "lexical" in modes:
138
+ elif has_lexical:
122
139
  mode = "lexical"
123
140
  else:
124
141
  mode = "vector"
125
- elif mode not in client.modes:
142
+ elif mode not in modes:
126
143
  raise ValueError(
127
144
  f"ChromaSearch does not support mode '{mode}'. "
128
- f"Available modes: {sorted(client.modes)}"
145
+ f"Available modes: {sorted(modes)}"
129
146
  )
130
147
 
131
148
  if client.search_api and mode in ("lexical", "hybrid"):
@@ -17,6 +17,7 @@ from tqdm.auto import tqdm
17
17
  from benchmax.rag.chunkers.models import Chunk, ChunkCollection
18
18
  from benchmax.rag.corpus.search_schema.search_exceptions import (
19
19
  InvalidSearchSpecError,
20
+ LocalEmbeddingDownloadDisallowedError,
20
21
  UnsupportedSearchModeError,
21
22
  )
22
23
  from benchmax.rag.corpus.search_schema.search_types import (
@@ -642,23 +643,30 @@ class ChromaChunkSource:
642
643
  # lack a BM25 index, in which case modes was downgraded to vector-only.
643
644
  modes = self._current_modes()
644
645
 
645
- # Pick mode. "hybrid"/None use the best available strategy and KEEP
646
- # lexical enabled as a fallback: hybrid = dense + sparse, and when we
647
- # can't produce dense query vectors (no embed_fn, the usual remote case)
648
- # the per-query loop below degrades to the sparse/lexical leg which
649
- # needs no embedding. Only an explicit "vector" disables lexical; that's
650
- # the dense-only recovery path a caller uses after a lexical/hybrid
651
- # failure. (Disabling lexical for "hybrid" silently forced vector search,
652
- # which made remote collections dense-embed every query — slow, and on a
653
- # default-EF collection it pulls the all-MiniLM model.)
654
- if mode == "vector":
655
- use_hybrid = use_lexical = False
646
+ has_lexical = "lexical" in modes
647
+ has_hybrid = "hybrid" in modes
648
+
649
+ # Hard rule: never let chromadb embed a query with a client-side model
650
+ # (it downloads all-MiniLM and crawls in constrained executors). When a
651
+ dense embed isn't safe — no embed_fn and no Chroma-hosted server-side
652
+ # embedding function — use the BM25 lexical index if the collection has
653
+ # one, otherwise refuse. This covers every requested mode, including the
654
+ # linker's "inference" preference for vector.
655
+ if not self._chroma.dense_embed_is_safe():
656
+ if not has_lexical:
657
+ raise LocalEmbeddingDownloadDisallowedError(
658
+ "chroma", self._chroma.collection_name
659
+ )
660
+ use_hybrid = False
661
+ use_lexical = True
656
662
  elif mode == "lexical":
657
663
  use_hybrid = False
658
- use_lexical = "lexical" in modes
664
+ use_lexical = has_lexical
665
+ elif mode == "vector":
666
+ use_hybrid = use_lexical = False
659
667
  else: # "hybrid", None, or unrecognized -> best available
660
- use_hybrid = "hybrid" in modes
661
- use_lexical = "lexical" in modes
668
+ use_hybrid = has_hybrid
669
+ use_lexical = has_lexical
662
670
 
663
671
  # Batch-embed all queries when embed_fn available and vectors needed
664
672
  vectors: list[list[float]] | None = None
@@ -60,9 +60,17 @@ class PineconeIndexClient:
60
60
  embed_model: Pinecone hosted embedding model name. Ignored when
61
61
  ``embed_fn`` is provided. Defaults to
62
62
  ``"multilingual-e5-large"``.
63
- field_mapping: Maps *Pinecone metadata field names* → *internal
64
- field names*. Useful for "bring your own index" scenarios where
65
- the user's metadata schema differs from the default.
63
+ field_mapping: Low-level escape hatch — maps *Pinecone metadata
64
+ field names* → *internal field names* for schemas that also
65
+ relocate structural fields (``file_path``, ``chunk_index``,
66
+ headers). For the common "my text is under a different key"
67
+ case, prefer ``content_field``.
68
+ content_field: Pinecone metadata key holding the chunk text, for
69
+ "bring your own index" schemas that don't use ``content`` (e.g.
70
+ ``"summary"`` or ``"passage"``). The canonical way to point at
71
+ your text column. Empty / None means the default ``content``
72
+ key. Raises if ``field_mapping`` already maps a *different*
73
+ key to ``content``.
66
74
  """
67
75
 
68
76
  def __init__(
@@ -75,15 +83,35 @@ class PineconeIndexClient:
75
83
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
76
84
  embed_model: str = "multilingual-e5-large",
77
85
  field_mapping: dict[str, str] | None = None,
86
+ content_field: str | None = None,
78
87
  ) -> None:
79
88
  # Store config for lazy init / pickle safety.
80
89
  self._api_key = api_key
81
90
  self._index_name = index_name
82
91
  self._index_host = index_host
83
- self._namespace = namespace
92
+ # Platform codegen may pass None for an unset namespace; Pinecone's
93
+ # default namespace is "".
94
+ self._namespace = namespace or ""
84
95
  self._embed_model = embed_model
85
96
  self.embed_fn = embed_fn or self._build_pinecone_embed_fn()
86
- self._field_mapping = field_mapping or dict(DEFAULT_FIELD_MAPPING)
97
+ mapping = dict(field_mapping) if field_mapping else dict(DEFAULT_FIELD_MAPPING)
98
+ if content_field and content_field != "content":
99
+ conflicting = [
100
+ k
101
+ for k, v in mapping.items()
102
+ if v == "content" and k not in ("content", content_field)
103
+ ]
104
+ if field_mapping and conflicting:
105
+ raise ValueError(
106
+ f"content_field={content_field!r} conflicts with field_mapping "
107
+ f"entries {conflicting} that already map to 'content'. "
108
+ "Specify the text column one way or the other."
109
+ )
110
+ # Drop the default content→content entry so the reverse mapping
111
+ # resolves "content" to the custom key unambiguously.
112
+ mapping.pop("content", None)
113
+ mapping[content_field] = "content"
114
+ self._field_mapping = mapping
87
115
  # Reverse mapping: internal name → pinecone metadata key
88
116
  self._reverse_mapping = {v: k for k, v in self._field_mapping.items()}
89
117
  self._index: Any | None = None
@@ -91,6 +119,8 @@ class PineconeIndexClient:
91
119
  self._known_ids: list[str] | None = None
92
120
  # Cached vector dimension (detected on first embed or describe_index).
93
121
  self._vector_dim: int | None = None
122
+ # Cached index vector type ("dense" | "sparse"), probed lazily.
123
+ self._vector_type: str | None = None
94
124
 
95
125
  def _build_pinecone_embed_fn(self) -> Callable[[list[str]], list[list[float]]]:
96
126
  """Build an embed_fn using Pinecone's hosted Inference API.
@@ -157,6 +187,35 @@ class PineconeIndexClient:
157
187
  self._index = pc.Index(self._index_name)
158
188
  return self._index
159
189
 
190
+ def vector_type(self) -> str:
191
+ """Return the index vector type, ``"dense"`` or ``"sparse"``.
192
+
193
+ Probes the index via ``describe_index_stats`` on first call and
194
+ caches the result.
195
+ """
196
+ if self._vector_type is None:
197
+ index = self._get_index()
198
+ stats = index.describe_index_stats()
199
+ self._vector_type = getattr(stats, "vector_type", None) or "dense"
200
+ return self._vector_type
201
+
202
+ def namespace_vector_count(self) -> int:
203
+ """Return the vector count for this client's namespace.
204
+
205
+ Scoped to the namespace, NOT the index-wide total — an index-wide
206
+ count would disagree with what list/fetch/query in this namespace
207
+ can actually see. The SDK keys the default namespace as
208
+ ``"__default__"`` (the REST API uses ``""``).
209
+ """
210
+ stats = self._get_index().describe_index_stats()
211
+ namespaces = getattr(stats, "namespaces", None) or {}
212
+ ns_stats = namespaces.get(self._namespace or "__default__")
213
+ if ns_stats is None and not self._namespace:
214
+ ns_stats = namespaces.get("")
215
+ if ns_stats is None:
216
+ return 0
217
+ return int(getattr(ns_stats, "vector_count", 0) or 0)
218
+
160
219
  def zero_vector(self) -> list[float]:
161
220
  """Return a zero-vector with the correct dimension for this index.
162
221
 
@@ -168,6 +227,12 @@ class PineconeIndexClient:
168
227
  index = self._get_index()
169
228
  stats = index.describe_index_stats()
170
229
  self._vector_dim = stats.dimension
230
+ if self._vector_dim is None:
231
+ # Sparse indexes have no fixed dimension.
232
+ raise ValueError(
233
+ f"Pinecone index '{self._index_name}' has no dimension — it is "
234
+ "a sparse index, which has no dense zero-vector."
235
+ )
171
236
  return [0.0] * self._vector_dim
172
237
 
173
238
  # ------------------------------------------------------------------
@@ -305,6 +370,14 @@ class PineconeIndexClient:
305
370
  include_metadata: bool = True,
306
371
  ) -> Any:
307
372
  """Run a vector query against the index."""
373
+ if self.vector_type() == "sparse":
374
+ # A dense query vector against a sparse index is rejected by
375
+ # Pinecone with an opaque error; fail with an actionable one.
376
+ raise ValueError(
377
+ f"Pinecone index '{self._index_name}' is a sparse index — "
378
+ "search against sparse indexes is not supported yet. "
379
+ "Use a dense index."
380
+ )
308
381
  index = self._get_index()
309
382
  kwargs: dict[str, Any] = {
310
383
  "vector": vector,
@@ -36,6 +36,8 @@ class PineconeSearch:
36
36
  embed_model: Pinecone hosted embedding model name. Ignored
37
37
  when ``embed_fn`` is provided.
38
38
  field_mapping: Maps Pinecone metadata keys to internal names.
39
+ content_field: Pinecone metadata key holding the chunk text — sugar
40
+ over ``field_mapping`` for BYO indexes that don't use ``content``.
39
41
  token_provider: Optional override — a callable resolving the key per
40
42
  call, or a literal key (string sugar). Defaults to reading
41
43
  ``PINECONE_API_KEY``.
@@ -50,6 +52,7 @@ class PineconeSearch:
50
52
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
51
53
  embed_model: str = "multilingual-e5-large",
52
54
  field_mapping: dict[str, str] | None = None,
55
+ content_field: str | None = None,
53
56
  token_provider: str | TokenProvider | None = None,
54
57
  ) -> None:
55
58
  self._index_name = index_name
@@ -58,6 +61,7 @@ class PineconeSearch:
58
61
  self._embed_fn = embed_fn
59
62
  self._embed_model = embed_model
60
63
  self._field_mapping = field_mapping
64
+ self._content_field = content_field
61
65
  self._token_provider = as_token_provider(
62
66
  token_provider, env_token("PINECONE_API_KEY")
63
67
  )
@@ -75,6 +79,7 @@ class PineconeSearch:
75
79
  embed_fn=self._embed_fn,
76
80
  embed_model=self._embed_model,
77
81
  field_mapping=self._field_mapping,
82
+ content_field=self._content_field,
78
83
  )
79
84
  return self._client
80
85
 
@@ -26,6 +26,9 @@ from .index_client import PineconeIndexClient
26
26
 
27
27
  logger = logging.getLogger(__name__)
28
28
 
29
+ #: Max IDs per vectors/fetch call — Pinecone caps fetch batches at 100.
30
+ _FETCH_BATCH_SIZE = 100
31
+
29
32
 
30
33
  def _raw_to_chunk(raw: dict[str, Any]) -> Chunk:
31
34
  """Convert a raw dict from PineconeIndexClient to a Chunk."""
@@ -64,8 +67,13 @@ class PineconeChunkSource:
64
67
  embed_model: Pinecone hosted embedding model name. Ignored when
65
68
  ``embed_fn`` is provided. Defaults to
66
69
  ``"multilingual-e5-large"``.
67
- field_mapping: Maps Pinecone metadata field names to internal names.
68
- Useful for "bring your own index" scenarios.
70
+ field_mapping: Low-level escape hatch — maps Pinecone metadata field
71
+ names to internal names when structural fields (``file_path``,
72
+ ``chunk_index``, headers) are also relocated. For the common
73
+ case, prefer ``content_field``.
74
+ content_field: Pinecone metadata key holding the chunk text — the
75
+ canonical way to point at your text column for pre-existing
76
+ indexes that don't use ``content``.
69
77
 
70
78
  Example:
71
79
  >>> # Using Pinecone's built-in embeddings (simplest)
@@ -82,12 +90,12 @@ class PineconeChunkSource:
82
90
  ... embed_fn=my_embed_fn,
83
91
  ... )
84
92
 
85
- >>> # Pre-existing index with custom field names
93
+ >>> # Pre-existing index whose text lives under another key
86
94
  >>> source = PineconeChunkSource(
87
95
  ... api_key="pcsk_...",
88
96
  ... index_name="product-catalog",
89
97
  ... embed_model="llama-text-embed-v2",
90
- ... field_mapping={"description": "content", "path": "file_path"},
98
+ ... content_field="description",
91
99
  ... )
92
100
  """
93
101
 
@@ -101,6 +109,7 @@ class PineconeChunkSource:
101
109
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
102
110
  embed_model: str = "multilingual-e5-large",
103
111
  field_mapping: dict[str, str] | None = None,
112
+ content_field: str | None = None,
104
113
  ) -> None:
105
114
  self._client = PineconeIndexClient(
106
115
  api_key=api_key,
@@ -110,6 +119,7 @@ class PineconeChunkSource:
110
119
  embed_fn=embed_fn,
111
120
  embed_model=embed_model,
112
121
  field_mapping=field_mapping,
122
+ content_field=content_field,
113
123
  )
114
124
  self._files = FileAwareness(self._client)
115
125
 
@@ -237,40 +247,56 @@ class PineconeChunkSource:
237
247
  # ------------------------------------------------------------------
238
248
 
239
249
  def get_chunk_count(self) -> int:
240
- """Return the total number of vectors in the index."""
241
- index = self._client._get_index()
242
- stats = index.describe_index_stats()
243
- return int(stats.total_vector_count or 0)
250
+ """Return the number of vectors in the configured namespace.
251
+
252
+ Scoped to the namespace this source reads from — an index-wide
253
+ total would disagree with what sampling/search can actually see.
254
+ """
255
+ return self._client.namespace_vector_count()
244
256
 
245
257
  def sample_chunks(self, n: int, min_chars: int = 0) -> list[Chunk]:
246
258
  """Return n randomly sampled chunks, optionally filtered by
247
259
  minimum length.
248
260
 
249
- Uses a random vector query to get pseudo-random results
250
- efficiently in a single API call.
261
+ Samples uniformly from the paginated ID listing and hydrates the
262
+ sample via fetch — no query vector involved, so the draw is
263
+ genuinely uniform (not nearest-to-a-random-point) and works for
264
+ dense and sparse indexes alike.
251
265
  """
252
- # Generate a random vector for pseudo-random sampling
253
- dim = len(self._client.zero_vector())
254
- rand_vec = [random.gauss(0, 1) for _ in range(dim)]
255
-
256
- # Fetch more than needed to allow for min_chars filtering
257
- fetch_k = min(n * 3, 10000) if min_chars > 0 else min(n, 10000)
258
- result = self._client.query(
259
- vector=rand_vec,
260
- top_k=fetch_k,
261
- include_metadata=True,
262
- )
263
-
264
- matches = result.matches or []
265
- if not matches:
266
+ # Oversample when a length filter will discard part of the draw
267
+ fetch_n = min(n * 3, 10000) if min_chars > 0 else min(n, 10000)
268
+ ids = self._client.sample_ids(fetch_n)
269
+ if not ids:
266
270
  return []
267
271
 
268
- chunks = [_raw_to_chunk(self._client.match_to_raw(m)) for m in matches]
272
+ raws: list[dict[str, Any]] = []
273
+ for batch_start in range(0, len(ids), _FETCH_BATCH_SIZE):
274
+ raws.extend(
275
+ self._client.fetch_by_ids_raw(
276
+ ids[batch_start : batch_start + _FETCH_BATCH_SIZE]
277
+ )
278
+ )
279
+ chunks = [_raw_to_chunk(r) for r in raws]
280
+
281
+ # Every fetched record decoding to empty content means the text key
282
+ # is wrong (BYO index whose schema doesn't use the configured field),
283
+ # not that the corpus is empty. Without this, the pipeline dies later
284
+ # with an unactionable "No eligible chunks were found".
285
+ if chunks and all(not c.content for c in chunks):
286
+ content_key = self._client._pc_field("content")
287
+ seen_keys = sorted(
288
+ {k for r in raws for k in r.get("metadata", {}) if not k.startswith("_")}
289
+ )
290
+ raise ValueError(
291
+ f"No text found under metadata field '{content_key}' in any "
292
+ f"sampled record. This index's metadata fields are: "
293
+ f"{seen_keys}. Set content_field to the one holding the "
294
+ f"chunk text."
295
+ )
269
296
 
270
297
  if min_chars > 0:
271
298
  chunks = [c for c in chunks if len(c.content) >= min_chars]
272
299
 
273
- # Shuffle to avoid bias from similarity ordering
274
300
  random.shuffle(chunks)
275
301
  return chunks[:n]
276
302
 
@@ -43,3 +43,21 @@ class UnsupportedSearchModeError(ValueError):
43
43
  f"[{backend}] unsupported search mode '{mode}'. "
44
44
  f"Supported modes: {sorted(supported_modes)}"
45
45
  )
46
+
47
+
48
+ class LocalEmbeddingDownloadDisallowedError(RuntimeError):
49
+ """Raised when serving a search would download a client-side embedding model.
50
+
51
+ The collection has no server-side (hosted) embedding function and no BM25
52
+ index, and the caller supplied no ``embed_fn`` — so embedding a text query
53
+ would make chromadb download and run a local model (e.g. all-MiniLM). We
54
+ refuse rather than trigger that download.
55
+ """
56
+
57
+ def __init__(self, backend: str, collection: str):
58
+ super().__init__(
59
+ f"[{backend}] collection {collection!r} has no server-side embedding "
60
+ "function and no BM25 index, so search would download a local "
61
+ "embedding model. Re-ingest the corpus with a hosted embedder "
62
+ "(chroma-cloud-qwen) or a BM25 index, or supply an embed_fn."
63
+ )
@@ -19,6 +19,27 @@ from benchmax.rag.corpus.search_schema.search_types import (
19
19
  )
20
20
 
21
21
 
22
+ def resolve_content_attr(
23
+ content_attr: list[str] | None, content_field: str | None
24
+ ) -> list[str] | None:
25
+ """Resolve the ``content_field`` sugar against an explicit ``content_attr``.
26
+
27
+ ``content_field`` is the canonical single-column param; ``content_attr``
28
+ is the low-level multi-field escape hatch. Specifying the text column
29
+ both ways with different values raises instead of silently picking a
30
+ winner.
31
+ """
32
+ if not content_field:
33
+ return content_attr
34
+ if content_attr is not None and content_attr != [content_field]:
35
+ raise ValueError(
36
+ f"content_field={content_field!r} conflicts with "
37
+ f"content_attr={content_attr!r}. Specify the text column one way "
38
+ "or the other."
39
+ )
40
+ return [content_field]
41
+
42
+
22
43
  class TpufNamespace:
23
44
  """Thin wrapper around a Turbopuffer namespace.
24
45
 
@@ -30,7 +30,12 @@ class TpufSearch:
30
30
  Args:
31
31
  namespace: Turbopuffer namespace name.
32
32
  region: Turbopuffer region (default ``"aws-us-east-1"``).
33
- content_attr: List of BM25-indexed content fields.
33
+ content_attr: Low-level escape hatch — list of BM25-indexed content
34
+ fields for multi-field schemas. Prefer ``content_field``.
35
+ content_field: Turbopuffer attribute holding the chunk text — the
36
+ canonical single-column param. Must be BM25-indexed for lexical
37
+ search. Raises if ``content_attr`` is also supplied with a
38
+ different value.
34
39
  embed_fn: Custom embedding function. Required for vector/hybrid.
35
40
  vector_attr: Vector attribute name (default ``"vector"``).
36
41
  distance_metric: Distance metric (default ``"cosine_distance"``).
@@ -48,11 +53,14 @@ class TpufSearch:
48
53
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
49
54
  vector_attr: str = "vector",
50
55
  distance_metric: str = "cosine_distance",
56
+ content_field: str | None = None,
51
57
  token_provider: str | TokenProvider | None = None,
52
58
  ) -> None:
59
+ from .namespace import resolve_content_attr
60
+
53
61
  self._namespace = namespace
54
62
  self._region = region
55
- self._content_attr = content_attr
63
+ self._content_attr = resolve_content_attr(content_attr, content_field)
56
64
  self._embed_fn = embed_fn
57
65
  self._vector_attr = vector_attr
58
66
  self._distance_metric = distance_metric
@@ -92,7 +100,6 @@ class TpufSearch:
92
100
  top_k: int = 10,
93
101
  ) -> list[dict[str, Any]]:
94
102
  """Search and return structured results."""
95
- ns = self._get_client()
96
103
  modes = self.available_modes
97
104
  content_fields = self._content_attr or ["content"]
98
105
 
@@ -111,6 +118,11 @@ class TpufSearch:
111
118
  f"{'Provide embed_fn for vector/hybrid.' if mode in ('vector', 'hybrid') else ''}"
112
119
  )
113
120
 
121
+ # Validate the request before constructing the client — an invalid
122
+ # mode should fail as such, not as a missing-credential error from
123
+ # the token provider.
124
+ ns = self._get_client()
125
+
114
126
  if mode == "lexical":
115
127
  rank_by = [content_fields[0], "BM25", query]
116
128
  result = ns.query(rank_by=rank_by, top_k=top_k, include_attributes=True)
@@ -23,7 +23,7 @@ from benchmax.rag.corpus.search_schema.search_types import (
23
23
 
24
24
  from .files import FileAwareness
25
25
  from .filter_mapper import to_turbopuffer_filters
26
- from .namespace import TpufNamespace
26
+ from .namespace import TpufNamespace, resolve_content_attr
27
27
 
28
28
  _DEFAULT_RELATED_SEARCH_MODE: SearchMode = "lexical"
29
29
  _HYBRID_FUSION_RRF_K = 60.0
@@ -50,10 +50,15 @@ class TpufChunkSource:
50
50
  api_key: Turbopuffer API key
51
51
  namespace: Turbopuffer namespace name
52
52
  region: Turbopuffer region (default "aws-us-east-1")
53
- content_attr: List of Turbopuffer attribute names to use as the chunk's
54
- searchable text content. Defaults to ["content"]. For pre-existing
55
- namespaces, supply the BM25-indexed field(s), e.g. ["description"]
56
- or ["title", "content"].
53
+ content_attr: Low-level escape hatch — list of Turbopuffer attribute
54
+ names to use as the chunk's searchable text content (multi-field
55
+ schemas, e.g. ["title", "content"]). For the common single-column
56
+ case, prefer ``content_field``. Defaults to ["content"].
57
+ content_field: Turbopuffer attribute holding the chunk text — the
58
+ canonical way to point at your text column for pre-existing
59
+ namespaces that don't use ``content``. Must be BM25-indexed for
60
+ lexical search. Raises if ``content_attr`` is also supplied with
61
+ a different value.
57
62
  vector_attr: Name of the vector attribute in the namespace. Defaults to
58
63
  "vector". Set this if your namespace stores embeddings under a
59
64
  different attribute name.
@@ -64,11 +69,11 @@ class TpufChunkSource:
64
69
  >>> source.populate_from_folder("./docs", embed_fn=my_embed_fn)
65
70
  >>> chunks = source.sample_chunks(n=10, min_chars=400)
66
71
 
67
- >>> # Pre-existing namespace with known BM25-indexed fields
72
+ >>> # Pre-existing namespace whose text lives under another key
68
73
  >>> source = TpufChunkSource(
69
74
  ... api_key="tpuf_...",
70
75
  ... namespace="product-catalog",
71
- ... content_attr=["description"],
76
+ ... content_field="description",
72
77
  ... )
73
78
  """
74
79
 
@@ -81,12 +86,13 @@ class TpufChunkSource:
81
86
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
82
87
  vector_attr: str = "vector",
83
88
  distance_metric: str = "cosine_distance",
89
+ content_field: str | None = None,
84
90
  ) -> None:
85
91
  self._client = TpufNamespace(
86
92
  api_key=api_key,
87
93
  namespace=namespace,
88
94
  region=region,
89
- content_attr=content_attr,
95
+ content_attr=resolve_content_attr(content_attr, content_field),
90
96
  embed_fn=embed_fn,
91
97
  vector_attr=vector_attr,
92
98
  distance_metric=distance_metric,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: benchmax
3
- Version: 0.1.2.dev30
3
+ Version: 0.1.2.dev33
4
4
  Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
5
5
  Author: castie@castform.com
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1,11 +1,11 @@
1
- benchmax/bundle.py,sha256=Fr0gEWPRMPKVaHwHuxCN_UWWNxLxVPUqE1idKd3fgVE,9999
1
+ benchmax/bundle.py,sha256=HJ0ZCojI6DRSGqgF6uMNazDsQJM6o5rqsfoTKV0kZAU,13879
2
2
  benchmax/cli.py,sha256=N9gC_ilTutbF7nNplWo7-e-hw674PbBsw_iuCtt0xyA,2366
3
3
  benchmax/config.py,sha256=qTtr8-VO0XjjxKfXh0jE58bmpuw0UnirYI_8aH2gb3g,2112
4
4
  benchmax/envs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  benchmax/envs/base_env.py,sha256=FoUgWsNGeNpTHeOop0bklRjLfHA90Yi7MW8zCaNh_V0,8976
6
6
  benchmax/envs/example_id.py,sha256=WU967Pt2kFvn-W4k5BC6BvKyrTEqioLr7IyWZ3RjGgU,5685
7
7
  benchmax/envs/logging.py,sha256=QnXADCp0vWoV_-MK91yX5OFu6GwgIE98dvhaQTPawqQ,5053
8
- benchmax/envs/reward_helpers.py,sha256=lKbyTvJYU2JoiFItFkUPX6aWwp6JmmgXC76FBaf2rBQ,7740
8
+ benchmax/envs/reward_helpers.py,sha256=-pDqYBazvum8cc8KX7Q_Z0C-Daf3_4TVZuWt-ywhqyY,7364
9
9
  benchmax/envs/types.py,sha256=sGKKibQJZQj9RYkFpB3vaUY75tdoHet8yUmdzpZ0SVk,4389
10
10
  benchmax/envs/crm/crm_env.py,sha256=ltUtpA45YB_A_hYEpjFTp0nZKwkUvvLSLOAVkaUNz9E,4707
11
11
  benchmax/envs/crm/workdir/reward_fn.py,sha256=RY_iy347j79xX4gyCGI7WS0qPmut8Th2rqOiErVbDro,5439
@@ -32,8 +32,8 @@ benchmax/envs/mcp/provisioners/skypilot_provisioner.py,sha256=ACHnzNZE7GfL1WIWf7
32
32
  benchmax/envs/mcp/provisioners/utils.py,sha256=ORWJKtPzeS-IdD35p8aZyLMG2RxiB9BAFmU-0pVqiWw,3467
33
33
  benchmax/envs/postgres_search/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  benchmax/envs/postgres_search/linker_env.py,sha256=B3cn0TpiqgrYL5NvOQYW3Yxy5DdxPw1kmIgqDs-8Buo,8535
35
- benchmax/envs/postgres_search/search_env.py,sha256=ldfSNCQonbQZimO7rCO1Jc6im7ff2d0-TLvwryXUOXQ,20181
36
- benchmax/envs/telestich/example.py,sha256=zojsKPQEVoYx0ElKcpU4vnQuW02zfbIgsCx0rnDWVvg,28092
35
+ benchmax/envs/postgres_search/search_env.py,sha256=IWpqbFr4hjaN_DzdeRchvwvf9qVj5Ut5D-eOsGPyWKQ,19917
36
+ benchmax/envs/telestich/example.py,sha256=cqHIBjD8g7H4-nmspWSKRB2rxeKPOIwkLn136Y04KfQ,28680
37
37
  benchmax/envs/telestich/telestich_env.py,sha256=6p6GeyV-9ZIXrAX8zssMFjJgevkV5PfDLMZlslqO8js,61966
38
38
  benchmax/envs/wikipedia/utils.py,sha256=YDlxpMfwiVpfMpiZet4kWoeKqNbgTBxeWVEYg5QY3Qs,2879
39
39
  benchmax/envs/wikipedia/wiki_env.py,sha256=FigVZ0P0WVJG66CUZHOXq8tbSHWz8gNFr9cdeDePqfI,9288
@@ -45,13 +45,13 @@ benchmax/multi_model/inspector.py,sha256=j730w35YpZ4tGpzoVHza763GkUjyRxmqzRTwXqI
45
45
  benchmax/multi_model/models.py,sha256=bYLBJ0uybsB_tg2jkWHQGhyqakb21bHgstnZZCcAq58,3218
46
46
  benchmax/multi_model/pricing.py,sha256=x6Gz9dET7hBvQJb2SvQ8IVPvH-xenmoHrqp3Wpa4dI8,2122
47
47
  benchmax/platform/__init__.py,sha256=GI4U-qPyU-lPCQDxA1mw6Lnqj58gP5PptxSts_h7uPo,926
48
- benchmax/platform/client.py,sha256=6U9ojbSIHPFvn2Qg1roKMonwoQVt3WTJcLUWiY6KvDg,54687
48
+ benchmax/platform/client.py,sha256=mA0bQiIPrkimaBkW4Zv1byPJyEPnc_O7-E2dossWy4k,54919
49
49
  benchmax/platform/credentials.py,sha256=ABn44ChybWT9UQNd9_sc4yvfTdkw93521u9ZRI3H4Kk,15151
50
50
  benchmax/platform/device_auth.py,sha256=OCWCzTdQLNeB57cBFNpqDea2YV846r_ukDDIWRDDER4,2670
51
51
  benchmax/platform/exceptions.py,sha256=rkGrsSL2efqr3k15LOVv4k8mVfTXxjIDbWnKBKKz7Hk,1018
52
52
  benchmax/platform/login.py,sha256=aQJ9_QJir4pq_pWl85_LoyMKtOAs3ZVjE7TtulwEQbg,3471
53
53
  benchmax/platform/training_run.py,sha256=lzcUaigASRheASLN48BueCDu2fyESrWZdUejd0ZueIs,7000
54
- benchmax/platform/validation.py,sha256=JF2XRaNLkeVrEWvFpw8TuhG6xZJX9a0cuhtbc9E8s4g,34799
54
+ benchmax/platform/validation.py,sha256=ZmQkhEyeBuAKT_ViCycQnyfaA2fAgmzQFxjIDzVRK3U,36736
55
55
  benchmax/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  benchmax/prompts/tools.py,sha256=dhUkLfqNuFhwsZ3etNc_xiOIn_7CC8HhZatr657Rmq4,2978
57
57
  benchmax/rag/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -64,17 +64,17 @@ benchmax/rag/corpus/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKV
64
64
  benchmax/rag/corpus/search_client.py,sha256=171IqQriU6kuQqvSCDgNwOT8SR5pxUPMfCifarrgrFg,1859
65
65
  benchmax/rag/corpus/source.py,sha256=dnmReLC8mccHDkg8ZytfXa4AFXrRMCg9v8E2UuVxt8E,4183
66
66
  benchmax/rag/corpus/chroma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- benchmax/rag/corpus/chroma/client.py,sha256=TGf_YEgVBH8p-PLF7QcuSIHkkifrChXPKJB2ENR9OiM,19361
67
+ benchmax/rag/corpus/chroma/client.py,sha256=cYZZKQG09u_VfyjsP6UdCBh-RRNGKa9XisBN4OEejQk,20839
68
68
  benchmax/rag/corpus/chroma/files.py,sha256=hSP-J2osPNBAvMZHOWipMVXaWN4tila_tsQaTEPNzgc,5567
69
69
  benchmax/rag/corpus/chroma/filter_mapper.py,sha256=Y1FzDwDDg15LZ0-Uh1jzOVcSORiVUy5f1qiaVky3pJI,5074
70
- benchmax/rag/corpus/chroma/search.py,sha256=4kU1WMwsWQrN03ctVIPdXZoHyZa3jso2fKzcsc5uYr0,6824
71
- benchmax/rag/corpus/chroma/source.py,sha256=ZOLj_VfixBcB_VE8YLF6X3sKY6XHYupprHNHVxy1hH8,30295
70
+ benchmax/rag/corpus/chroma/search.py,sha256=iO8fBPk50vG3NmkCmAJ2tKnjP_wKnymV3fbfLjkIAJ8,7688
71
+ benchmax/rag/corpus/chroma/source.py,sha256=0azMLUvZS9g4jvxv_KxsPa3-ArQW5WHCq77CQh-qmqY,30440
72
72
  benchmax/rag/corpus/pinecone/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
73
  benchmax/rag/corpus/pinecone/files.py,sha256=lhas7-mQ622Ku36QvOavXguBweJyYl78wXIeb_LNqig,5728
74
74
  benchmax/rag/corpus/pinecone/filter_mapper.py,sha256=exJ3G34QKeQo1rQ8Pu-iGL0XDXVxCW5dc3q0QoYfCo0,6454
75
- benchmax/rag/corpus/pinecone/index_client.py,sha256=3mI_gUu3s_h8PGIf08Ln6DnJgbq45v8hOKot4HAcJqE,14454
76
- benchmax/rag/corpus/pinecone/search.py,sha256=AySKV_2LQxMOe0_qvgeDjLnkEC5GU0aEz68VlK80EdA,4634
77
- benchmax/rag/corpus/pinecone/source.py,sha256=s8qyVkMP9j23qNK2YIOWsn_-PTT6sst6Fern4WzjDdg,19838
75
+ benchmax/rag/corpus/pinecone/index_client.py,sha256=eZ6LzBg82X9HCvs9HUrgLRuuHPVtM_hoICYYvznC2dg,18045
76
+ benchmax/rag/corpus/pinecone/search.py,sha256=XoDKouj5Y-THW0cG00zUIgcpbJxRKv1y5mM5z2a-ZdA,4928
77
+ benchmax/rag/corpus/pinecone/source.py,sha256=aJey4d5Pz_FB-G9MXusODAnszun57HAztFVzu8RWC_8,21305
78
78
  benchmax/rag/corpus/postgres/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
79
79
  benchmax/rag/corpus/postgres/client.py,sha256=JbRUelHpXlZhDlXdEUWooF7UpqrHmf0uAjWoweJ-Dio,19821
80
80
  benchmax/rag/corpus/postgres/exceptions.py,sha256=tykCt_4H9ewe5Qh_qzIg_PoSmuJpY-aox1QCku9PVmI,1572
@@ -85,14 +85,14 @@ benchmax/rag/corpus/postgres/source.py,sha256=6ptGHatOscYih42MZ9Wt8MQOrcIEQiJ1X5
85
85
  benchmax/rag/corpus/search_schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  benchmax/rag/corpus/search_schema/builders.py,sha256=qAMiEOGOLR7xSXWFf12KqzYlrwBZchU_78vkRcOKa8k,1764
87
87
  benchmax/rag/corpus/search_schema/dsl_parser.py,sha256=vMijm_nRKztIrsVQP-0OySuCKnrBsbUzet_pwwlU1T8,1586
88
- benchmax/rag/corpus/search_schema/search_exceptions.py,sha256=vxhJQa7UFHduXDt225onA_R_UWcvGlj3NiS5TFR3M7w,1578
88
+ benchmax/rag/corpus/search_schema/search_exceptions.py,sha256=1ccbLnDAuSMxUnjtyBt-5iXwoKjI3xaZvk9xplCyNFw,2413
89
89
  benchmax/rag/corpus/search_schema/search_types.py,sha256=UTkteugSx5OigDRZ8Xqe6itxLUXj2sVeIVxtYbnXGSg,5831
90
90
  benchmax/rag/corpus/turbopuffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
91
  benchmax/rag/corpus/turbopuffer/files.py,sha256=DP80-3NmdyOD34fyQxlzovpLRs_UU1ezQ7PItpY2Nlk,5807
92
92
  benchmax/rag/corpus/turbopuffer/filter_mapper.py,sha256=r9YRn1A3XfoFUaD6KDRtUr-ufvMhGBA6VoR-YTuCvcY,4676
93
- benchmax/rag/corpus/turbopuffer/namespace.py,sha256=H60aaVd9sCToC1puXq0K_3Q0Fl2l5JqKtDdfKPke3mo,11507
94
- benchmax/rag/corpus/turbopuffer/search.py,sha256=WpoWtNNA8Y4rTc5n4yqAjeBYU0d3SpbxKHh1Awau03Y,7637
95
- benchmax/rag/corpus/turbopuffer/source.py,sha256=KN0T4-IJX5ES3GyvMCea2Mie-qy_5-qk3Fcz06GSOgU,28113
93
+ benchmax/rag/corpus/turbopuffer/namespace.py,sha256=LP0Gpwv91ZzgRhHaUBI0ITvutmS-er0W4o07QNyqSU8,12303
94
+ benchmax/rag/corpus/turbopuffer/search.py,sha256=MF0E3kWzAQf5C2fjWV3TyyFWpy_-3DwOBM0XBIkgZqM,8293
95
+ benchmax/rag/corpus/turbopuffer/source.py,sha256=f0G3RzJkdCxM0TSoyI_eB_JH-wFhHiPgl8SmINj3XbQ,28577
96
96
  benchmax/rag/preprocess/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
97
  benchmax/rag/preprocess/email/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
98
98
  benchmax/rag/preprocess/email/clean_bodies.py,sha256=OQ1fwsB3Dfy9iyzjX5ZpaHweB33Cs6hfwsULm8xeBZk,18097
@@ -160,9 +160,9 @@ benchmax/traces/braintrust/adapter.py,sha256=KTeN9qKLwZJJ8TY-KtSudd4J3_nySz1bRts
160
160
  benchmax/traces/braintrust/message_extraction.py,sha256=seh3eM_qd9FUPmGOEMChUq_UAMtaIQHYSYDttMgY1go,8409
161
161
  benchmax/utils/__init__.py,sha256=FWJVm6jt0m57HS-84bgrb2M-c_EFhf60rWayioUGges,402
162
162
  benchmax/utils/checkpoint.py,sha256=htIw9iYjUUHpJqLLZ0y6K4_UYYAkZIx3vdQVY7juKDw,3148
163
- benchmax-0.1.2.dev30.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
164
- benchmax-0.1.2.dev30.dist-info/METADATA,sha256=jGUiou-RTND1ZTmey7cVJT9uRQR_TCTjHaGAVXYjNHc,2775
165
- benchmax-0.1.2.dev30.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
166
- benchmax-0.1.2.dev30.dist-info/entry_points.txt,sha256=qtjqAQsHIwRIaLzwAhGTiRvI91CynwcUO5G95uQuDR4,47
167
- benchmax-0.1.2.dev30.dist-info/top_level.txt,sha256=ryj4zoahvAKL3BnxOpfJNfyIzhvlED9KJ3Q3k4bb9jc,9
168
- benchmax-0.1.2.dev30.dist-info/RECORD,,
163
+ benchmax-0.1.2.dev33.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
164
+ benchmax-0.1.2.dev33.dist-info/METADATA,sha256=X5P1IBK9INVKaO8xzBqoW8CQYQ2VIVD9IkaQV4tVjFQ,2775
165
+ benchmax-0.1.2.dev33.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
166
+ benchmax-0.1.2.dev33.dist-info/entry_points.txt,sha256=qtjqAQsHIwRIaLzwAhGTiRvI91CynwcUO5G95uQuDR4,47
167
+ benchmax-0.1.2.dev33.dist-info/top_level.txt,sha256=ryj4zoahvAKL3BnxOpfJNfyIzhvlED9KJ3Q3k4bb9jc,9
168
+ benchmax-0.1.2.dev33.dist-info/RECORD,,