benchmax 0.1.2.dev30__tar.gz → 0.1.2.dev31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. {benchmax-0.1.2.dev30/src/benchmax.egg-info → benchmax-0.1.2.dev31}/PKG-INFO +1 -1
  2. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/pyproject.toml +1 -1
  3. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/bundle.py +74 -0
  4. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/telestich/example.py +18 -11
  5. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/client.py +6 -2
  6. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/validation.py +43 -1
  7. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/pinecone/index_client.py +78 -5
  8. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/pinecone/search.py +5 -0
  9. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/pinecone/source.py +52 -26
  10. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/turbopuffer/namespace.py +21 -0
  11. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/turbopuffer/search.py +15 -3
  12. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/turbopuffer/source.py +14 -8
  13. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31/src/benchmax.egg-info}/PKG-INFO +1 -1
  14. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/LICENSE +0 -0
  15. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/README.md +0 -0
  16. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/setup.cfg +0 -0
  17. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/cli.py +0 -0
  18. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/config.py +0 -0
  19. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/__init__.py +0 -0
  20. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/base_env.py +0 -0
  21. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/crm/crm_env.py +0 -0
  22. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/crm/workdir/reward_fn.py +0 -0
  23. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/example_id.py +0 -0
  24. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/excel/data_utils.py +0 -0
  25. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/excel/excel_env.py +0 -0
  26. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/excel/workdir/__init__.py +0 -0
  27. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py +0 -0
  28. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/excel/workdir/excel_utils.py +0 -0
  29. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/excel/workdir/reward_fn.py +0 -0
  30. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/logging.py +0 -0
  31. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/math/math_env.py +0 -0
  32. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/math/workdir/reward_fn.py +0 -0
  33. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/__init__.py +0 -0
  34. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py +0 -0
  35. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/example_workdir/reward_fn.py +0 -0
  36. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/parallel_mcp_env.py +0 -0
  37. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/provisioners/__init__.py +0 -0
  38. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/provisioners/base_provisioner.py +0 -0
  39. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/provisioners/local_provisioner.py +0 -0
  40. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py +0 -0
  41. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py +0 -0
  42. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/provisioners/utils.py +0 -0
  43. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/proxy_server.py +0 -0
  44. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/server_pool.py +0 -0
  45. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/mcp/utils.py +0 -0
  46. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/postgres_search/__init__.py +0 -0
  47. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/postgres_search/linker_env.py +0 -0
  48. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/postgres_search/search_env.py +0 -0
  49. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/reward_helpers.py +0 -0
  50. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/telestich/telestich_env.py +0 -0
  51. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/types.py +0 -0
  52. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/wikipedia/utils.py +0 -0
  53. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/envs/wikipedia/wiki_env.py +0 -0
  54. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/__init__.py +0 -0
  55. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/caller.py +0 -0
  56. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/clients.py +0 -0
  57. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/example_usage.py +0 -0
  58. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/inspector.py +0 -0
  59. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/models.py +0 -0
  60. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/multi_model/pricing.py +0 -0
  61. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/__init__.py +0 -0
  62. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/credentials.py +0 -0
  63. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/device_auth.py +0 -0
  64. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/exceptions.py +0 -0
  65. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/login.py +0 -0
  66. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/platform/training_run.py +0 -0
  67. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/prompts/__init__.py +0 -0
  68. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/prompts/tools.py +0 -0
  69. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/chunkers/__init__.py +0 -0
  70. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/chunkers/email.py +0 -0
  71. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/chunkers/inspector.py +0 -0
  72. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/chunkers/markdown.py +0 -0
  73. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/chunkers/models.py +0 -0
  74. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/chunkers/storage.py +0 -0
  75. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/__init__.py +0 -0
  76. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/chroma/__init__.py +0 -0
  77. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/chroma/client.py +0 -0
  78. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/chroma/files.py +0 -0
  79. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/chroma/filter_mapper.py +0 -0
  80. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/chroma/search.py +0 -0
  81. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/chroma/source.py +0 -0
  82. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/pinecone/__init__.py +0 -0
  83. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/pinecone/files.py +0 -0
  84. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/pinecone/filter_mapper.py +0 -0
  85. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/__init__.py +0 -0
  86. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/client.py +0 -0
  87. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/exceptions.py +0 -0
  88. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/filter_mapper.py +0 -0
  89. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/models.py +0 -0
  90. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/search.py +0 -0
  91. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/postgres/source.py +0 -0
  92. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/search_client.py +0 -0
  93. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/search_schema/__init__.py +0 -0
  94. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/search_schema/builders.py +0 -0
  95. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/search_schema/dsl_parser.py +0 -0
  96. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/search_schema/search_exceptions.py +0 -0
  97. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/search_schema/search_types.py +0 -0
  98. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/source.py +0 -0
  99. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/turbopuffer/__init__.py +0 -0
  100. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/turbopuffer/files.py +0 -0
  101. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/corpus/turbopuffer/filter_mapper.py +0 -0
  102. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/__init__.py +0 -0
  103. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/__init__.py +0 -0
  104. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/clean_bodies.py +0 -0
  105. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/dedupe.py +0 -0
  106. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/filter_automated_email_qas.py +0 -0
  107. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/filter_automated_emails.py +0 -0
  108. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/mbox.py +0 -0
  109. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/preprocess/email/schema.py +0 -0
  110. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/__init__.py +0 -0
  111. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/anchor_selector.py +0 -0
  112. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/auto_tune.py +0 -0
  113. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/batch_processor.py +0 -0
  114. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/checkpoint.py +0 -0
  115. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/corpus_capabilities.py +0 -0
  116. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/corpus_profile.py +0 -0
  117. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/__init__.py +0 -0
  118. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/deterministic_guards.py +0 -0
  119. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/env_rollout.py +0 -0
  120. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/grounding_llm.py +0 -0
  121. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/hop_count_validity.py +0 -0
  122. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/quality_gate.py +0 -0
  123. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/filters/retrieval_llm.py +0 -0
  124. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/formatters/__init__.py +0 -0
  125. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/formatters/train_eval.py +0 -0
  126. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/generated_qa.py +0 -0
  127. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/generators/__init__.py +0 -0
  128. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/generators/direct_llm.py +0 -0
  129. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/helpers.py +0 -0
  130. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/metadata_linker.py +0 -0
  131. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/metrics.py +0 -0
  132. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/models.py +0 -0
  133. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/pipeline.py +0 -0
  134. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/pipeline_config.py +0 -0
  135. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/protocols.py +0 -0
  136. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/query_rewriter.py +0 -0
  137. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/response_parsers.py +0 -0
  138. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/retrieval_query.py +0 -0
  139. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/scoring.py +0 -0
  140. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/search_agent_linker.py +0 -0
  141. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/storage.py +0 -0
  142. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/style_controls.py +0 -0
  143. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/transformers/__init__.py +0 -0
  144. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/transformers/base.py +0 -0
  145. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/transformers/dedup.py +0 -0
  146. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/wiki_builder.py +0 -0
  147. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rag/qa_generation/wiki_chunk_linker.py +0 -0
  148. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rewards/__init__.py +0 -0
  149. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rewards/diversity.py +0 -0
  150. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/__init__.py +0 -0
  151. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/_utils.py +0 -0
  152. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/adaptive.py +0 -0
  153. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/cache.py +0 -0
  154. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/prompts.py +0 -0
  155. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/reward_fns.py +0 -0
  156. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/rubrics/rubric.py +0 -0
  157. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/__init__.py +0 -0
  158. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/adapter.py +0 -0
  159. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/braintrust/__init__.py +0 -0
  160. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/braintrust/adapter.py +0 -0
  161. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/braintrust/message_extraction.py +0 -0
  162. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/http.py +0 -0
  163. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/pipeline.py +0 -0
  164. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/pivot.py +0 -0
  165. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/processing.py +0 -0
  166. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/traces/registry.py +0 -0
  167. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/utils/__init__.py +0 -0
  168. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax/utils/checkpoint.py +0 -0
  169. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax.egg-info/SOURCES.txt +0 -0
  170. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax.egg-info/dependency_links.txt +0 -0
  171. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax.egg-info/entry_points.txt +0 -0
  172. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax.egg-info/requires.txt +0 -0
  173. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev31}/src/benchmax.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: benchmax
3
- Version: 0.1.2.dev30
3
+ Version: 0.1.2.dev31
4
4
  Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
5
5
  Author: castie@castform.com
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "benchmax"
3
- version = "0.1.2.dev30"
3
+ version = "0.1.2.dev31"
4
4
  description = "Framework-Agnostic RL Environments for LLM Fine-Tuning"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "castie@castform.com" }]
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
3
4
  import inspect
4
5
  import io
5
6
  import json
@@ -76,6 +77,7 @@ def dump_bundle(
76
77
  pip_dependencies: list[str] | None = None,
77
78
  local_modules: list[ModuleType] | None = None,
78
79
  env_class_source: str | None = None,
80
+ auto_local_modules: bool = True,
79
81
  ) -> Bundle:
80
82
  """Pickle ``(env_class, constructor_args)`` and stamp metadata.
81
83
 
@@ -90,6 +92,10 @@ def dump_bundle(
90
92
  recover it — e.g. a class produced by ``exec()`` into an in-memory
91
93
  namespace, which has no source file on disk. When ``None``
92
94
  (default), source is introspected from ``env_class``.
95
+ auto_local_modules: When True (default), any local module the pickle
96
+ references but that wasn't passed in ``local_modules`` is imported
97
+ and pickled by value automatically (a warning names them). When
98
+ False, such a reference raises ``BundlingError`` instead.
93
99
 
94
100
  Raises:
95
101
  BundlingError: bad env_class, cloudpickle failure, or pickle references
@@ -124,6 +130,46 @@ def dump_bundle(
124
130
  except Exception:
125
131
  pass
126
132
 
133
+ if auto_local_modules and _unregistered_local_refs(pickled):
134
+ # Import each referenced local module and re-dump with it pickled by
135
+ # value. Loop because a by-value module can surface further local refs;
136
+ # registrations accumulate (and are torn down once at the end) so an
137
+ # earlier module stays by-value while we resolve the ones it pulled in.
138
+ seen: set[str] = {m.__name__ for m in local_modules}
139
+ registered: list[ModuleType] = []
140
+ with _BUNDLE_LOCK:
141
+ try:
142
+ for _ in range(10):
143
+ pending = [
144
+ m for m in _unregistered_local_refs(pickled) if m not in seen
145
+ ]
146
+ if not pending:
147
+ break
148
+ new_mods: list[ModuleType] = []
149
+ for name in pending:
150
+ seen.add(name) # unimportable names fall through to the guard
151
+ try:
152
+ new_mods.append(importlib.import_module(name))
153
+ except Exception:
154
+ pass
155
+ if not new_mods:
156
+ break
157
+ logger.warning(
158
+ "[bundle] %s: auto-bundling local module(s): %s ",
159
+ env_class.__name__,
160
+ ", ".join(sorted(m.__name__ for m in new_mods)),
161
+ )
162
+ for mod in new_mods:
163
+ cloudpickle.register_pickle_by_value(mod)
164
+ registered.append(mod)
165
+ pickled = cloudpickle.dumps((env_class, constructor_args))
166
+ finally:
167
+ for mod in registered:
168
+ try:
169
+ cloudpickle.unregister_pickle_by_value(mod)
170
+ except Exception:
171
+ pass
172
+
127
173
  risky = _unregistered_local_refs(pickled)
128
174
  if risky:
129
175
  msg = (
@@ -259,6 +305,15 @@ def _referenced_modules(pickled: bytes) -> set[str]:
259
305
  # Hooks find_class so we see every (module, name) the unpickler would import —
260
306
  # i.e. exactly what'd raise ModuleNotFoundError on a fresh interpreter. The stub
261
307
  # lets unpickling proceed past missing classes so we collect every ref.
308
+ #
309
+ # find_class alone has a blind spot: a bare ``import foo`` that leaves a
310
+ # module *object* in the env's globals is pickled as
311
+ # ``cloudpickle.subimport("foo")`` — the module name is a REDUCE argument,
312
+ # not a find_class path, so we'd only see ``cloudpickle.cloudpickle`` (which
313
+ # looks installed) and miss ``foo``. We shim subimport to record its arg and
314
+ # return a stub instead of importing, so a missing module is captured rather
315
+ # than aborting the whole load early. (``dynamic_subimport`` is by-value /
316
+ # self-contained — leave it to the real find_class so we don't flag it.)
262
317
  refs: set[str] = set()
263
318
 
264
319
  class _Stub:
@@ -271,9 +326,28 @@ def _referenced_modules(pickled: bytes) -> set[str]:
271
326
  def __reduce__(self) -> tuple:
272
327
  return (type(self), ())
273
328
 
329
+ def _recording_subimport(name: str, *a: Any, **kw: Any) -> ModuleType:
330
+ refs.add(name)
331
+ return ModuleType(str(name))
332
+
333
+ def _noop_setstate(obj: Any, *a: Any, **kw: Any) -> Any:
334
+ # cloudpickle's _make_skeleton_class resolves the class_tracker_id back
335
+ # to the *live* class (it was tracked when env_class was dumped), so the
336
+ # real ``_class_setstate``/``_function_setstate`` would setattr the
337
+ # reconstructed (stub-globals) members onto the live class/function —
338
+ # mutating the caller's class mid-bundle and poisoning any later dump.
339
+ # We only need the refs from ``state``, which are already recorded while
340
+ # it's unpickled; the setter itself is a no-op here.
341
+ return obj
342
+
274
343
  class _Recorder(pickle.Unpickler):
275
344
  def find_class(self, module: str, name: str) -> Any:
276
345
  refs.add(module)
346
+ if module.startswith("cloudpickle"):
347
+ if name == "subimport":
348
+ return _recording_subimport
349
+ if name in ("_class_setstate", "_function_setstate"):
350
+ return _noop_setstate
277
351
  try:
278
352
  return super().find_class(module, name)
279
353
  except Exception:
@@ -12,10 +12,12 @@ Run it from the benchmax project root (the ``telestich`` extra pulls in the
12
12
  env's word-list / rhyme dependencies):
13
13
 
14
14
  cd core/benchmax
15
- CASTFORM_API_KEY=sk_... \
16
- uv run --extra telestich python -m benchmax.envs.telestich.example
15
+ uv run --extra telestich python -m benchmax.envs.telestich.example
17
16
 
18
- (``CASTFORM_LLM_API_KEY`` is optional — it defaults to ``CASTFORM_API_KEY``.)
17
+ Auth is the device-auth session (``ensure_session()`` opens a browser login if
18
+ ``~/.castform`` has no valid session) — no API key needed. ``CASTFORM_API_KEY``
19
+ / ``CASTFORM_LLM_API_KEY`` are only consulted by the offline dataset-generation
20
+ helpers, not the launch path.
19
21
 
20
22
  This launches a real training run on the full committed seed dataset
21
23
  (~90/10 train/eval split).
@@ -63,6 +65,8 @@ CONCURRENCY = 15
63
65
  # pool) server-side. Supported: "Qwen/Qwen3.5-4B" (gpu4) or "Qwen/Qwen3.5-35B-A3B"
64
66
  # (gpu8). Override via TELESTICH_MODEL.
65
67
  MODEL = os.environ.get("TELESTICH_MODEL", "Qwen/Qwen3.5-4B")
68
+ # Run name — defaults to a unique telestich-full-<uuid>. Override via TELESTICH_RUN_NAME.
69
+ RUN_NAME = os.environ.get("TELESTICH_RUN_NAME", "")
66
70
 
67
71
  # (model, weight). Weights reflect observed reliability on our checks:
68
72
  # - Both grok models leak banned example words and rubber-stamp the CoT self-check.
@@ -558,12 +562,15 @@ def get_dataset():
558
562
  if __name__ == "__main__":
559
563
  import uuid
560
564
 
565
+ from benchmax.platform import ensure_session
561
566
  from benchmax.platform.client import TrainerClient
562
567
  from benchmax.platform.training_run import upload_training_run
563
568
  from benchmax.platform.validation import validate_env
564
569
 
565
- if not API_KEY:
566
- raise SystemExit("Set CASTFORM_API_KEY before running this example.")
570
+ # Device-auth session bootstrap: browser login if no credential resolves.
571
+ # After this the platform bearer comes from ~/.castform — no API key needed,
572
+ # so we pass api_key="" to the platform calls below (resolves via the seam).
573
+ ensure_session()
567
574
 
568
575
  print(f"Platform URL: {BASE_URL}")
569
576
  print(f"LLM URL: {LLM_BASE_URL}\n")
@@ -603,7 +610,7 @@ if __name__ == "__main__":
603
610
  eval_dataset=eval_data[:2],
604
611
  local_modules=local_modules,
605
612
  pip_dependencies=pip_dependencies,
606
- api_key=API_KEY,
613
+ api_key="", # session bearer via ensure_session()
607
614
  base_url=BASE_URL,
608
615
  llm_base_url=LLM_BASE_URL,
609
616
  llm_api_key="",
@@ -614,14 +621,14 @@ if __name__ == "__main__":
614
621
  )
615
622
 
616
623
  # 3. Bundle the env class and upload everything to platform storage.
617
- run_name = f"telestich-full-{uuid.uuid4().hex[:8]}"
624
+ run_name = RUN_NAME or f"telestich-full-{uuid.uuid4().hex[:8]}"
618
625
  print(f"\nUploading bundle + datasets as {run_name!r} ...")
619
626
  uploaded = upload_training_run(
620
627
  env_class=TelestichEnv,
621
628
  train_dataset=train_data,
622
629
  eval_dataset=eval_data,
623
630
  run_name=run_name,
624
- api_key=API_KEY,
631
+ api_key="", # session bearer via ensure_session()
625
632
  base_url=BASE_URL,
626
633
  local_modules=local_modules,
627
634
  constructor_args=constructor_args,
@@ -638,7 +645,7 @@ if __name__ == "__main__":
638
645
  # 4. Launch the training run. training_run_type="simple" + the `model` arg select
639
646
  # the trainer YAML/pool server-side (Qwen3.5-4B→gpu4, Qwen3.5-35B-A3B→gpu8).
640
647
  print(f"\nLaunching training run (model={MODEL}) ...")
641
- with TrainerClient(api_key=API_KEY, base_url=BASE_URL) as trainer:
648
+ with TrainerClient(api_key="", base_url=BASE_URL) as trainer:
642
649
  run_id = trainer.launch_training_run(
643
650
  training_run_type="simple",
644
651
  env_cls_path=uploaded.env_cls_path,
@@ -647,10 +654,10 @@ if __name__ == "__main__":
647
654
  eval_dataset_path=uploaded.eval_dataset_path,
648
655
  name=run_name,
649
656
  # num_epochs: passes over the train set (platform default is 5).
650
- # max_response_len 3000: a brief reason + 1-2 tool rounds + poem fits well
657
+ # max_rollout_len 3000: a brief reason + 1-2 tool rounds + poem fits well
651
658
  # under this; lowered from 4000 to cut off in-head enumeration rambles
652
659
  # sooner (they truncate to a 0-reward anyway).
653
- launcher_args={"model": MODEL, "max_response_len": 3000, "num_epochs": 10},
660
+ launcher_args={"model": MODEL, "max_rollout_len": 3000, "num_epochs": 10},
654
661
  )
655
662
 
656
663
  print(f"\n✓ Launched run_id={run_id}")
@@ -7,6 +7,7 @@ import hashlib
7
7
  import json
8
8
  import logging
9
9
  import textwrap
10
+ import warnings
10
11
  from collections.abc import Iterator
11
12
  from dataclasses import dataclass, field
12
13
  from pathlib import Path
@@ -404,7 +405,7 @@ class TrainerClient:
404
405
  eval_dataset_path: Path to the evaluation dataset
405
406
  name: Optional name for the training run
406
407
  launcher_args: Extra launcher args forwarded to the server
407
- (e.g. {"max_response_len": 4000}). The 4 required paths
408
+ (e.g. {"max_rollout_len": 4000}). The 4 required paths
408
409
  above always take precedence.
409
410
 
410
411
  Returns:
@@ -431,8 +432,11 @@ class TrainerClient:
431
432
  )
432
433
  self._handle_response_errors(response)
433
434
  body = response.json()
435
+ # Surface soft-cap / OOM-risk warnings via the warnings module (shown by
436
+ # default in notebooks/REPL) — a bare logger.warning is swallowed unless
437
+ # the caller configured logging.
434
438
  for warning in body.get("warnings", []) or []:
435
- logger.warning("launch warning: %s", warning)
439
+ warnings.warn(f"launch warning: {warning}", stacklevel=2)
436
440
  return body["runId"]
437
441
 
438
442
  def list_launch_args(self) -> list[LaunchArgSpec]:
@@ -7,6 +7,7 @@ the env class contract matches what the trainer expects.
7
7
  from __future__ import annotations
8
8
 
9
9
  import asyncio
10
+ import importlib
10
11
  import json
11
12
  import math
12
13
  import tempfile
@@ -578,6 +579,41 @@ def _run_local_checks(
578
579
  from benchmax.bundle import unregistered_local_refs
579
580
 
580
581
  risky = unregistered_local_refs(cloudpickle.dumps(env_class))
582
+ # Mirror dump_bundle's auto_local_modules: import + pickle-by-value
583
+ # any local refs the user didn't list, so validation reflects what
584
+ # the bundle will actually contain. Only genuinely unimportable refs
585
+ # (which the trainer also couldn't load) remain to be flagged.
586
+ auto: list[ModuleType] = []
587
+ if risky:
588
+ seen: set[str] = set()
589
+ try:
590
+ for _ in range(10):
591
+ pending = [
592
+ m
593
+ for m in unregistered_local_refs(cloudpickle.dumps(env_class))
594
+ if m not in seen
595
+ ]
596
+ if not pending:
597
+ break
598
+ new_mods: list[ModuleType] = []
599
+ for name in pending:
600
+ seen.add(name)
601
+ try:
602
+ new_mods.append(importlib.import_module(name))
603
+ except Exception:
604
+ pass
605
+ if not new_mods:
606
+ break
607
+ for mod in new_mods:
608
+ cloudpickle.register_pickle_by_value(mod)
609
+ auto.append(mod)
610
+ risky = unregistered_local_refs(cloudpickle.dumps(env_class))
611
+ finally:
612
+ for mod in auto:
613
+ try:
614
+ cloudpickle.unregister_pickle_by_value(mod)
615
+ except Exception:
616
+ pass
581
617
  if risky:
582
618
  print(
583
619
  f" \u2717 {env_class.__name__}: missing "
@@ -589,7 +625,13 @@ def _run_local_checks(
589
625
  )
590
626
  failed += 1
591
627
  else:
592
- print(" \u2713 no unregistered local-module references")
628
+ if auto:
629
+ names = ", ".join(sorted(m.__name__ for m in auto))
630
+ print(
631
+ f" \u2713 auto-bundled local module(s): {names} "
632
+ )
633
+ else:
634
+ print(" \u2713 no unregistered local-module references")
593
635
  passed += 1
594
636
  except Exception as exc:
595
637
  print(f" \u2717 local-modules check failed: {type(exc).__name__}: {exc}")
@@ -60,9 +60,17 @@ class PineconeIndexClient:
60
60
  embed_model: Pinecone hosted embedding model name. Ignored when
61
61
  ``embed_fn`` is provided. Defaults to
62
62
  ``"multilingual-e5-large"``.
63
- field_mapping: Maps *Pinecone metadata field names* → *internal
64
- field names*. Useful for "bring your own index" scenarios where
65
- the user's metadata schema differs from the default.
63
+ field_mapping: Low-level escape hatch — maps *Pinecone metadata
64
+ field names* → *internal field names* for schemas that also
65
+ relocate structural fields (``file_path``, ``chunk_index``,
66
+ headers). For the common "my text is under a different key"
67
+ case, prefer ``content_field``.
68
+ content_field: Pinecone metadata key holding the chunk text, for
69
+ "bring your own index" schemas that don't use ``content`` (e.g.
70
+ ``"summary"`` or ``"passage"``). The canonical way to point at
71
+ your text column. Empty / None means the default ``content``
72
+ key. Raises if ``field_mapping`` already maps a *different*
73
+ key to ``content``.
66
74
  """
67
75
 
68
76
  def __init__(
@@ -75,15 +83,35 @@ class PineconeIndexClient:
75
83
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
76
84
  embed_model: str = "multilingual-e5-large",
77
85
  field_mapping: dict[str, str] | None = None,
86
+ content_field: str | None = None,
78
87
  ) -> None:
79
88
  # Store config for lazy init / pickle safety.
80
89
  self._api_key = api_key
81
90
  self._index_name = index_name
82
91
  self._index_host = index_host
83
- self._namespace = namespace
92
+ # Platform codegen may pass None for an unset namespace; Pinecone's
93
+ # default namespace is "".
94
+ self._namespace = namespace or ""
84
95
  self._embed_model = embed_model
85
96
  self.embed_fn = embed_fn or self._build_pinecone_embed_fn()
86
- self._field_mapping = field_mapping or dict(DEFAULT_FIELD_MAPPING)
97
+ mapping = dict(field_mapping) if field_mapping else dict(DEFAULT_FIELD_MAPPING)
98
+ if content_field and content_field != "content":
99
+ conflicting = [
100
+ k
101
+ for k, v in mapping.items()
102
+ if v == "content" and k not in ("content", content_field)
103
+ ]
104
+ if field_mapping and conflicting:
105
+ raise ValueError(
106
+ f"content_field={content_field!r} conflicts with field_mapping "
107
+ f"entries {conflicting} that already map to 'content'. "
108
+ "Specify the text column one way or the other."
109
+ )
110
+ # Drop the default content→content entry so the reverse mapping
111
+ # resolves "content" to the custom key unambiguously.
112
+ mapping.pop("content", None)
113
+ mapping[content_field] = "content"
114
+ self._field_mapping = mapping
87
115
  # Reverse mapping: internal name → pinecone metadata key
88
116
  self._reverse_mapping = {v: k for k, v in self._field_mapping.items()}
89
117
  self._index: Any | None = None
@@ -91,6 +119,8 @@ class PineconeIndexClient:
91
119
  self._known_ids: list[str] | None = None
92
120
  # Cached vector dimension (detected on first embed or describe_index).
93
121
  self._vector_dim: int | None = None
122
+ # Cached index vector type ("dense" | "sparse"), probed lazily.
123
+ self._vector_type: str | None = None
94
124
 
95
125
  def _build_pinecone_embed_fn(self) -> Callable[[list[str]], list[list[float]]]:
96
126
  """Build an embed_fn using Pinecone's hosted Inference API.
@@ -157,6 +187,35 @@ class PineconeIndexClient:
157
187
  self._index = pc.Index(self._index_name)
158
188
  return self._index
159
189
 
190
+ def vector_type(self) -> str:
191
+ """Return the index vector type, ``"dense"`` or ``"sparse"``.
192
+
193
+ Probes the index via ``describe_index_stats`` on first call and
194
+ caches the result.
195
+ """
196
+ if self._vector_type is None:
197
+ index = self._get_index()
198
+ stats = index.describe_index_stats()
199
+ self._vector_type = getattr(stats, "vector_type", None) or "dense"
200
+ return self._vector_type
201
+
202
+ def namespace_vector_count(self) -> int:
203
+ """Return the vector count for this client's namespace.
204
+
205
+ Scoped to the namespace, NOT the index-wide total — an index-wide
206
+ count would disagree with what list/fetch/query in this namespace
207
+ can actually see. The SDK keys the default namespace as
208
+ ``"__default__"`` (the REST API uses ``""``).
209
+ """
210
+ stats = self._get_index().describe_index_stats()
211
+ namespaces = getattr(stats, "namespaces", None) or {}
212
+ ns_stats = namespaces.get(self._namespace or "__default__")
213
+ if ns_stats is None and not self._namespace:
214
+ ns_stats = namespaces.get("")
215
+ if ns_stats is None:
216
+ return 0
217
+ return int(getattr(ns_stats, "vector_count", 0) or 0)
218
+
160
219
  def zero_vector(self) -> list[float]:
161
220
  """Return a zero-vector with the correct dimension for this index.
162
221
 
@@ -168,6 +227,12 @@ class PineconeIndexClient:
168
227
  index = self._get_index()
169
228
  stats = index.describe_index_stats()
170
229
  self._vector_dim = stats.dimension
230
+ if self._vector_dim is None:
231
+ # Sparse indexes have no fixed dimension.
232
+ raise ValueError(
233
+ f"Pinecone index '{self._index_name}' has no dimension — it is "
234
+ "a sparse index, which has no dense zero-vector."
235
+ )
171
236
  return [0.0] * self._vector_dim
172
237
 
173
238
  # ------------------------------------------------------------------
@@ -305,6 +370,14 @@ class PineconeIndexClient:
305
370
  include_metadata: bool = True,
306
371
  ) -> Any:
307
372
  """Run a vector query against the index."""
373
+ if self.vector_type() == "sparse":
374
+ # A dense query vector against a sparse index is rejected by
375
+ # Pinecone with an opaque error; fail with an actionable one.
376
+ raise ValueError(
377
+ f"Pinecone index '{self._index_name}' is a sparse index — "
378
+ "search against sparse indexes is not supported yet. "
379
+ "Use a dense index."
380
+ )
308
381
  index = self._get_index()
309
382
  kwargs: dict[str, Any] = {
310
383
  "vector": vector,
@@ -36,6 +36,8 @@ class PineconeSearch:
36
36
  embed_model: Pinecone hosted embedding model name. Ignored
37
37
  when ``embed_fn`` is provided.
38
38
  field_mapping: Maps Pinecone metadata keys to internal names.
39
+ content_field: Pinecone metadata key holding the chunk text — sugar
40
+ over ``field_mapping`` for BYO indexes that don't use ``content``.
39
41
  token_provider: Optional override — a callable resolving the key per
40
42
  call, or a literal key (string sugar). Defaults to reading
41
43
  ``PINECONE_API_KEY``.
@@ -50,6 +52,7 @@ class PineconeSearch:
50
52
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
51
53
  embed_model: str = "multilingual-e5-large",
52
54
  field_mapping: dict[str, str] | None = None,
55
+ content_field: str | None = None,
53
56
  token_provider: str | TokenProvider | None = None,
54
57
  ) -> None:
55
58
  self._index_name = index_name
@@ -58,6 +61,7 @@ class PineconeSearch:
58
61
  self._embed_fn = embed_fn
59
62
  self._embed_model = embed_model
60
63
  self._field_mapping = field_mapping
64
+ self._content_field = content_field
61
65
  self._token_provider = as_token_provider(
62
66
  token_provider, env_token("PINECONE_API_KEY")
63
67
  )
@@ -75,6 +79,7 @@ class PineconeSearch:
75
79
  embed_fn=self._embed_fn,
76
80
  embed_model=self._embed_model,
77
81
  field_mapping=self._field_mapping,
82
+ content_field=self._content_field,
78
83
  )
79
84
  return self._client
80
85
 
@@ -26,6 +26,9 @@ from .index_client import PineconeIndexClient
26
26
 
27
27
  logger = logging.getLogger(__name__)
28
28
 
29
+ #: Max IDs per vectors/fetch call — Pinecone caps fetch batches at 100.
30
+ _FETCH_BATCH_SIZE = 100
31
+
29
32
 
30
33
  def _raw_to_chunk(raw: dict[str, Any]) -> Chunk:
31
34
  """Convert a raw dict from PineconeIndexClient to a Chunk."""
@@ -64,8 +67,13 @@ class PineconeChunkSource:
64
67
  embed_model: Pinecone hosted embedding model name. Ignored when
65
68
  ``embed_fn`` is provided. Defaults to
66
69
  ``"multilingual-e5-large"``.
67
- field_mapping: Maps Pinecone metadata field names to internal names.
68
- Useful for "bring your own index" scenarios.
70
+ field_mapping: Low-level escape hatch — maps Pinecone metadata field
71
+ names to internal names when structural fields (``file_path``,
72
+ ``chunk_index``, headers) are also relocated. For the common
73
+ case, prefer ``content_field``.
74
+ content_field: Pinecone metadata key holding the chunk text — the
75
+ canonical way to point at your text column for pre-existing
76
+ indexes that don't use ``content``.
69
77
 
70
78
  Example:
71
79
  >>> # Using Pinecone's built-in embeddings (simplest)
@@ -82,12 +90,12 @@ class PineconeChunkSource:
82
90
  ... embed_fn=my_embed_fn,
83
91
  ... )
84
92
 
85
- >>> # Pre-existing index with custom field names
93
+ >>> # Pre-existing index whose text lives under another key
86
94
  >>> source = PineconeChunkSource(
87
95
  ... api_key="pcsk_...",
88
96
  ... index_name="product-catalog",
89
97
  ... embed_model="llama-text-embed-v2",
90
- ... field_mapping={"description": "content", "path": "file_path"},
98
+ ... content_field="description",
91
99
  ... )
92
100
  """
93
101
 
@@ -101,6 +109,7 @@ class PineconeChunkSource:
101
109
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
102
110
  embed_model: str = "multilingual-e5-large",
103
111
  field_mapping: dict[str, str] | None = None,
112
+ content_field: str | None = None,
104
113
  ) -> None:
105
114
  self._client = PineconeIndexClient(
106
115
  api_key=api_key,
@@ -110,6 +119,7 @@ class PineconeChunkSource:
110
119
  embed_fn=embed_fn,
111
120
  embed_model=embed_model,
112
121
  field_mapping=field_mapping,
122
+ content_field=content_field,
113
123
  )
114
124
  self._files = FileAwareness(self._client)
115
125
 
@@ -237,40 +247,56 @@ class PineconeChunkSource:
237
247
  # ------------------------------------------------------------------
238
248
 
239
249
  def get_chunk_count(self) -> int:
240
- """Return the total number of vectors in the index."""
241
- index = self._client._get_index()
242
- stats = index.describe_index_stats()
243
- return int(stats.total_vector_count or 0)
250
+ """Return the number of vectors in the configured namespace.
251
+
252
+ Scoped to the namespace this source reads from — an index-wide
253
+ total would disagree with what sampling/search can actually see.
254
+ """
255
+ return self._client.namespace_vector_count()
244
256
 
245
257
  def sample_chunks(self, n: int, min_chars: int = 0) -> list[Chunk]:
246
258
  """Return n randomly sampled chunks, optionally filtered by
247
259
  minimum length.
248
260
 
249
- Uses a random vector query to get pseudo-random results
250
- efficiently in a single API call.
261
+ Samples uniformly from the paginated ID listing and hydrates the
262
+ sample via fetch — no query vector involved, so the draw is
263
+ genuinely uniform (not nearest-to-a-random-point) and works for
264
+ dense and sparse indexes alike.
251
265
  """
252
- # Generate a random vector for pseudo-random sampling
253
- dim = len(self._client.zero_vector())
254
- rand_vec = [random.gauss(0, 1) for _ in range(dim)]
255
-
256
- # Fetch more than needed to allow for min_chars filtering
257
- fetch_k = min(n * 3, 10000) if min_chars > 0 else min(n, 10000)
258
- result = self._client.query(
259
- vector=rand_vec,
260
- top_k=fetch_k,
261
- include_metadata=True,
262
- )
263
-
264
- matches = result.matches or []
265
- if not matches:
266
+ # Oversample when a length filter will discard part of the draw
267
+ fetch_n = min(n * 3, 10000) if min_chars > 0 else min(n, 10000)
268
+ ids = self._client.sample_ids(fetch_n)
269
+ if not ids:
266
270
  return []
267
271
 
268
- chunks = [_raw_to_chunk(self._client.match_to_raw(m)) for m in matches]
272
+ raws: list[dict[str, Any]] = []
273
+ for batch_start in range(0, len(ids), _FETCH_BATCH_SIZE):
274
+ raws.extend(
275
+ self._client.fetch_by_ids_raw(
276
+ ids[batch_start : batch_start + _FETCH_BATCH_SIZE]
277
+ )
278
+ )
279
+ chunks = [_raw_to_chunk(r) for r in raws]
280
+
281
+ # Every fetched record decoding to empty content means the text key
282
+ # is wrong (BYO index whose schema doesn't use the configured field),
283
+ # not that the corpus is empty. Without this, the pipeline dies later
284
+ # with an unactionable "No eligible chunks were found".
285
+ if chunks and all(not c.content for c in chunks):
286
+ content_key = self._client._pc_field("content")
287
+ seen_keys = sorted(
288
+ {k for r in raws for k in r.get("metadata", {}) if not k.startswith("_")}
289
+ )
290
+ raise ValueError(
291
+ f"No text found under metadata field '{content_key}' in any "
292
+ f"sampled record. This index's metadata fields are: "
293
+ f"{seen_keys}. Set content_field to the one holding the "
294
+ f"chunk text."
295
+ )
269
296
 
270
297
  if min_chars > 0:
271
298
  chunks = [c for c in chunks if len(c.content) >= min_chars]
272
299
 
273
- # Shuffle to avoid bias from similarity ordering
274
300
  random.shuffle(chunks)
275
301
  return chunks[:n]
276
302
 
@@ -19,6 +19,27 @@ from benchmax.rag.corpus.search_schema.search_types import (
19
19
  )
20
20
 
21
21
 
22
+ def resolve_content_attr(
23
+ content_attr: list[str] | None, content_field: str | None
24
+ ) -> list[str] | None:
25
+ """Resolve the ``content_field`` sugar against an explicit ``content_attr``.
26
+
27
+ ``content_field`` is the canonical single-column param; ``content_attr``
28
+ is the low-level multi-field escape hatch. Specifying the text column
29
+ both ways with different values raises instead of silently picking a
30
+ winner.
31
+ """
32
+ if not content_field:
33
+ return content_attr
34
+ if content_attr is not None and content_attr != [content_field]:
35
+ raise ValueError(
36
+ f"content_field={content_field!r} conflicts with "
37
+ f"content_attr={content_attr!r}. Specify the text column one way "
38
+ "or the other."
39
+ )
40
+ return [content_field]
41
+
42
+
22
43
  class TpufNamespace:
23
44
  """Thin wrapper around a Turbopuffer namespace.
24
45