benchmax 0.1.2.dev30__tar.gz → 0.1.2.dev33__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (173)
  1. {benchmax-0.1.2.dev30/src/benchmax.egg-info → benchmax-0.1.2.dev33}/PKG-INFO +1 -1
  2. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/pyproject.toml +1 -1
  3. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/bundle.py +74 -0
  4. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/postgres_search/search_env.py +1 -7
  5. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/reward_helpers.py +3 -12
  6. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/telestich/example.py +18 -11
  7. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/client.py +6 -2
  8. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/validation.py +43 -1
  9. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/chroma/client.py +30 -0
  10. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/chroma/search.py +23 -6
  11. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/chroma/source.py +22 -14
  12. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/pinecone/index_client.py +78 -5
  13. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/pinecone/search.py +5 -0
  14. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/pinecone/source.py +52 -26
  15. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/search_schema/search_exceptions.py +18 -0
  16. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/turbopuffer/namespace.py +21 -0
  17. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/turbopuffer/search.py +15 -3
  18. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/turbopuffer/source.py +14 -8
  19. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33/src/benchmax.egg-info}/PKG-INFO +1 -1
  20. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/LICENSE +0 -0
  21. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/README.md +0 -0
  22. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/setup.cfg +0 -0
  23. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/cli.py +0 -0
  24. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/config.py +0 -0
  25. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/__init__.py +0 -0
  26. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/base_env.py +0 -0
  27. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/crm/crm_env.py +0 -0
  28. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/crm/workdir/reward_fn.py +0 -0
  29. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/example_id.py +0 -0
  30. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/excel/data_utils.py +0 -0
  31. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/excel/excel_env.py +0 -0
  32. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/excel/workdir/__init__.py +0 -0
  33. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py +0 -0
  34. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/excel/workdir/excel_utils.py +0 -0
  35. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/excel/workdir/reward_fn.py +0 -0
  36. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/logging.py +0 -0
  37. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/math/math_env.py +0 -0
  38. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/math/workdir/reward_fn.py +0 -0
  39. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/__init__.py +0 -0
  40. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py +0 -0
  41. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/example_workdir/reward_fn.py +0 -0
  42. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/parallel_mcp_env.py +0 -0
  43. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/provisioners/__init__.py +0 -0
  44. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/provisioners/base_provisioner.py +0 -0
  45. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/provisioners/local_provisioner.py +0 -0
  46. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py +0 -0
  47. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py +0 -0
  48. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/provisioners/utils.py +0 -0
  49. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/proxy_server.py +0 -0
  50. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/server_pool.py +0 -0
  51. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/mcp/utils.py +0 -0
  52. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/postgres_search/__init__.py +0 -0
  53. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/postgres_search/linker_env.py +0 -0
  54. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/telestich/telestich_env.py +0 -0
  55. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/types.py +0 -0
  56. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/wikipedia/utils.py +0 -0
  57. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/envs/wikipedia/wiki_env.py +0 -0
  58. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/__init__.py +0 -0
  59. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/caller.py +0 -0
  60. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/clients.py +0 -0
  61. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/example_usage.py +0 -0
  62. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/inspector.py +0 -0
  63. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/models.py +0 -0
  64. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/multi_model/pricing.py +0 -0
  65. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/__init__.py +0 -0
  66. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/credentials.py +0 -0
  67. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/device_auth.py +0 -0
  68. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/exceptions.py +0 -0
  69. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/login.py +0 -0
  70. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/platform/training_run.py +0 -0
  71. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/prompts/__init__.py +0 -0
  72. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/prompts/tools.py +0 -0
  73. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/chunkers/__init__.py +0 -0
  74. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/chunkers/email.py +0 -0
  75. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/chunkers/inspector.py +0 -0
  76. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/chunkers/markdown.py +0 -0
  77. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/chunkers/models.py +0 -0
  78. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/chunkers/storage.py +0 -0
  79. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/__init__.py +0 -0
  80. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/chroma/__init__.py +0 -0
  81. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/chroma/files.py +0 -0
  82. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/chroma/filter_mapper.py +0 -0
  83. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/pinecone/__init__.py +0 -0
  84. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/pinecone/files.py +0 -0
  85. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/pinecone/filter_mapper.py +0 -0
  86. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/__init__.py +0 -0
  87. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/client.py +0 -0
  88. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/exceptions.py +0 -0
  89. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/filter_mapper.py +0 -0
  90. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/models.py +0 -0
  91. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/search.py +0 -0
  92. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/postgres/source.py +0 -0
  93. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/search_client.py +0 -0
  94. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/search_schema/__init__.py +0 -0
  95. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/search_schema/builders.py +0 -0
  96. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/search_schema/dsl_parser.py +0 -0
  97. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/search_schema/search_types.py +0 -0
  98. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/source.py +0 -0
  99. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/turbopuffer/__init__.py +0 -0
  100. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/turbopuffer/files.py +0 -0
  101. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/corpus/turbopuffer/filter_mapper.py +0 -0
  102. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/__init__.py +0 -0
  103. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/__init__.py +0 -0
  104. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/clean_bodies.py +0 -0
  105. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/dedupe.py +0 -0
  106. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/filter_automated_email_qas.py +0 -0
  107. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/filter_automated_emails.py +0 -0
  108. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/mbox.py +0 -0
  109. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/preprocess/email/schema.py +0 -0
  110. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/__init__.py +0 -0
  111. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/anchor_selector.py +0 -0
  112. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/auto_tune.py +0 -0
  113. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/batch_processor.py +0 -0
  114. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/checkpoint.py +0 -0
  115. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/corpus_capabilities.py +0 -0
  116. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/corpus_profile.py +0 -0
  117. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/__init__.py +0 -0
  118. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/deterministic_guards.py +0 -0
  119. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/env_rollout.py +0 -0
  120. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/grounding_llm.py +0 -0
  121. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/hop_count_validity.py +0 -0
  122. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/quality_gate.py +0 -0
  123. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/filters/retrieval_llm.py +0 -0
  124. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/formatters/__init__.py +0 -0
  125. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/formatters/train_eval.py +0 -0
  126. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/generated_qa.py +0 -0
  127. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/generators/__init__.py +0 -0
  128. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/generators/direct_llm.py +0 -0
  129. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/helpers.py +0 -0
  130. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/metadata_linker.py +0 -0
  131. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/metrics.py +0 -0
  132. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/models.py +0 -0
  133. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/pipeline.py +0 -0
  134. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/pipeline_config.py +0 -0
  135. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/protocols.py +0 -0
  136. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/query_rewriter.py +0 -0
  137. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/response_parsers.py +0 -0
  138. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/retrieval_query.py +0 -0
  139. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/scoring.py +0 -0
  140. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/search_agent_linker.py +0 -0
  141. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/storage.py +0 -0
  142. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/style_controls.py +0 -0
  143. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/transformers/__init__.py +0 -0
  144. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/transformers/base.py +0 -0
  145. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/transformers/dedup.py +0 -0
  146. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/wiki_builder.py +0 -0
  147. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rag/qa_generation/wiki_chunk_linker.py +0 -0
  148. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rewards/__init__.py +0 -0
  149. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rewards/diversity.py +0 -0
  150. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/__init__.py +0 -0
  151. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/_utils.py +0 -0
  152. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/adaptive.py +0 -0
  153. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/cache.py +0 -0
  154. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/prompts.py +0 -0
  155. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/reward_fns.py +0 -0
  156. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/rubrics/rubric.py +0 -0
  157. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/__init__.py +0 -0
  158. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/adapter.py +0 -0
  159. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/braintrust/__init__.py +0 -0
  160. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/braintrust/adapter.py +0 -0
  161. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/braintrust/message_extraction.py +0 -0
  162. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/http.py +0 -0
  163. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/pipeline.py +0 -0
  164. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/pivot.py +0 -0
  165. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/processing.py +0 -0
  166. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/traces/registry.py +0 -0
  167. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/utils/__init__.py +0 -0
  168. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax/utils/checkpoint.py +0 -0
  169. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax.egg-info/SOURCES.txt +0 -0
  170. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax.egg-info/dependency_links.txt +0 -0
  171. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax.egg-info/entry_points.txt +0 -0
  172. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax.egg-info/requires.txt +0 -0
  173. {benchmax-0.1.2.dev30 → benchmax-0.1.2.dev33}/src/benchmax.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: benchmax
3
- Version: 0.1.2.dev30
3
+ Version: 0.1.2.dev33
4
4
  Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
5
5
  Author: castie@castform.com
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "benchmax"
3
- version = "0.1.2.dev30"
3
+ version = "0.1.2.dev33"
4
4
  description = "Framework-Agnostic RL Environments for LLM Fine-Tuning"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "castie@castform.com" }]
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import importlib
3
4
  import inspect
4
5
  import io
5
6
  import json
@@ -76,6 +77,7 @@ def dump_bundle(
76
77
  pip_dependencies: list[str] | None = None,
77
78
  local_modules: list[ModuleType] | None = None,
78
79
  env_class_source: str | None = None,
80
+ auto_local_modules: bool = True,
79
81
  ) -> Bundle:
80
82
  """Pickle ``(env_class, constructor_args)`` and stamp metadata.
81
83
 
@@ -90,6 +92,10 @@ def dump_bundle(
90
92
  recover it — e.g. a class produced by ``exec()`` into an in-memory
91
93
  namespace, which has no source file on disk. When ``None``
92
94
  (default), source is introspected from ``env_class``.
95
+ auto_local_modules: When True (default), any local module the pickle
96
+ references but that wasn't passed in ``local_modules`` is imported
97
+ and pickled by value automatically (a warning names them). When
98
+ False, such a reference raises ``BundlingError`` instead.
93
99
 
94
100
  Raises:
95
101
  BundlingError: bad env_class, cloudpickle failure, or pickle references
@@ -124,6 +130,46 @@ def dump_bundle(
124
130
  except Exception:
125
131
  pass
126
132
 
133
+ if auto_local_modules and _unregistered_local_refs(pickled):
134
+ # Import each referenced local module and re-dump with it pickled by
135
+ # value. Loop because a by-value module can surface further local refs;
136
+ # registrations accumulate (and are torn down once at the end) so an
137
+ # earlier module stays by-value while we resolve the ones it pulled in.
138
+ seen: set[str] = {m.__name__ for m in local_modules}
139
+ registered: list[ModuleType] = []
140
+ with _BUNDLE_LOCK:
141
+ try:
142
+ for _ in range(10):
143
+ pending = [
144
+ m for m in _unregistered_local_refs(pickled) if m not in seen
145
+ ]
146
+ if not pending:
147
+ break
148
+ new_mods: list[ModuleType] = []
149
+ for name in pending:
150
+ seen.add(name) # unimportable names fall through to the guard
151
+ try:
152
+ new_mods.append(importlib.import_module(name))
153
+ except Exception:
154
+ pass
155
+ if not new_mods:
156
+ break
157
+ logger.warning(
158
+ "[bundle] %s: auto-bundling local module(s): %s ",
159
+ env_class.__name__,
160
+ ", ".join(sorted(m.__name__ for m in new_mods)),
161
+ )
162
+ for mod in new_mods:
163
+ cloudpickle.register_pickle_by_value(mod)
164
+ registered.append(mod)
165
+ pickled = cloudpickle.dumps((env_class, constructor_args))
166
+ finally:
167
+ for mod in registered:
168
+ try:
169
+ cloudpickle.unregister_pickle_by_value(mod)
170
+ except Exception:
171
+ pass
172
+
127
173
  risky = _unregistered_local_refs(pickled)
128
174
  if risky:
129
175
  msg = (
@@ -259,6 +305,15 @@ def _referenced_modules(pickled: bytes) -> set[str]:
259
305
  # Hooks find_class so we see every (module, name) the unpickler would import —
260
306
  # i.e. exactly what'd raise ModuleNotFoundError on a fresh interpreter. The stub
261
307
  # lets unpickling proceed past missing classes so we collect every ref.
308
+ #
309
+ # find_class alone has a blind spot: a bare ``import foo`` that leaves a
310
+ # module *object* in the env's globals is pickled as
311
+ # ``cloudpickle.subimport("foo")`` — the module name is a REDUCE argument,
312
+ # not a find_class path, so we'd only see ``cloudpickle.cloudpickle`` (which
313
+ # looks installed) and miss ``foo``. We shim subimport to record its arg and
314
+ # return a stub instead of importing, so a missing module is captured rather
315
+ # than aborting the whole load early. (``dynamic_subimport`` is by-value /
316
+ # self-contained — leave it to the real find_class so we don't flag it.)
262
317
  refs: set[str] = set()
263
318
 
264
319
  class _Stub:
@@ -271,9 +326,28 @@ def _referenced_modules(pickled: bytes) -> set[str]:
271
326
  def __reduce__(self) -> tuple:
272
327
  return (type(self), ())
273
328
 
329
+ def _recording_subimport(name: str, *a: Any, **kw: Any) -> ModuleType:
330
+ refs.add(name)
331
+ return ModuleType(str(name))
332
+
333
+ def _noop_setstate(obj: Any, *a: Any, **kw: Any) -> Any:
334
+ # cloudpickle's _make_skeleton_class resolves the class_tracker_id back
335
+ # to the *live* class (it was tracked when env_class was dumped), so the
336
+ # real ``_class_setstate``/``_function_setstate`` would setattr the
337
+ # reconstructed (stub-globals) members onto the live class/function —
338
+ # mutating the caller's class mid-bundle and poisoning any later dump.
339
+ # We only need the refs from ``state``, which are already recorded while
340
+ # it's unpickled; the setter itself is a no-op here.
341
+ return obj
342
+
274
343
  class _Recorder(pickle.Unpickler):
275
344
  def find_class(self, module: str, name: str) -> Any:
276
345
  refs.add(module)
346
+ if module.startswith("cloudpickle"):
347
+ if name == "subimport":
348
+ return _recording_subimport
349
+ if name in ("_class_setstate", "_function_setstate"):
350
+ return _noop_setstate
277
351
  try:
278
352
  return super().find_class(module, name)
279
353
  except Exception:
@@ -285,14 +285,8 @@ tags. Cite your sources inline using [Source: <source_id>] next to each claim.
285
285
  if not text.strip():
286
286
  return zeros
287
287
 
288
- # No final <answer> block → no answer to score. Return all-zero
289
- # rewards so conciseness / citations / efficiency can't accrue
290
- # from reasoning or tool-call text alone.
291
- answer = extract_answer_block(text)
292
- if not answer:
293
- return zeros
294
-
295
288
  t = task or {}
289
+ answer = extract_answer_block(text)
296
290
  prompt = str(t.get("question") or t.get("prompt") or "")
297
291
  gt_str = str(t.get("ground_truth") or "")
298
292
  reference_chunks = t.get("reference_chunks", [])
@@ -82,16 +82,9 @@ def extract_completion_text(completion: str | list[dict[str, Any]]) -> str:
82
82
 
83
83
 
84
84
  def extract_answer_block(text: str) -> str:
85
- """Extract content from ``<answer>`` tags.
86
-
87
- Returns the (stripped) tag contents when an ``<answer>…</answer>`` block
88
- is present, otherwise ``""``. A missing answer block is treated as "no
89
- final answer" rather than silently falling back to the full completion —
90
- consumers can gate rewards on a non-empty result. ``<answer></answer>``
91
- likewise yields ``""``.
92
- """
85
+ """Extract content from <answer> tags, or return full text."""
93
86
  match = _ANSWER_TAG_RE.search(text or "")
94
- return match.group(1).strip() if match else ""
87
+ return (match.group(1) if match else text).strip()
95
88
 
96
89
 
97
90
  def clip01(value: Any) -> float:
@@ -169,10 +162,8 @@ def citation_score(
169
162
  ref_ids.add(norm_sid)
170
163
  break
171
164
 
172
- if not cited:
165
+ if not cited or not ref_ids:
173
166
  return {"precision": 0.0, "recall": 0.0}
174
- if not ref_ids:
175
- return {"precision": 1.0, "recall": 0.0}
176
167
 
177
168
  precision = len(cited & ref_ids) / len(cited)
178
169
  recall = len(cited & ref_ids) / len(ref_ids)
@@ -12,10 +12,12 @@ Run it from the benchmax project root (the ``telestich`` extra pulls in the
12
12
  env's word-list / rhyme dependencies):
13
13
 
14
14
  cd core/benchmax
15
- CASTFORM_API_KEY=sk_... \
16
- uv run --extra telestich python -m benchmax.envs.telestich.example
15
+ uv run --extra telestich python -m benchmax.envs.telestich.example
17
16
 
18
- (``CASTFORM_LLM_API_KEY`` is optional it defaults to ``CASTFORM_API_KEY``.)
17
+ Auth is the device-auth session (``ensure_session()`` opens a browser login if
18
+ ``~/.castform`` has no valid session) — no API key needed. ``CASTFORM_API_KEY``
19
+ / ``CASTFORM_LLM_API_KEY`` are only consulted by the offline dataset-generation
20
+ helpers, not the launch path.
19
21
 
20
22
  This launches a real training run on the full committed seed dataset
21
23
  (~90/10 train/eval split).
@@ -63,6 +65,8 @@ CONCURRENCY = 15
63
65
  # pool) server-side. Supported: "Qwen/Qwen3.5-4B" (gpu4) or "Qwen/Qwen3.5-35B-A3B"
64
66
  # (gpu8). Override via TELESTICH_MODEL.
65
67
  MODEL = os.environ.get("TELESTICH_MODEL", "Qwen/Qwen3.5-4B")
68
+ # Run name — defaults to a unique telestich-full-<uuid>. Override via TELESTICH_RUN_NAME.
69
+ RUN_NAME = os.environ.get("TELESTICH_RUN_NAME", "")
66
70
 
67
71
  # (model, weight). Weights reflect observed reliability on our checks:
68
72
  # - Both grok models leak banned example words and rubber-stamp the CoT self-check.
@@ -558,12 +562,15 @@ def get_dataset():
558
562
  if __name__ == "__main__":
559
563
  import uuid
560
564
 
565
+ from benchmax.platform import ensure_session
561
566
  from benchmax.platform.client import TrainerClient
562
567
  from benchmax.platform.training_run import upload_training_run
563
568
  from benchmax.platform.validation import validate_env
564
569
 
565
- if not API_KEY:
566
- raise SystemExit("Set CASTFORM_API_KEY before running this example.")
570
+ # Device-auth session bootstrap: browser login if no credential resolves.
571
+ # After this the platform bearer comes from ~/.castform — no API key needed,
572
+ # so we pass api_key="" to the platform calls below (resolves via the seam).
573
+ ensure_session()
567
574
 
568
575
  print(f"Platform URL: {BASE_URL}")
569
576
  print(f"LLM URL: {LLM_BASE_URL}\n")
@@ -603,7 +610,7 @@ if __name__ == "__main__":
603
610
  eval_dataset=eval_data[:2],
604
611
  local_modules=local_modules,
605
612
  pip_dependencies=pip_dependencies,
606
- api_key=API_KEY,
613
+ api_key="", # session bearer via ensure_session()
607
614
  base_url=BASE_URL,
608
615
  llm_base_url=LLM_BASE_URL,
609
616
  llm_api_key="",
@@ -614,14 +621,14 @@ if __name__ == "__main__":
614
621
  )
615
622
 
616
623
  # 3. Bundle the env class and upload everything to platform storage.
617
- run_name = f"telestich-full-{uuid.uuid4().hex[:8]}"
624
+ run_name = RUN_NAME or f"telestich-full-{uuid.uuid4().hex[:8]}"
618
625
  print(f"\nUploading bundle + datasets as {run_name!r} ...")
619
626
  uploaded = upload_training_run(
620
627
  env_class=TelestichEnv,
621
628
  train_dataset=train_data,
622
629
  eval_dataset=eval_data,
623
630
  run_name=run_name,
624
- api_key=API_KEY,
631
+ api_key="", # session bearer via ensure_session()
625
632
  base_url=BASE_URL,
626
633
  local_modules=local_modules,
627
634
  constructor_args=constructor_args,
@@ -638,7 +645,7 @@ if __name__ == "__main__":
638
645
  # 4. Launch the training run. training_run_type="simple" + the `model` arg select
639
646
  # the trainer YAML/pool server-side (Qwen3.5-4B→gpu4, Qwen3.5-35B-A3B→gpu8).
640
647
  print(f"\nLaunching training run (model={MODEL}) ...")
641
- with TrainerClient(api_key=API_KEY, base_url=BASE_URL) as trainer:
648
+ with TrainerClient(api_key="", base_url=BASE_URL) as trainer:
642
649
  run_id = trainer.launch_training_run(
643
650
  training_run_type="simple",
644
651
  env_cls_path=uploaded.env_cls_path,
@@ -647,10 +654,10 @@ if __name__ == "__main__":
647
654
  eval_dataset_path=uploaded.eval_dataset_path,
648
655
  name=run_name,
649
656
  # num_epochs: passes over the train set (platform default is 5).
650
- # max_response_len 3000: a brief reason + 1-2 tool rounds + poem fits well
657
+ # max_rollout_len 3000: a brief reason + 1-2 tool rounds + poem fits well
651
658
  # under this; lowered from 4000 to cut off in-head enumeration rambles
652
659
  # sooner (they truncate to a 0-reward anyway).
653
- launcher_args={"model": MODEL, "max_response_len": 3000, "num_epochs": 10},
660
+ launcher_args={"model": MODEL, "max_rollout_len": 3000, "num_epochs": 10},
654
661
  )
655
662
 
656
663
  print(f"\n✓ Launched run_id={run_id}")
@@ -7,6 +7,7 @@ import hashlib
7
7
  import json
8
8
  import logging
9
9
  import textwrap
10
+ import warnings
10
11
  from collections.abc import Iterator
11
12
  from dataclasses import dataclass, field
12
13
  from pathlib import Path
@@ -404,7 +405,7 @@ class TrainerClient:
404
405
  eval_dataset_path: Path to the evaluation dataset
405
406
  name: Optional name for the training run
406
407
  launcher_args: Extra launcher args forwarded to the server
407
- (e.g. {"max_response_len": 4000}). The 4 required paths
408
+ (e.g. {"max_rollout_len": 4000}). The 4 required paths
408
409
  above always take precedence.
409
410
 
410
411
  Returns:
@@ -431,8 +432,11 @@ class TrainerClient:
431
432
  )
432
433
  self._handle_response_errors(response)
433
434
  body = response.json()
435
+ # Surface soft-cap / OOM-risk warnings via the warnings module (shown by
436
+ # default in notebooks/REPL) — a bare logger.warning is swallowed unless
437
+ # the caller configured logging.
434
438
  for warning in body.get("warnings", []) or []:
435
- logger.warning("launch warning: %s", warning)
439
+ warnings.warn(f"launch warning: {warning}", stacklevel=2)
436
440
  return body["runId"]
437
441
 
438
442
  def list_launch_args(self) -> list[LaunchArgSpec]:
@@ -7,6 +7,7 @@ the env class contract matches what the trainer expects.
7
7
  from __future__ import annotations
8
8
 
9
9
  import asyncio
10
+ import importlib
10
11
  import json
11
12
  import math
12
13
  import tempfile
@@ -578,6 +579,41 @@ def _run_local_checks(
578
579
  from benchmax.bundle import unregistered_local_refs
579
580
 
580
581
  risky = unregistered_local_refs(cloudpickle.dumps(env_class))
582
+ # Mirror dump_bundle's auto_local_modules: import + pickle-by-value
583
+ # any local refs the user didn't list, so validation reflects what
584
+ # the bundle will actually contain. Only genuinely unimportable refs
585
+ # (which the trainer also couldn't load) remain to be flagged.
586
+ auto: list[ModuleType] = []
587
+ if risky:
588
+ seen: set[str] = set()
589
+ try:
590
+ for _ in range(10):
591
+ pending = [
592
+ m
593
+ for m in unregistered_local_refs(cloudpickle.dumps(env_class))
594
+ if m not in seen
595
+ ]
596
+ if not pending:
597
+ break
598
+ new_mods: list[ModuleType] = []
599
+ for name in pending:
600
+ seen.add(name)
601
+ try:
602
+ new_mods.append(importlib.import_module(name))
603
+ except Exception:
604
+ pass
605
+ if not new_mods:
606
+ break
607
+ for mod in new_mods:
608
+ cloudpickle.register_pickle_by_value(mod)
609
+ auto.append(mod)
610
+ risky = unregistered_local_refs(cloudpickle.dumps(env_class))
611
+ finally:
612
+ for mod in auto:
613
+ try:
614
+ cloudpickle.unregister_pickle_by_value(mod)
615
+ except Exception:
616
+ pass
581
617
  if risky:
582
618
  print(
583
619
  f" \u2717 {env_class.__name__}: missing "
@@ -589,7 +625,13 @@ def _run_local_checks(
589
625
  )
590
626
  failed += 1
591
627
  else:
592
- print(" \u2713 no unregistered local-module references")
628
+ if auto:
629
+ names = ", ".join(sorted(m.__name__ for m in auto))
630
+ print(
631
+ f" \u2713 auto-bundled local module(s): {names} "
632
+ )
633
+ else:
634
+ print(" \u2713 no unregistered local-module references")
593
635
  passed += 1
594
636
  except Exception as exc:
595
637
  print(f" \u2717 local-modules check failed: {type(exc).__name__}: {exc}")
@@ -16,6 +16,13 @@ from typing import Any
16
16
  # Sparse-key name used when setting up BM25 schema
17
17
  BM25_KEY = "bm25_embedding"
18
18
 
19
+ # Embedding functions that run server-side on Chroma Cloud (embed.trychroma.com)
20
+ # — querying a collection that uses one never downloads a model. Everything else
21
+ # (default all-MiniLM, sentence-transformers / HF / Ollama / ONNX locals,
22
+ # third-party API EFs, or no EF) is treated as unsafe. Add hosted names here as
23
+ # they are verified server-side.
24
+ _SERVER_SIDE_EF_NAMES = frozenset({"chroma-cloud-qwen"})
25
+
19
26
 
20
27
  def has_search_api() -> bool:
21
28
  """Return True when the chromadb package exposes the Search API."""
@@ -176,6 +183,29 @@ class ChromaClient:
176
183
 
177
184
  return self._collection
178
185
 
186
+ def dense_embed_is_safe(self) -> bool:
187
+ """True when a dense (vector) query embeds WITHOUT downloading a model.
188
+
189
+ Safe only when we can produce vectors without a client-side model
190
+ download: either a caller-supplied ``embed_fn``, or a Chroma-hosted
191
+ server-side embedding function (embeds at embed.trychroma.com). Every
192
+ other embedder — chromadb's default all-MiniLM, sentence-transformers /
193
+ HuggingFace / Ollama / ONNX locals, third-party API EFs we lack keys
194
+ for, or no EF at all — is treated as UNSAFE, so callers refuse the dense
195
+ path rather than trigger a model download. Conservative by design: an
196
+ unknown embedder is unsafe.
197
+ """
198
+ if self.embed_fn is not None:
199
+ return True
200
+ col = self._collection
201
+ if col is None:
202
+ return False
203
+ try:
204
+ ef = (col._model.configuration_json or {}).get("embedding_function") or {}
205
+ except Exception:
206
+ return False
207
+ return ef.get("name") in _SERVER_SIDE_EF_NAMES
208
+
179
209
  @staticmethod
180
210
  def _repair_cloud_embedding_function(collection: Any) -> None:
181
211
  """Attach a working EF when chromadb can't rebuild a Cloud hosted one.
@@ -10,6 +10,9 @@ from collections.abc import Callable
10
10
  from typing import Any
11
11
 
12
12
  from benchmax.platform.credentials import TokenProvider, as_token_provider, env_token
13
+ from benchmax.rag.corpus.search_schema.search_exceptions import (
14
+ LocalEmbeddingDownloadDisallowedError,
15
+ )
13
16
 
14
17
 
15
18
  class ChromaSearch:
@@ -113,19 +116,33 @@ class ChromaSearch:
113
116
  ) -> list[dict[str, Any]]:
114
117
  """Search and return structured results."""
115
118
  client = self._get_client()
116
-
117
- if mode == "auto":
118
- modes = client.modes
119
+ # Initialize the collection first so capabilities reflect the real index
120
+ # (BM25 downgrade) and the embedder config is readable below.
121
+ client.get_collection()
122
+ modes = client.modes
123
+ has_lexical = "lexical" in modes
124
+
125
+ # Never download a client-side embedding model at inference/rollout time.
126
+ # When a dense embed isn't safe — no embed_fn and no Chroma-hosted
127
+ # server-side embedding function — use the BM25 lexical index if the
128
+ # collection has one, otherwise refuse rather than fetch all-MiniLM.
129
+ if not client.dense_embed_is_safe():
130
+ if not has_lexical:
131
+ raise LocalEmbeddingDownloadDisallowedError(
132
+ "chroma", self._collection_name
133
+ )
134
+ mode = "lexical"
135
+ elif mode == "auto":
119
136
  if "hybrid" in modes:
120
137
  mode = "hybrid"
121
- elif "lexical" in modes:
138
+ elif has_lexical:
122
139
  mode = "lexical"
123
140
  else:
124
141
  mode = "vector"
125
- elif mode not in client.modes:
142
+ elif mode not in modes:
126
143
  raise ValueError(
127
144
  f"ChromaSearch does not support mode '{mode}'. "
128
- f"Available modes: {sorted(client.modes)}"
145
+ f"Available modes: {sorted(modes)}"
129
146
  )
130
147
 
131
148
  if client.search_api and mode in ("lexical", "hybrid"):
@@ -17,6 +17,7 @@ from tqdm.auto import tqdm
17
17
  from benchmax.rag.chunkers.models import Chunk, ChunkCollection
18
18
  from benchmax.rag.corpus.search_schema.search_exceptions import (
19
19
  InvalidSearchSpecError,
20
+ LocalEmbeddingDownloadDisallowedError,
20
21
  UnsupportedSearchModeError,
21
22
  )
22
23
  from benchmax.rag.corpus.search_schema.search_types import (
@@ -642,23 +643,30 @@ class ChromaChunkSource:
642
643
  # lack a BM25 index, in which case modes was downgraded to vector-only.
643
644
  modes = self._current_modes()
644
645
 
645
- # Pick mode. "hybrid"/None use the best available strategy and KEEP
646
- # lexical enabled as a fallback: hybrid = dense + sparse, and when we
647
- # can't produce dense query vectors (no embed_fn, the usual remote case)
648
- # the per-query loop below degrades to the sparse/lexical leg which
649
- # needs no embedding. Only an explicit "vector" disables lexical; that's
650
- # the dense-only recovery path a caller uses after a lexical/hybrid
651
- # failure. (Disabling lexical for "hybrid" silently forced vector search,
652
- # which made remote collections dense-embed every query — slow, and on a
653
- # default-EF collection it pulls the all-MiniLM model.)
654
- if mode == "vector":
655
- use_hybrid = use_lexical = False
646
+ has_lexical = "lexical" in modes
647
+ has_hybrid = "hybrid" in modes
648
+
649
+ # Hard rule: never let chromadb embed a query with a client-side model
650
+ # (it downloads all-MiniLM and crawls in constrained executors). When a
651
+ # dense embed isn't safe — no embed_fn and no Chroma-hosted server-side
652
+ # embedding function — use the BM25 lexical index if the collection has
653
+ # one, otherwise refuse. This covers every requested mode, including the
654
+ # linker's "inference" preference for vector.
655
+ if not self._chroma.dense_embed_is_safe():
656
+ if not has_lexical:
657
+ raise LocalEmbeddingDownloadDisallowedError(
658
+ "chroma", self._chroma.collection_name
659
+ )
660
+ use_hybrid = False
661
+ use_lexical = True
656
662
  elif mode == "lexical":
657
663
  use_hybrid = False
658
- use_lexical = "lexical" in modes
664
+ use_lexical = has_lexical
665
+ elif mode == "vector":
666
+ use_hybrid = use_lexical = False
659
667
  else: # "hybrid", None, or unrecognized -> best available
660
- use_hybrid = "hybrid" in modes
661
- use_lexical = "lexical" in modes
668
+ use_hybrid = has_hybrid
669
+ use_lexical = has_lexical
662
670
 
663
671
  # Batch-embed all queries when embed_fn available and vectors needed
664
672
  vectors: list[list[float]] | None = None
@@ -60,9 +60,17 @@ class PineconeIndexClient:
60
60
  embed_model: Pinecone hosted embedding model name. Ignored when
61
61
  ``embed_fn`` is provided. Defaults to
62
62
  ``"multilingual-e5-large"``.
63
- field_mapping: Maps *Pinecone metadata field names* → *internal
64
- field names*. Useful for "bring your own index" scenarios where
65
- the user's metadata schema differs from the default.
63
+ field_mapping: Low-level escape hatch — maps *Pinecone metadata
64
+ field names* → *internal field names* for schemas that also
65
+ relocate structural fields (``file_path``, ``chunk_index``,
66
+ headers). For the common "my text is under a different key"
67
+ case, prefer ``content_field``.
68
+ content_field: Pinecone metadata key holding the chunk text, for
69
+ "bring your own index" schemas that don't use ``content`` (e.g.
70
+ ``"summary"`` or ``"passage"``). The canonical way to point at
71
+ your text column. Empty / None means the default ``content``
72
+ key. Raises if ``field_mapping`` already maps a *different*
73
+ key to ``content``.
66
74
  """
67
75
 
68
76
  def __init__(
@@ -75,15 +83,35 @@ class PineconeIndexClient:
75
83
  embed_fn: Callable[[list[str]], list[list[float]]] | None = None,
76
84
  embed_model: str = "multilingual-e5-large",
77
85
  field_mapping: dict[str, str] | None = None,
86
+ content_field: str | None = None,
78
87
  ) -> None:
79
88
  # Store config for lazy init / pickle safety.
80
89
  self._api_key = api_key
81
90
  self._index_name = index_name
82
91
  self._index_host = index_host
83
- self._namespace = namespace
92
+ # Platform codegen may pass None for an unset namespace; Pinecone's
93
+ # default namespace is "".
94
+ self._namespace = namespace or ""
84
95
  self._embed_model = embed_model
85
96
  self.embed_fn = embed_fn or self._build_pinecone_embed_fn()
86
- self._field_mapping = field_mapping or dict(DEFAULT_FIELD_MAPPING)
97
+ mapping = dict(field_mapping) if field_mapping else dict(DEFAULT_FIELD_MAPPING)
98
+ if content_field and content_field != "content":
99
+ conflicting = [
100
+ k
101
+ for k, v in mapping.items()
102
+ if v == "content" and k not in ("content", content_field)
103
+ ]
104
+ if field_mapping and conflicting:
105
+ raise ValueError(
106
+ f"content_field={content_field!r} conflicts with field_mapping "
107
+ f"entries {conflicting} that already map to 'content'. "
108
+ "Specify the text column one way or the other."
109
+ )
110
+ # Drop the default content→content entry so the reverse mapping
111
+ # resolves "content" to the custom key unambiguously.
112
+ mapping.pop("content", None)
113
+ mapping[content_field] = "content"
114
+ self._field_mapping = mapping
87
115
  # Reverse mapping: internal name → pinecone metadata key
88
116
  self._reverse_mapping = {v: k for k, v in self._field_mapping.items()}
89
117
  self._index: Any | None = None
@@ -91,6 +119,8 @@ class PineconeIndexClient:
91
119
  self._known_ids: list[str] | None = None
92
120
  # Cached vector dimension (detected on first embed or describe_index).
93
121
  self._vector_dim: int | None = None
122
+ # Cached index vector type ("dense" | "sparse"), probed lazily.
123
+ self._vector_type: str | None = None
94
124
 
95
125
  def _build_pinecone_embed_fn(self) -> Callable[[list[str]], list[list[float]]]:
96
126
  """Build an embed_fn using Pinecone's hosted Inference API.
@@ -157,6 +187,35 @@ class PineconeIndexClient:
157
187
  self._index = pc.Index(self._index_name)
158
188
  return self._index
159
189
 
190
+ def vector_type(self) -> str:
191
+ """Return the index vector type, ``"dense"`` or ``"sparse"``.
192
+
193
+ Probes the index via ``describe_index_stats`` on first call and
194
+ caches the result.
195
+ """
196
+ if self._vector_type is None:
197
+ index = self._get_index()
198
+ stats = index.describe_index_stats()
199
+ self._vector_type = getattr(stats, "vector_type", None) or "dense"
200
+ return self._vector_type
201
+
202
+ def namespace_vector_count(self) -> int:
203
+ """Return the vector count for this client's namespace.
204
+
205
+ Scoped to the namespace, NOT the index-wide total — an index-wide
206
+ count would disagree with what list/fetch/query in this namespace
207
+ can actually see. The SDK keys the default namespace as
208
+ ``"__default__"`` (the REST API uses ``""``).
209
+ """
210
+ stats = self._get_index().describe_index_stats()
211
+ namespaces = getattr(stats, "namespaces", None) or {}
212
+ ns_stats = namespaces.get(self._namespace or "__default__")
213
+ if ns_stats is None and not self._namespace:
214
+ ns_stats = namespaces.get("")
215
+ if ns_stats is None:
216
+ return 0
217
+ return int(getattr(ns_stats, "vector_count", 0) or 0)
218
+
160
219
  def zero_vector(self) -> list[float]:
161
220
  """Return a zero-vector with the correct dimension for this index.
162
221
 
@@ -168,6 +227,12 @@ class PineconeIndexClient:
168
227
  index = self._get_index()
169
228
  stats = index.describe_index_stats()
170
229
  self._vector_dim = stats.dimension
230
+ if self._vector_dim is None:
231
+ # Sparse indexes have no fixed dimension.
232
+ raise ValueError(
233
+ f"Pinecone index '{self._index_name}' has no dimension — it is "
234
+ "a sparse index, which has no dense zero-vector."
235
+ )
171
236
  return [0.0] * self._vector_dim
172
237
 
173
238
  # ------------------------------------------------------------------
@@ -305,6 +370,14 @@ class PineconeIndexClient:
305
370
  include_metadata: bool = True,
306
371
  ) -> Any:
307
372
  """Run a vector query against the index."""
373
+ if self.vector_type() == "sparse":
374
+ # A dense query vector against a sparse index is rejected by
375
+ # Pinecone with an opaque error; fail with an actionable one.
376
+ raise ValueError(
377
+ f"Pinecone index '{self._index_name}' is a sparse index — "
378
+ "search against sparse indexes is not supported yet. "
379
+ "Use a dense index."
380
+ )
308
381
  index = self._get_index()
309
382
  kwargs: dict[str, Any] = {
310
383
  "vector": vector,