benchmax 0.1.2.dev26__tar.gz → 0.1.2.dev28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. benchmax-0.1.2.dev28/PKG-INFO +75 -0
  2. benchmax-0.1.2.dev28/README.md +21 -0
  3. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/pyproject.toml +2 -1
  4. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/example_id.py +21 -19
  5. benchmax-0.1.2.dev28/src/benchmax/envs/telestich/example.py +668 -0
  6. benchmax-0.1.2.dev28/src/benchmax/envs/telestich/telestich_env.py +1107 -0
  7. benchmax-0.1.2.dev28/src/benchmax/envs/types.py +137 -0
  8. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/platform/__init__.py +3 -0
  9. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/platform/client.py +13 -0
  10. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/platform/credentials.py +35 -0
  11. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/platform/training_run.py +24 -0
  12. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/platform/validation.py +274 -61
  13. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/search.py +63 -6
  14. benchmax-0.1.2.dev28/src/benchmax/rewards/diversity.py +305 -0
  15. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/_utils.py +3 -2
  16. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/adaptive.py +4 -2
  17. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/rubric.py +157 -56
  18. benchmax-0.1.2.dev28/src/benchmax/traces/__init__.py +8 -0
  19. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/adapter.py +113 -53
  20. benchmax-0.1.2.dev28/src/benchmax/traces/braintrust/__init__.py +0 -0
  21. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/braintrust/message_extraction.py +6 -79
  22. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/processing.py +16 -16
  23. benchmax-0.1.2.dev28/src/benchmax.egg-info/PKG-INFO +75 -0
  24. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/SOURCES.txt +4 -0
  25. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/requires.txt +5 -0
  26. benchmax-0.1.2.dev26/PKG-INFO +0 -188
  27. benchmax-0.1.2.dev26/README.md +0 -138
  28. benchmax-0.1.2.dev26/src/benchmax/envs/types.py +0 -39
  29. benchmax-0.1.2.dev26/src/benchmax/traces/__init__.py +0 -3
  30. benchmax-0.1.2.dev26/src/benchmax.egg-info/PKG-INFO +0 -188
  31. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/LICENSE +0 -0
  32. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/setup.cfg +0 -0
  33. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/bundle.py +0 -0
  34. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/config.py +0 -0
  35. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/__init__.py +0 -0
  36. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/base_env.py +0 -0
  37. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/crm/crm_env.py +0 -0
  38. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/crm/workdir/reward_fn.py +0 -0
  39. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/data_utils.py +0 -0
  40. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/excel_env.py +0 -0
  41. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/__init__.py +0 -0
  42. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py +0 -0
  43. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/excel_utils.py +0 -0
  44. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/reward_fn.py +0 -0
  45. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/logging.py +0 -0
  46. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/math/math_env.py +0 -0
  47. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/math/workdir/reward_fn.py +0 -0
  48. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/__init__.py +0 -0
  49. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py +0 -0
  50. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/example_workdir/reward_fn.py +0 -0
  51. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/parallel_mcp_env.py +0 -0
  52. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/__init__.py +0 -0
  53. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/base_provisioner.py +0 -0
  54. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/local_provisioner.py +0 -0
  55. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py +0 -0
  56. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py +0 -0
  57. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/utils.py +0 -0
  58. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/proxy_server.py +0 -0
  59. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/server_pool.py +0 -0
  60. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/utils.py +0 -0
  61. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/postgres_search/__init__.py +0 -0
  62. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/postgres_search/linker_env.py +0 -0
  63. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/postgres_search/search_env.py +0 -0
  64. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/reward_helpers.py +0 -0
  65. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/wikipedia/utils.py +0 -0
  66. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/envs/wikipedia/wiki_env.py +0 -0
  67. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/__init__.py +0 -0
  68. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/caller.py +0 -0
  69. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/clients.py +0 -0
  70. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/example_usage.py +0 -0
  71. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/inspector.py +0 -0
  72. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/models.py +0 -0
  73. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/pricing.py +0 -0
  74. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/platform/exceptions.py +0 -0
  75. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/prompts/__init__.py +0 -0
  76. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/prompts/tools.py +0 -0
  77. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/__init__.py +0 -0
  78. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/email.py +0 -0
  79. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/inspector.py +0 -0
  80. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/markdown.py +0 -0
  81. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/models.py +0 -0
  82. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/storage.py +0 -0
  83. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/__init__.py +0 -0
  84. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/__init__.py +0 -0
  85. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/client.py +0 -0
  86. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/files.py +0 -0
  87. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/filter_mapper.py +0 -0
  88. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/source.py +0 -0
  89. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/__init__.py +0 -0
  90. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/files.py +0 -0
  91. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/filter_mapper.py +0 -0
  92. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/index_client.py +0 -0
  93. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/search.py +0 -0
  94. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/source.py +0 -0
  95. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/__init__.py +0 -0
  96. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/client.py +0 -0
  97. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/exceptions.py +0 -0
  98. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/filter_mapper.py +0 -0
  99. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/models.py +0 -0
  100. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/search.py +0 -0
  101. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/source.py +0 -0
  102. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_client.py +0 -0
  103. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/__init__.py +0 -0
  104. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/builders.py +0 -0
  105. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/dsl_parser.py +0 -0
  106. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/search_exceptions.py +0 -0
  107. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/search_types.py +0 -0
  108. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/source.py +0 -0
  109. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/__init__.py +0 -0
  110. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/files.py +0 -0
  111. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/filter_mapper.py +0 -0
  112. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/namespace.py +0 -0
  113. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/search.py +0 -0
  114. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/source.py +0 -0
  115. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/__init__.py +0 -0
  116. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/__init__.py +0 -0
  117. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/clean_bodies.py +0 -0
  118. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/dedupe.py +0 -0
  119. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/filter_automated_email_qas.py +0 -0
  120. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/filter_automated_emails.py +0 -0
  121. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/mbox.py +0 -0
  122. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/schema.py +0 -0
  123. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/__init__.py +0 -0
  124. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/anchor_selector.py +0 -0
  125. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/auto_tune.py +0 -0
  126. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/batch_processor.py +0 -0
  127. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/checkpoint.py +0 -0
  128. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/corpus_capabilities.py +0 -0
  129. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/corpus_profile.py +0 -0
  130. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/__init__.py +0 -0
  131. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/deterministic_guards.py +0 -0
  132. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/env_rollout.py +0 -0
  133. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/grounding_llm.py +0 -0
  134. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/hop_count_validity.py +0 -0
  135. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/quality_gate.py +0 -0
  136. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/retrieval_llm.py +0 -0
  137. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/formatters/__init__.py +0 -0
  138. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/formatters/train_eval.py +0 -0
  139. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/generated_qa.py +0 -0
  140. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/generators/__init__.py +0 -0
  141. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/generators/direct_llm.py +0 -0
  142. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/helpers.py +0 -0
  143. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/metadata_linker.py +0 -0
  144. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/metrics.py +0 -0
  145. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/models.py +0 -0
  146. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/pipeline.py +0 -0
  147. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/pipeline_config.py +0 -0
  148. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/protocols.py +0 -0
  149. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/query_rewriter.py +0 -0
  150. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/response_parsers.py +0 -0
  151. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/retrieval_query.py +0 -0
  152. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/scoring.py +0 -0
  153. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/search_agent_linker.py +0 -0
  154. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/storage.py +0 -0
  155. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/style_controls.py +0 -0
  156. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/transformers/__init__.py +0 -0
  157. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/transformers/base.py +0 -0
  158. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/transformers/dedup.py +0 -0
  159. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/wiki_builder.py +0 -0
  160. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/wiki_chunk_linker.py +0 -0
  161. {benchmax-0.1.2.dev26/src/benchmax/traces/braintrust → benchmax-0.1.2.dev28/src/benchmax/rewards}/__init__.py +0 -0
  162. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/__init__.py +0 -0
  163. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/cache.py +0 -0
  164. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/prompts.py +0 -0
  165. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/reward_fns.py +0 -0
  166. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/braintrust/adapter.py +0 -0
  167. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/http.py +0 -0
  168. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/pipeline.py +0 -0
  169. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/pivot.py +0 -0
  170. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/traces/registry.py +0 -0
  171. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/utils/__init__.py +0 -0
  172. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax/utils/checkpoint.py +0 -0
  173. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/dependency_links.txt +0 -0
  174. {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/top_level.txt +0 -0
@@ -0,0 +1,75 @@
1
+ Metadata-Version: 2.4
2
+ Name: benchmax
3
+ Version: 0.1.2.dev28
4
+ Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
5
+ Author: castie@castform.com
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: ==3.12.*
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: aiohttp>=3.13.1
12
+ Requires-Dist: asyncio>=4.0.0
13
+ Requires-Dist: cloudpickle>=3.0.0
14
+ Requires-Dist: datasets>=4.0.0
15
+ Requires-Dist: httpx>=0.27.0
16
+ Requires-Dist: json-repair>=0.59.10
17
+ Requires-Dist: openai>=2.15.0
18
+ Requires-Dist: pydantic>=2.0.0
19
+ Provides-Extra: mcp
20
+ Requires-Dist: fastmcp~=2.12.0; extra == "mcp"
21
+ Requires-Dist: pyjwt>=2.10.1; extra == "mcp"
22
+ Provides-Extra: skypilot
23
+ Requires-Dist: skypilot[aws,gcp]~=0.8.1; extra == "skypilot"
24
+ Requires-Dist: pip>=25.3; extra == "skypilot"
25
+ Requires-Dist: msrestazure>=0.6.4.post1; extra == "skypilot"
26
+ Provides-Extra: excel
27
+ Requires-Dist: openpyxl>=3.1.5; extra == "excel"
28
+ Provides-Extra: excel-mac-windows
29
+ Requires-Dist: openpyxl>=3.1.5; extra == "excel-mac-windows"
30
+ Requires-Dist: xlwings>=0.33.16; extra == "excel-mac-windows"
31
+ Provides-Extra: crm
32
+ Requires-Dist: python-dateutil>=2.9.0.post0; extra == "crm"
33
+ Provides-Extra: telestich
34
+ Requires-Dist: english_words; extra == "telestich"
35
+ Requires-Dist: pronouncing; extra == "telestich"
36
+ Requires-Dist: wordfreq; extra == "telestich"
37
+ Provides-Extra: rag
38
+ Requires-Dist: keybert>=0.8; extra == "rag"
39
+ Requires-Dist: langchain-text-splitters>=0.3.0; extra == "rag"
40
+ Requires-Dist: nest-asyncio>=1.5.0; extra == "rag"
41
+ Requires-Dist: ragas>=0.4.3; extra == "rag"
42
+ Requires-Dist: ruamel-yaml>=0.19.1; extra == "rag"
43
+ Requires-Dist: scikit-learn>=1.8.0; extra == "rag"
44
+ Requires-Dist: sentence-transformers>=5.2.3; extra == "rag"
45
+ Requires-Dist: tqdm>=4.66.0; extra == "rag"
46
+ Provides-Extra: traces
47
+ Provides-Extra: chroma
48
+ Requires-Dist: chromadb>=1.0.0; extra == "chroma"
49
+ Provides-Extra: pinecone
50
+ Requires-Dist: pinecone>=5.0.0; extra == "pinecone"
51
+ Provides-Extra: turbopuffer
52
+ Requires-Dist: turbopuffer>=1.16.2; extra == "turbopuffer"
53
+ Dynamic: license-file
54
+
55
+ <picture>
56
+ <img alt="Benchmax" src="./static/benchmax.png" width="full">
57
+ </picture>
58
+
59
+ ## benchmax — companion sdk for the castform training platform
60
+
61
+ benchmax is the python sdk for running training jobs on castform. see the [online docs](https://castform.com/docs/) for how to start training runs. you can use our pre-built recipes for use cases like [training rag agents](https://castform.com/docs/rag/guide/) or [training on production traces](https://castform.com/docs/traces/overview/). or you can [roll your own too](https://castform.com/docs/environments/overview/).
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ uv pip install benchmax
67
+ ```
68
+
69
+ python 3.12 required.
70
+
71
+ ---
72
+
73
+ ## License
74
+
75
+ apache 2.0 © 2026 cgft inc
@@ -0,0 +1,21 @@
1
+ <picture>
2
+ <img alt="Benchmax" src="./static/benchmax.png" width="full">
3
+ </picture>
4
+
5
+ ## benchmax — companion sdk for the castform training platform
6
+
7
+ benchmax is the python sdk for running training jobs on castform. see the [online docs](https://castform.com/docs/) for how to start training runs. you can use our pre-built recipes for use cases like [training rag agents](https://castform.com/docs/rag/guide/) or [training on production traces](https://castform.com/docs/traces/overview/). or you can [roll your own too](https://castform.com/docs/environments/overview/).
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ uv pip install benchmax
13
+ ```
14
+
15
+ python 3.12 required.
16
+
17
+ ---
18
+
19
+ ## License
20
+
21
+ apache 2.0 © 2026 cgft inc
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "benchmax"
3
- version = "0.1.2.dev26"
3
+ version = "0.1.2.dev28"
4
4
  description = "Framework-Agnostic RL Environments for LLM Fine-Tuning"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "castie@castform.com" }]
@@ -40,6 +40,7 @@ skypilot = [
40
40
  excel = ["openpyxl>=3.1.5"]
41
41
  excel-mac-windows = ["openpyxl>=3.1.5", "xlwings>=0.33.16"]
42
42
  crm = ["python-dateutil>=2.9.0.post0"]
43
+ telestich = ["english_words", "pronouncing", "wordfreq"]
43
44
  rag = [
44
45
  "keybert>=0.8",
45
46
  "langchain-text-splitters>=0.3.0",
@@ -1,24 +1,23 @@
1
1
  """Canonical example identity.
2
2
 
3
3
  ``canonical_example_id(prompt_messages, task)`` returns a SHA-256 hex digest
4
- that is stable across processes and languages: a TypeScript port lives in
5
- ``platform-service/src/lib/canonical-example-id.ts`` and is exercised by a
6
- parity test.
7
-
8
- Determinism is achieved by:
9
- - normalizing numeric values so JSON output matches between Python and JS
10
- (JS has no int/float distinction; integer-valued floats are coerced to int,
11
- -0.0 to 0; NaN/Inf are rejected).
12
- - rejecting values whose JSON serialization diverges between Python and JS:
13
- non-string dict keys, integers outside JS ``Number.MAX_SAFE_INTEGER``,
14
- byte strings, lone surrogates, and unknown types.
15
- - emitting canonical JSON with sorted keys, no whitespace, and no ASCII
16
- escaping (modern JSON.stringify also preserves non-ASCII).
17
-
18
- The hash is computed over ``{"v": 2, "prompt_messages": ..., "task": ...}``.
19
- v:2 bump went together with the ``seed_messages`` → ``prompt_messages``
20
- field rename in 2026-05; v:1 hashes are obsolete.
4
+ stable across processes. Identity is computed only here, in Python — both the
5
+ trainer and rollout-service hash via this module.
6
+
7
+ Normalization keeps the digest loader-independent:
8
+ - integer-valued floats → int, -0.0 → 0; NaN/Inf rejected.
9
+ - dict keys whose value is ``None`` are dropped, so a key absent in one loader
10
+ and present-but-null in another (Arrow schema-unification) hashes the same;
11
+ nulls *inside lists* are kept (length/order are identity).
12
+ - ambiguous values rejected: non-str dict keys, ints beyond
13
+ ``Number.MAX_SAFE_INTEGER``, byte strings, lone surrogates, unknown types.
14
+ - canonical JSON: sorted keys, no whitespace, no ASCII escaping.
15
+
16
+ Payload tag ``v:3``. History: v:1→v:2 = the 2026-05 ``seed_messages`` →
17
+ ``prompt_messages`` rename; v:2→v:3 = drop null-valued dict keys (loader skew).
18
+ Older hashes are obsolete.
21
19
  """
20
+
22
21
  from __future__ import annotations
23
22
 
24
23
  import hashlib
@@ -78,7 +77,10 @@ def _normalize(v: Any) -> Any:
78
77
  raise ValueError(
79
78
  f"dict keys must be str for canonical hashing; got {type(k).__name__}"
80
79
  )
81
- out[k] = _normalize(x)
80
+ nx = _normalize(x)
81
+ if nx is None:
82
+ continue
83
+ out[k] = nx
82
84
  return out
83
85
  raise ValueError(
84
86
  f"type {type(v).__name__} is not JSON-canonicalizable; "
@@ -90,7 +92,7 @@ def canonical_example_id(
90
92
  prompt_messages: Messages,
91
93
  task: dict[str, Any] | None,
92
94
  ) -> str:
93
- payload = {"v": 2, "prompt_messages": prompt_messages, "task": task}
95
+ payload = {"v": 3, "prompt_messages": prompt_messages, "task": task}
94
96
  serialized = json.dumps(
95
97
  _normalize(payload),
96
98
  sort_keys=True,