benchmax 0.1.2.dev27__tar.gz → 0.1.2.dev28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmax-0.1.2.dev28/PKG-INFO +75 -0
- benchmax-0.1.2.dev28/README.md +21 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/pyproject.toml +2 -1
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/example_id.py +21 -19
- benchmax-0.1.2.dev28/src/benchmax/envs/telestich/example.py +668 -0
- benchmax-0.1.2.dev28/src/benchmax/envs/telestich/telestich_env.py +1107 -0
- benchmax-0.1.2.dev28/src/benchmax/envs/types.py +137 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/platform/__init__.py +3 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/platform/client.py +13 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/platform/credentials.py +35 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/platform/training_run.py +24 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/platform/validation.py +274 -61
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/search.py +63 -6
- benchmax-0.1.2.dev28/src/benchmax/rewards/diversity.py +305 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/_utils.py +3 -2
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/adaptive.py +4 -2
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/rubric.py +127 -68
- benchmax-0.1.2.dev28/src/benchmax/traces/__init__.py +8 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/adapter.py +113 -53
- benchmax-0.1.2.dev28/src/benchmax/traces/braintrust/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/braintrust/message_extraction.py +6 -79
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/processing.py +16 -16
- benchmax-0.1.2.dev28/src/benchmax.egg-info/PKG-INFO +75 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/SOURCES.txt +4 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/requires.txt +5 -0
- benchmax-0.1.2.dev27/PKG-INFO +0 -188
- benchmax-0.1.2.dev27/README.md +0 -138
- benchmax-0.1.2.dev27/src/benchmax/envs/types.py +0 -39
- benchmax-0.1.2.dev27/src/benchmax/traces/__init__.py +0 -3
- benchmax-0.1.2.dev27/src/benchmax.egg-info/PKG-INFO +0 -188
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/LICENSE +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/setup.cfg +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/bundle.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/config.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/base_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/crm/crm_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/crm/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/data_utils.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/excel_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/excel_utils.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/excel/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/logging.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/math/math_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/math/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/example_workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/parallel_mcp_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/base_provisioner.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/local_provisioner.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/provisioners/utils.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/proxy_server.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/server_pool.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/mcp/utils.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/postgres_search/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/postgres_search/linker_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/postgres_search/search_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/reward_helpers.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/wikipedia/utils.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/envs/wikipedia/wiki_env.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/caller.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/clients.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/example_usage.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/inspector.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/models.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/multi_model/pricing.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/platform/exceptions.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/prompts/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/prompts/tools.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/email.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/inspector.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/markdown.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/models.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/chunkers/storage.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/client.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/files.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/chroma/source.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/files.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/index_client.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/search.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/pinecone/source.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/client.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/exceptions.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/models.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/search.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/postgres/source.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_client.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/builders.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/dsl_parser.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/search_exceptions.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/search_schema/search_types.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/source.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/files.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/namespace.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/search.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/corpus/turbopuffer/source.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/clean_bodies.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/dedupe.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/filter_automated_email_qas.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/filter_automated_emails.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/mbox.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/preprocess/email/schema.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/anchor_selector.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/auto_tune.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/batch_processor.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/checkpoint.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/corpus_capabilities.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/corpus_profile.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/deterministic_guards.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/env_rollout.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/grounding_llm.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/hop_count_validity.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/quality_gate.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/filters/retrieval_llm.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/formatters/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/formatters/train_eval.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/generated_qa.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/generators/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/generators/direct_llm.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/helpers.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/metadata_linker.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/metrics.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/models.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/pipeline.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/pipeline_config.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/protocols.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/query_rewriter.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/response_parsers.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/retrieval_query.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/scoring.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/search_agent_linker.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/storage.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/style_controls.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/transformers/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/transformers/base.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/transformers/dedup.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/wiki_builder.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rag/qa_generation/wiki_chunk_linker.py +0 -0
- {benchmax-0.1.2.dev27/src/benchmax/traces/braintrust → benchmax-0.1.2.dev28/src/benchmax/rewards}/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/cache.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/prompts.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/rubrics/reward_fns.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/braintrust/adapter.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/http.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/pipeline.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/pivot.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/traces/registry.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/utils/__init__.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax/utils/checkpoint.py +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/dependency_links.txt +0 -0
- {benchmax-0.1.2.dev27 → benchmax-0.1.2.dev28}/src/benchmax.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: benchmax
|
|
3
|
+
Version: 0.1.2.dev28
|
|
4
|
+
Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
|
|
5
|
+
Author: castie@castform.com
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Operating System :: OS Independent
|
|
8
|
+
Requires-Python: ==3.12.*
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: aiohttp>=3.13.1
|
|
12
|
+
Requires-Dist: asyncio>=4.0.0
|
|
13
|
+
Requires-Dist: cloudpickle>=3.0.0
|
|
14
|
+
Requires-Dist: datasets>=4.0.0
|
|
15
|
+
Requires-Dist: httpx>=0.27.0
|
|
16
|
+
Requires-Dist: json-repair>=0.59.10
|
|
17
|
+
Requires-Dist: openai>=2.15.0
|
|
18
|
+
Requires-Dist: pydantic>=2.0.0
|
|
19
|
+
Provides-Extra: mcp
|
|
20
|
+
Requires-Dist: fastmcp~=2.12.0; extra == "mcp"
|
|
21
|
+
Requires-Dist: pyjwt>=2.10.1; extra == "mcp"
|
|
22
|
+
Provides-Extra: skypilot
|
|
23
|
+
Requires-Dist: skypilot[aws,gcp]~=0.8.1; extra == "skypilot"
|
|
24
|
+
Requires-Dist: pip>=25.3; extra == "skypilot"
|
|
25
|
+
Requires-Dist: msrestazure>=0.6.4.post1; extra == "skypilot"
|
|
26
|
+
Provides-Extra: excel
|
|
27
|
+
Requires-Dist: openpyxl>=3.1.5; extra == "excel"
|
|
28
|
+
Provides-Extra: excel-mac-windows
|
|
29
|
+
Requires-Dist: openpyxl>=3.1.5; extra == "excel-mac-windows"
|
|
30
|
+
Requires-Dist: xlwings>=0.33.16; extra == "excel-mac-windows"
|
|
31
|
+
Provides-Extra: crm
|
|
32
|
+
Requires-Dist: python-dateutil>=2.9.0.post0; extra == "crm"
|
|
33
|
+
Provides-Extra: telestich
|
|
34
|
+
Requires-Dist: english_words; extra == "telestich"
|
|
35
|
+
Requires-Dist: pronouncing; extra == "telestich"
|
|
36
|
+
Requires-Dist: wordfreq; extra == "telestich"
|
|
37
|
+
Provides-Extra: rag
|
|
38
|
+
Requires-Dist: keybert>=0.8; extra == "rag"
|
|
39
|
+
Requires-Dist: langchain-text-splitters>=0.3.0; extra == "rag"
|
|
40
|
+
Requires-Dist: nest-asyncio>=1.5.0; extra == "rag"
|
|
41
|
+
Requires-Dist: ragas>=0.4.3; extra == "rag"
|
|
42
|
+
Requires-Dist: ruamel-yaml>=0.19.1; extra == "rag"
|
|
43
|
+
Requires-Dist: scikit-learn>=1.8.0; extra == "rag"
|
|
44
|
+
Requires-Dist: sentence-transformers>=5.2.3; extra == "rag"
|
|
45
|
+
Requires-Dist: tqdm>=4.66.0; extra == "rag"
|
|
46
|
+
Provides-Extra: traces
|
|
47
|
+
Provides-Extra: chroma
|
|
48
|
+
Requires-Dist: chromadb>=1.0.0; extra == "chroma"
|
|
49
|
+
Provides-Extra: pinecone
|
|
50
|
+
Requires-Dist: pinecone>=5.0.0; extra == "pinecone"
|
|
51
|
+
Provides-Extra: turbopuffer
|
|
52
|
+
Requires-Dist: turbopuffer>=1.16.2; extra == "turbopuffer"
|
|
53
|
+
Dynamic: license-file
|
|
54
|
+
|
|
55
|
+
<picture>
|
|
56
|
+
<img alt="Benchmax" src="./static/benchmax.png" width="full">
|
|
57
|
+
</picture>
|
|
58
|
+
|
|
59
|
+
## benchmax — companion sdk for the castform training platform
|
|
60
|
+
|
|
61
|
+
benchmax is the python sdk for running training jobs on castform. see the [online docs](https://castform.com/docs/) for how to start training runs. you can use our pre-built recipes for use cases like [training rag agents](https://castform.com/docs/rag/guide/) or [training on production traces](https://castform.com/docs/traces/overview/). or you can [roll your own too](https://castform.com/docs/environments/overview/).
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
uv pip install benchmax
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
python 3.12 required.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
apache 2.0 © 2026 cgft inc
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
<picture>
|
|
2
|
+
<img alt="Benchmax" src="./static/benchmax.png" width="full">
|
|
3
|
+
</picture>
|
|
4
|
+
|
|
5
|
+
## benchmax — companion sdk for the castform training platform
|
|
6
|
+
|
|
7
|
+
benchmax is the python sdk for running training jobs on castform. see the [online docs](https://castform.com/docs/) for how to start training runs. you can use our pre-built recipes for use cases like [training rag agents](https://castform.com/docs/rag/guide/) or [training on production traces](https://castform.com/docs/traces/overview/). or you can [roll your own too](https://castform.com/docs/environments/overview/).
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
uv pip install benchmax
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
python 3.12 required.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## License
|
|
20
|
+
|
|
21
|
+
apache 2.0 © 2026 cgft inc
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "benchmax"
|
|
3
|
-
version = "0.1.2.dev27"
|
|
3
|
+
version = "0.1.2.dev28"
|
|
4
4
|
description = "Framework-Agnostic RL Environments for LLM Fine-Tuning"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [{ name = "castie@castform.com" }]
|
|
@@ -40,6 +40,7 @@ skypilot = [
|
|
|
40
40
|
excel = ["openpyxl>=3.1.5"]
|
|
41
41
|
excel-mac-windows = ["openpyxl>=3.1.5", "xlwings>=0.33.16"]
|
|
42
42
|
crm = ["python-dateutil>=2.9.0.post0"]
|
|
43
|
+
telestich = ["english_words", "pronouncing", "wordfreq"]
|
|
43
44
|
rag = [
|
|
44
45
|
"keybert>=0.8",
|
|
45
46
|
"langchain-text-splitters>=0.3.0",
|
|
@@ -1,24 +1,23 @@
|
|
|
1
1
|
"""Canonical example identity.
|
|
2
2
|
|
|
3
3
|
``canonical_example_id(prompt_messages, task)`` returns a SHA-256 hex digest
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
v:2 bump went together with the ``seed_messages`` → ``prompt_messages``
|
|
20
|
-
field rename in 2026-05; v:1 hashes are obsolete.
|
|
4
|
+
stable across processes. Identity is computed only here, in Python — both the
|
|
5
|
+
trainer and rollout-service hash via this module.
|
|
6
|
+
|
|
7
|
+
Normalization keeps the digest loader-independent:
|
|
8
|
+
- integer-valued floats → int, -0.0 → 0; NaN/Inf rejected.
|
|
9
|
+
- dict keys whose value is ``None`` are dropped, so a key absent in one loader
|
|
10
|
+
and present-but-null in another (Arrow schema-unification) hashes the same;
|
|
11
|
+
nulls *inside lists* are kept (length/order are identity).
|
|
12
|
+
- ambiguous values rejected: non-str dict keys, ints beyond
|
|
13
|
+
``Number.MAX_SAFE_INTEGER``, byte strings, lone surrogates, unknown types.
|
|
14
|
+
- canonical JSON: sorted keys, no whitespace, no ASCII escaping.
|
|
15
|
+
|
|
16
|
+
Payload tag ``v:3``. History: v:1→v:2 = the 2026-05 ``seed_messages`` →
|
|
17
|
+
``prompt_messages`` rename; v:2→v:3 = drop null-valued dict keys (loader skew).
|
|
18
|
+
Older hashes are obsolete.
|
|
21
19
|
"""
|
|
20
|
+
|
|
22
21
|
from __future__ import annotations
|
|
23
22
|
|
|
24
23
|
import hashlib
|
|
@@ -78,7 +77,10 @@ def _normalize(v: Any) -> Any:
|
|
|
78
77
|
raise ValueError(
|
|
79
78
|
f"dict keys must be str for canonical hashing; got {type(k).__name__}"
|
|
80
79
|
)
|
|
81
|
-
|
|
80
|
+
nx = _normalize(x)
|
|
81
|
+
if nx is None:
|
|
82
|
+
continue
|
|
83
|
+
out[k] = nx
|
|
82
84
|
return out
|
|
83
85
|
raise ValueError(
|
|
84
86
|
f"type {type(v).__name__} is not JSON-canonicalizable; "
|
|
@@ -90,7 +92,7 @@ def canonical_example_id(
|
|
|
90
92
|
prompt_messages: Messages,
|
|
91
93
|
task: dict[str, Any] | None,
|
|
92
94
|
) -> str:
|
|
93
|
-
payload = {"v": 2, "prompt_messages": prompt_messages, "task": task}
|
|
95
|
+
payload = {"v": 3, "prompt_messages": prompt_messages, "task": task}
|
|
94
96
|
serialized = json.dumps(
|
|
95
97
|
_normalize(payload),
|
|
96
98
|
sort_keys=True,
|