benchmax 0.1.2.dev26__tar.gz → 0.1.2.dev27__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/PKG-INFO +1 -1
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/pyproject.toml +1 -1
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/rubric.py +44 -2
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax.egg-info/PKG-INFO +1 -1
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/LICENSE +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/README.md +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/setup.cfg +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/bundle.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/config.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/base_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/crm/crm_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/crm/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/example_id.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/data_utils.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/excel_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/workdir/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/workdir/excel_utils.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/logging.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/math/math_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/math/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/example_workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/parallel_mcp_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/base_provisioner.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/local_provisioner.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/utils.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/proxy_server.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/server_pool.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/utils.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/postgres_search/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/postgres_search/linker_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/postgres_search/search_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/reward_helpers.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/types.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/wikipedia/utils.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/wikipedia/wiki_env.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/caller.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/clients.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/example_usage.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/inspector.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/models.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/multi_model/pricing.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/platform/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/platform/client.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/platform/credentials.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/platform/exceptions.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/platform/training_run.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/platform/validation.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/prompts/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/prompts/tools.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/chunkers/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/chunkers/email.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/chunkers/inspector.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/chunkers/markdown.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/chunkers/models.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/chunkers/storage.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/client.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/files.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/search.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/source.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/files.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/index_client.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/search.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/source.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/client.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/exceptions.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/models.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/search.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/source.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_client.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/builders.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/dsl_parser.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/search_exceptions.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/search_types.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/source.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/files.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/filter_mapper.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/namespace.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/search.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/source.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/clean_bodies.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/dedupe.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/filter_automated_email_qas.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/filter_automated_emails.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/mbox.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/schema.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/anchor_selector.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/auto_tune.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/batch_processor.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/checkpoint.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/corpus_capabilities.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/corpus_profile.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/deterministic_guards.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/env_rollout.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/grounding_llm.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/hop_count_validity.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/quality_gate.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/retrieval_llm.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/formatters/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/formatters/train_eval.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/generated_qa.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/generators/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/generators/direct_llm.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/helpers.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/metadata_linker.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/metrics.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/models.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/pipeline.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/pipeline_config.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/protocols.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/query_rewriter.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/response_parsers.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/retrieval_query.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/scoring.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/search_agent_linker.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/storage.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/style_controls.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/transformers/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/transformers/base.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/transformers/dedup.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/wiki_builder.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/wiki_chunk_linker.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/_utils.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/adaptive.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/cache.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/prompts.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rubrics/reward_fns.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/adapter.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/braintrust/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/braintrust/adapter.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/braintrust/message_extraction.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/http.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/pipeline.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/pivot.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/processing.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/registry.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/utils/__init__.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/utils/checkpoint.py +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax.egg-info/SOURCES.txt +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax.egg-info/dependency_links.txt +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax.egg-info/requires.txt +0 -0
- {benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax.egg-info/top_level.txt +0 -0
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from typing import Any, Dict, List, Literal, Optional
|
|
4
5
|
|
|
5
6
|
from openai import AsyncOpenAI
|
|
6
7
|
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
7
10
|
from benchmax.platform.credentials import platform_bearer
|
|
8
11
|
|
|
9
12
|
from ._utils import _extract_json
|
|
@@ -76,6 +79,7 @@ async def evaluate_single_rubric(
|
|
|
76
79
|
ground_truth: Optional[str] = None,
|
|
77
80
|
api_key: str = "",
|
|
78
81
|
timeout: Optional[float] = None,
|
|
82
|
+
enable_logging: bool = True,
|
|
79
83
|
) -> Dict[str, Any]:
|
|
80
84
|
"""
|
|
81
85
|
Evaluate a single response against a single rubric.
|
|
@@ -146,11 +150,26 @@ async def evaluate_single_rubric(
|
|
|
146
150
|
return {"score": 0, "reasoning": "Empty response", "llm_output": ""}
|
|
147
151
|
|
|
148
152
|
result = _extract_json(content)
|
|
149
|
-
|
|
153
|
+
out = {
|
|
150
154
|
"score": result.get("score", 0),
|
|
151
155
|
"reasoning": result.get("reasoning", ""),
|
|
152
156
|
"llm_output": content,
|
|
153
157
|
}
|
|
158
|
+
if enable_logging:
|
|
159
|
+
logger.info(
|
|
160
|
+
"\n┌─ rubric: %s ─────────────────────\n"
|
|
161
|
+
"│ ground_truth : %s\n"
|
|
162
|
+
"│ score : %s\n"
|
|
163
|
+
"│ reasoning : %s\n"
|
|
164
|
+
"│ llm_output :\n%s\n"
|
|
165
|
+
"└──────────────────────────────────────────────────",
|
|
166
|
+
rubric.title,
|
|
167
|
+
(ground_truth or "").strip() or "(none)",
|
|
168
|
+
out["score"],
|
|
169
|
+
out["reasoning"],
|
|
170
|
+
content,
|
|
171
|
+
)
|
|
172
|
+
return out
|
|
154
173
|
|
|
155
174
|
except Exception as e:
|
|
156
175
|
print(f"Error evaluating rubric '{rubric.title}': {e}\njudge output:\n{content}")
|
|
@@ -166,6 +185,7 @@ async def evaluate_rubric_ranking(
|
|
|
166
185
|
api_key: str = "",
|
|
167
186
|
timeout: Optional[float] = None,
|
|
168
187
|
ground_truth: Optional[str] = None,
|
|
188
|
+
enable_logging: bool = True,
|
|
169
189
|
) -> Dict[str, Any]:
|
|
170
190
|
"""
|
|
171
191
|
Rank N responses against a single rubric in one judge call and convert the
|
|
@@ -276,12 +296,34 @@ async def evaluate_rubric_ranking(
|
|
|
276
296
|
for j, p in pos_of.items():
|
|
277
297
|
scores[nonempty[j][0]] = 1.0 - p / max_pos if max_pos > 0 else 1.0
|
|
278
298
|
|
|
279
|
-
|
|
299
|
+
out = {
|
|
280
300
|
"scores": scores,
|
|
281
301
|
"ranking": ranking,
|
|
282
302
|
"reasoning": result.get("reasoning", ""),
|
|
283
303
|
"llm_output": content,
|
|
284
304
|
}
|
|
305
|
+
if enable_logging:
|
|
306
|
+
scores_fmt = " ".join(f"[{i}]={s:.3f}" for i, s in enumerate(scores))
|
|
307
|
+
ranking_fmt = " > ".join(
|
|
308
|
+
f"[{', '.join(str(j) for j in tier)}]" if isinstance(tier, list) else str(tier)
|
|
309
|
+
for tier in ranking
|
|
310
|
+
)
|
|
311
|
+
logger.info(
|
|
312
|
+
"\n┌─ ranked rubric: %s ────────────────────\n"
|
|
313
|
+
"│ ground_truth : %s\n"
|
|
314
|
+
"│ ranking : %s\n"
|
|
315
|
+
"│ scores : %s\n"
|
|
316
|
+
"│ reasoning : %s\n"
|
|
317
|
+
"│ llm_output :\n%s\n"
|
|
318
|
+
"└──────────────────────────────────────────────────",
|
|
319
|
+
rubric.title,
|
|
320
|
+
(ground_truth or "").strip() or "(none)",
|
|
321
|
+
ranking_fmt or "(empty)",
|
|
322
|
+
scores_fmt,
|
|
323
|
+
out["reasoning"],
|
|
324
|
+
content,
|
|
325
|
+
)
|
|
326
|
+
return out
|
|
285
327
|
except Exception as e:
|
|
286
328
|
print(f"Error ranking rubric '{rubric.title}': {e}\njudge output:\n{content}")
|
|
287
329
|
return {"scores": scores, "ranking": [], "reasoning": f"Error: {e}", "llm_output": content}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/excel/workdir/excel_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/example_workdir/reward_fn.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/__init__.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/mcp/provisioners/base_provisioner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/postgres_search/linker_env.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/envs/postgres_search/search_env.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/chroma/filter_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/filter_mapper.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/pinecone/index_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/exceptions.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/postgres/filter_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/__init__.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/builders.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/dsl_parser.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/search_schema/search_types.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/filter_mapper.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/corpus/turbopuffer/namespace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/preprocess/email/clean_bodies.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/anchor_selector.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/batch_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/corpus_capabilities.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/corpus_profile.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/env_rollout.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/filters/quality_gate.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/formatters/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/generated_qa.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/generators/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/metadata_linker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/pipeline_config.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/query_rewriter.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/response_parsers.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/retrieval_query.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/search_agent_linker.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/style_controls.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/transformers/base.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/transformers/dedup.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/wiki_builder.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/rag/qa_generation/wiki_chunk_linker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev26 → benchmax-0.1.2.dev27}/src/benchmax/traces/braintrust/message_extraction.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|