eval-protocol 0.2.69.dev3__tar.gz → 0.2.70.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.69.dev3/eval_protocol.egg-info → eval_protocol-0.2.70.dev1}/PKG-INFO +1 -1
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/_version.py +3 -3
- eval_protocol-0.2.70.dev1/eval_protocol/exceptions.py +176 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/models.py +162 -112
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_single_turn_rollout_process.py +18 -2
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/exception_config.py +14 -4
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/remote_rollout_processor.py +7 -9
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/tracing_utils.py +2 -2
- eval_protocol-0.2.70.dev1/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +177 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/SOURCES.txt +2 -0
- eval_protocol-0.2.70.dev1/tests/test_exceptions.py +350 -0
- eval_protocol-0.2.69.dev3/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -162
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/LICENSE +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/README.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/fireworks_tracing.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/create_rft.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/fireworks_rft.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/pyproject.toml +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/setup.cfg +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/setup.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_config.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_format.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_length.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_math.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_models.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/versioneer.py +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.69.dev3
|
|
3
|
+
Version: 0.2.70.dev1
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP). EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-29T17:18:36-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.69-dev3"
|
|
14
|
+
"full-revisionid": "0ebd0177dafc55bfa302a49b2d674c0487516eff",
|
|
15
|
+
"version": "0.2.70-dev1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for Eval Protocol that map to gRPC Status codes.
|
|
3
|
+
|
|
4
|
+
These exceptions provide a clean way to handle errors and map them to appropriate
|
|
5
|
+
Status objects following the AIP-193 standard.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EvalProtocolError(Exception):
|
|
12
|
+
"""
|
|
13
|
+
Base exception for all Eval Protocol specific errors.
|
|
14
|
+
|
|
15
|
+
Maps to Status.Code and can be converted to Status objects for structured logging.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
pass
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Standard gRPC status code exceptions
|
|
22
|
+
class CancelledError(EvalProtocolError):
|
|
23
|
+
"""Operation was cancelled (Status.Code.CANCELLED = 1)"""
|
|
24
|
+
|
|
25
|
+
status_code = 1
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class UnknownError(EvalProtocolError):
|
|
29
|
+
"""Unknown error occurred (Status.Code.UNKNOWN = 2)"""
|
|
30
|
+
|
|
31
|
+
status_code = 2
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class InvalidArgumentError(EvalProtocolError):
|
|
35
|
+
"""Invalid argument provided (Status.Code.INVALID_ARGUMENT = 3)"""
|
|
36
|
+
|
|
37
|
+
status_code = 3
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DeadlineExceededError(EvalProtocolError):
|
|
41
|
+
"""Deadline exceeded (Status.Code.DEADLINE_EXCEEDED = 4)"""
|
|
42
|
+
|
|
43
|
+
status_code = 4
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NotFoundError(EvalProtocolError):
|
|
47
|
+
"""Resource not found (Status.Code.NOT_FOUND = 5)"""
|
|
48
|
+
|
|
49
|
+
status_code = 5
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AlreadyExistsError(EvalProtocolError):
|
|
53
|
+
"""Resource already exists (Status.Code.ALREADY_EXISTS = 6)"""
|
|
54
|
+
|
|
55
|
+
status_code = 6
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class PermissionDeniedError(EvalProtocolError):
|
|
59
|
+
"""Permission denied (Status.Code.PERMISSION_DENIED = 7)"""
|
|
60
|
+
|
|
61
|
+
status_code = 7
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ResourceExhaustedError(EvalProtocolError):
|
|
65
|
+
"""Resource exhausted (Status.Code.RESOURCE_EXHAUSTED = 8)"""
|
|
66
|
+
|
|
67
|
+
status_code = 8
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class FailedPreconditionError(EvalProtocolError):
|
|
71
|
+
"""Failed precondition (Status.Code.FAILED_PRECONDITION = 9)"""
|
|
72
|
+
|
|
73
|
+
status_code = 9
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class AbortedError(EvalProtocolError):
|
|
77
|
+
"""Operation was aborted (Status.Code.ABORTED = 10)"""
|
|
78
|
+
|
|
79
|
+
status_code = 10
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class OutOfRangeError(EvalProtocolError):
|
|
83
|
+
"""Value out of range (Status.Code.OUT_OF_RANGE = 11)"""
|
|
84
|
+
|
|
85
|
+
status_code = 11
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class UnimplementedError(EvalProtocolError):
|
|
89
|
+
"""Operation is not implemented (Status.Code.UNIMPLEMENTED = 12)"""
|
|
90
|
+
|
|
91
|
+
status_code = 12
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class InternalError(EvalProtocolError):
|
|
95
|
+
"""Internal server error (Status.Code.INTERNAL = 13)"""
|
|
96
|
+
|
|
97
|
+
status_code = 13
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class UnavailableError(EvalProtocolError):
|
|
101
|
+
"""Service unavailable (Status.Code.UNAVAILABLE = 14)"""
|
|
102
|
+
|
|
103
|
+
status_code = 14
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class DataLossError(EvalProtocolError):
|
|
107
|
+
"""Unrecoverable data loss (Status.Code.DATA_LOSS = 15)"""
|
|
108
|
+
|
|
109
|
+
status_code = 15
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class UnauthenticatedError(EvalProtocolError):
|
|
113
|
+
"""Request lacks valid authentication (Status.Code.UNAUTHENTICATED = 16)"""
|
|
114
|
+
|
|
115
|
+
status_code = 16
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Custom EP exceptions
|
|
119
|
+
class RolloutFinishedError(EvalProtocolError):
|
|
120
|
+
"""Rollout completed successfully (Status.Code.FINISHED = 100)"""
|
|
121
|
+
|
|
122
|
+
status_code = 100
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class RolloutRunningError(EvalProtocolError):
|
|
126
|
+
"""Rollout is still running (Status.Code.RUNNING = 101)"""
|
|
127
|
+
|
|
128
|
+
status_code = 101
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class ScoreInvalidError(EvalProtocolError):
|
|
132
|
+
"""Score is invalid (Status.Code.SCORE_INVALID = 102)"""
|
|
133
|
+
|
|
134
|
+
status_code = 102
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Convenience mapping from status codes to exception classes
|
|
138
|
+
# Only actual error conditions should raise exceptions
|
|
139
|
+
STATUS_CODE_TO_EXCEPTION = {
|
|
140
|
+
0: None, # OK - success, no exception
|
|
141
|
+
1: CancelledError,
|
|
142
|
+
2: UnknownError,
|
|
143
|
+
3: InvalidArgumentError,
|
|
144
|
+
4: DeadlineExceededError,
|
|
145
|
+
5: NotFoundError,
|
|
146
|
+
6: AlreadyExistsError,
|
|
147
|
+
7: PermissionDeniedError,
|
|
148
|
+
8: ResourceExhaustedError,
|
|
149
|
+
9: FailedPreconditionError,
|
|
150
|
+
10: AbortedError,
|
|
151
|
+
11: OutOfRangeError,
|
|
152
|
+
12: UnimplementedError,
|
|
153
|
+
13: InternalError,
|
|
154
|
+
14: UnavailableError,
|
|
155
|
+
15: DataLossError,
|
|
156
|
+
16: UnauthenticatedError,
|
|
157
|
+
100: None, # FINISHED - success, no exception
|
|
158
|
+
101: None, # RUNNING - in progress, no exception
|
|
159
|
+
102: None, # SCORE_INVALID - success, no exception
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def exception_for_status_code(code: int) -> Optional[EvalProtocolError]:
|
|
164
|
+
"""
|
|
165
|
+
Create an exception instance for a given status code.
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
code: Status code from Status.Code enum
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
Exception instance, or None if the code has no exception class (OK, FINISHED, RUNNING, SCORE_INVALID, or an unmapped code)
|
|
172
|
+
"""
|
|
173
|
+
exception_class = STATUS_CODE_TO_EXCEPTION.get(code)
|
|
174
|
+
if exception_class is None:
|
|
175
|
+
return None
|
|
176
|
+
return exception_class()
|
|
@@ -136,6 +136,13 @@ class Status(BaseModel):
|
|
|
136
136
|
"""Create a status indicating the evaluation finished."""
|
|
137
137
|
return cls(code=cls.Code.FINISHED, message="Evaluation finished", details=[])
|
|
138
138
|
|
|
139
|
+
@staticmethod
|
|
140
|
+
def _build_details_with_extra_info(extra_info: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
141
|
+
"""Helper to build details list from extra_info."""
|
|
142
|
+
if extra_info:
|
|
143
|
+
return [ErrorInfo.extra_info(extra_info).to_aip193_format()]
|
|
144
|
+
return []
|
|
145
|
+
|
|
139
146
|
@classmethod
|
|
140
147
|
def aborted(cls, message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
141
148
|
"""Create a status indicating the evaluation was aborted."""
|
|
@@ -160,148 +167,191 @@ class Status(BaseModel):
|
|
|
160
167
|
"""Create a status indicating the rollout finished."""
|
|
161
168
|
return cls(code=cls.Code.FINISHED, message=message, details=details or [])
|
|
162
169
|
|
|
170
|
+
# Error methods organized by Status.Code enum values (1-16)
|
|
171
|
+
|
|
172
|
+
# CANCELLED = 1
|
|
163
173
|
@classmethod
|
|
164
|
-
def
|
|
165
|
-
"""Create a status indicating the rollout
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
174
|
+
def rollout_cancelled_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
175
|
+
"""Create a status indicating the rollout was cancelled."""
|
|
176
|
+
return cls.cancelled_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
177
|
+
|
|
178
|
+
@classmethod
|
|
179
|
+
def cancelled_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
180
|
+
"""Create a status indicating the operation was cancelled."""
|
|
181
|
+
return cls(code=cls.Code.CANCELLED, message=error_message, details=details or [])
|
|
170
182
|
|
|
183
|
+
# UNKNOWN = 2
|
|
171
184
|
@classmethod
|
|
172
|
-
def
|
|
173
|
-
|
|
185
|
+
def rollout_unknown_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
186
|
+
"""Create a status indicating the rollout failed with an unknown error."""
|
|
187
|
+
return cls.unknown_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
188
|
+
|
|
189
|
+
@classmethod
|
|
190
|
+
def unknown_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
191
|
+
"""Create a status indicating an unknown error occurred."""
|
|
192
|
+
return cls(code=cls.Code.UNKNOWN, message=error_message, details=details or [])
|
|
193
|
+
|
|
194
|
+
# INVALID_ARGUMENT = 3
|
|
195
|
+
@classmethod
|
|
196
|
+
def rollout_invalid_argument_error(
|
|
197
|
+
cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
|
|
174
198
|
) -> "Status":
|
|
175
|
-
"""
|
|
176
|
-
|
|
177
|
-
Simple approach that stores exception info directly in details.
|
|
178
|
-
"""
|
|
179
|
-
details = []
|
|
199
|
+
"""Create a status indicating the rollout failed with an invalid argument error."""
|
|
200
|
+
return cls.invalid_argument_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
180
201
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
}
|
|
186
|
-
)
|
|
202
|
+
@classmethod
|
|
203
|
+
def invalid_argument_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
204
|
+
"""Create a status indicating an invalid argument error occurred."""
|
|
205
|
+
return cls(code=cls.Code.INVALID_ARGUMENT, message=error_message, details=details or [])
|
|
187
206
|
|
|
188
|
-
|
|
189
|
-
|
|
207
|
+
# DEADLINE_EXCEEDED = 4
|
|
208
|
+
@classmethod
|
|
209
|
+
def rollout_deadline_exceeded_error(
|
|
210
|
+
cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
|
|
211
|
+
) -> "Status":
|
|
212
|
+
"""Create a status indicating the rollout failed with a deadline exceeded error."""
|
|
213
|
+
return cls.deadline_exceeded_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
190
214
|
|
|
191
|
-
|
|
215
|
+
@classmethod
|
|
216
|
+
def deadline_exceeded_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
217
|
+
"""Create a status indicating a deadline exceeded error occurred."""
|
|
218
|
+
return cls(code=cls.Code.DEADLINE_EXCEEDED, message=error_message, details=details or [])
|
|
192
219
|
|
|
220
|
+
# NOT_FOUND = 5
|
|
193
221
|
@classmethod
|
|
194
|
-
def
|
|
195
|
-
"""
|
|
196
|
-
|
|
197
|
-
"""
|
|
222
|
+
def rollout_not_found_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
223
|
+
"""Create a status indicating the rollout failed with a not found error."""
|
|
224
|
+
return cls.not_found_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
198
225
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
exception_message = detail["exception_message"]
|
|
204
|
-
|
|
205
|
-
logger.info(f"Found exception info: {exception_type}")
|
|
206
|
-
|
|
207
|
-
# Dynamically import and raise the exception
|
|
208
|
-
exception_class = cls._import_exception_class(exception_type)
|
|
209
|
-
if exception_class:
|
|
210
|
-
logger.info(f"Found exception class: {exception_class}")
|
|
211
|
-
# Try different constructor patterns
|
|
212
|
-
exception_to_raise = cls._create_exception_instance(exception_class, exception_message)
|
|
213
|
-
if exception_to_raise:
|
|
214
|
-
logger.info(f"Re-raising {exception_type} from status details")
|
|
215
|
-
raise exception_to_raise
|
|
216
|
-
else:
|
|
217
|
-
logger.debug(f"Could not create instance of {exception_type}")
|
|
218
|
-
continue
|
|
219
|
-
else:
|
|
220
|
-
logger.debug(f"Could not import exception type: {exception_type}")
|
|
221
|
-
continue
|
|
226
|
+
@classmethod
|
|
227
|
+
def not_found_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
228
|
+
"""Create a status indicating a not found error occurred."""
|
|
229
|
+
return cls(code=cls.Code.NOT_FOUND, message=error_message, details=details or [])
|
|
222
230
|
|
|
223
|
-
|
|
231
|
+
# ALREADY_EXISTS = 6
|
|
232
|
+
@classmethod
|
|
233
|
+
def rollout_already_exists_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
234
|
+
"""Create a status indicating the rollout failed with an already exists error."""
|
|
235
|
+
return cls.already_exists_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
224
236
|
|
|
225
237
|
@classmethod
|
|
226
|
-
def
|
|
227
|
-
"""
|
|
228
|
-
|
|
238
|
+
def already_exists_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
239
|
+
"""Create a status indicating an already exists error occurred."""
|
|
240
|
+
return cls(code=cls.Code.ALREADY_EXISTS, message=error_message, details=details or [])
|
|
229
241
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
242
|
+
# PERMISSION_DENIED = 7
|
|
243
|
+
@classmethod
|
|
244
|
+
def rollout_permission_denied_error(
|
|
245
|
+
cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
|
|
246
|
+
) -> "Status":
|
|
247
|
+
"""Create a status indicating the rollout failed with a permission denied error."""
|
|
248
|
+
return cls.permission_denied_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
233
249
|
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
"""
|
|
237
|
-
|
|
238
|
-
patterns = [
|
|
239
|
-
# Pattern 1: Just message
|
|
240
|
-
lambda: exception_class(message),
|
|
241
|
-
# Pattern 2: Message as named parameter
|
|
242
|
-
lambda: exception_class(message=message),
|
|
243
|
-
# Pattern 3: Message + common litellm parameters
|
|
244
|
-
# NOTE: we are losing some diagnostic information here by not passing the model and llm_provider. We could try to capture full exception state in rollout_error_from_exception.
|
|
245
|
-
lambda: exception_class(message, model="unknown", llm_provider="unknown"),
|
|
246
|
-
lambda: exception_class(message=message, model="unknown", llm_provider="unknown"),
|
|
247
|
-
# Pattern 4: No arguments (fallback)
|
|
248
|
-
lambda: exception_class(),
|
|
249
|
-
]
|
|
250
|
-
|
|
251
|
-
for i, pattern in enumerate(patterns):
|
|
252
|
-
try:
|
|
253
|
-
instance = pattern()
|
|
254
|
-
logger.debug(f"Successfully created {exception_class.__name__} using pattern {i + 1}")
|
|
255
|
-
return instance
|
|
256
|
-
except (TypeError, ValueError) as e:
|
|
257
|
-
logger.debug(f"Pattern {i + 1} failed for {exception_class.__name__}: {e}")
|
|
258
|
-
continue
|
|
250
|
+
@classmethod
|
|
251
|
+
def permission_denied_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
252
|
+
"""Create a status indicating a permission denied error occurred."""
|
|
253
|
+
return cls(code=cls.Code.PERMISSION_DENIED, message=error_message, details=details or [])
|
|
259
254
|
|
|
260
|
-
|
|
261
|
-
|
|
255
|
+
# RESOURCE_EXHAUSTED = 8
|
|
256
|
+
@classmethod
|
|
257
|
+
def rollout_resource_exhausted_error(
|
|
258
|
+
cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
|
|
259
|
+
) -> "Status":
|
|
260
|
+
"""Create a status indicating the rollout failed with a resource exhausted error."""
|
|
261
|
+
return cls.resource_exhausted_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
262
262
|
|
|
263
263
|
@classmethod
|
|
264
|
-
def
|
|
265
|
-
"""
|
|
266
|
-
|
|
264
|
+
def resource_exhausted_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
265
|
+
"""Create a status indicating a resource exhausted error occurred."""
|
|
266
|
+
return cls(code=cls.Code.RESOURCE_EXHAUSTED, message=error_message, details=details or [])
|
|
267
267
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
268
|
+
# FAILED_PRECONDITION = 9
|
|
269
|
+
@classmethod
|
|
270
|
+
def rollout_failed_precondition_error(
|
|
271
|
+
cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
|
|
272
|
+
) -> "Status":
|
|
273
|
+
"""Create a status indicating the rollout failed with a failed precondition error."""
|
|
274
|
+
return cls.failed_precondition_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
271
275
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
"""
|
|
275
|
-
|
|
276
|
-
# Require fully qualified names (no automatic prefixing)
|
|
277
|
-
if "." not in exception_type:
|
|
278
|
-
logging.getLogger(__name__).debug(f"Exception type must be fully qualified: {exception_type}")
|
|
279
|
-
return None
|
|
276
|
+
@classmethod
|
|
277
|
+
def failed_precondition_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
278
|
+
"""Create a status indicating a failed precondition error occurred."""
|
|
279
|
+
return cls(code=cls.Code.FAILED_PRECONDITION, message=error_message, details=details or [])
|
|
280
280
|
|
|
281
|
-
|
|
282
|
-
|
|
281
|
+
# ABORTED = 10
|
|
282
|
+
@classmethod
|
|
283
|
+
def rollout_aborted_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
284
|
+
"""Create a status indicating the rollout was aborted."""
|
|
285
|
+
return cls.aborted(error_message, cls._build_details_with_extra_info(extra_info))
|
|
283
286
|
|
|
284
|
-
|
|
285
|
-
|
|
287
|
+
# OUT_OF_RANGE = 11
|
|
288
|
+
@classmethod
|
|
289
|
+
def rollout_out_of_range_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
290
|
+
"""Create a status indicating the rollout failed with an out of range error."""
|
|
291
|
+
return cls.out_of_range_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
286
292
|
|
|
287
|
-
|
|
288
|
-
|
|
293
|
+
@classmethod
|
|
294
|
+
def out_of_range_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
295
|
+
"""Create a status indicating an out of range error occurred."""
|
|
296
|
+
return cls(code=cls.Code.OUT_OF_RANGE, message=error_message, details=details or [])
|
|
289
297
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
298
|
+
# UNIMPLEMENTED = 12
|
|
299
|
+
@classmethod
|
|
300
|
+
def rollout_unimplemented_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
301
|
+
"""Create a status indicating the rollout failed with an unimplemented error."""
|
|
302
|
+
return cls.unimplemented_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
293
303
|
|
|
294
|
-
|
|
304
|
+
@classmethod
|
|
305
|
+
def unimplemented_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
306
|
+
"""Create a status indicating an unimplemented error occurred."""
|
|
307
|
+
return cls(code=cls.Code.UNIMPLEMENTED, message=error_message, details=details or [])
|
|
295
308
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
309
|
+
# INTERNAL = 13
|
|
310
|
+
@classmethod
|
|
311
|
+
def rollout_internal_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
312
|
+
"""Create a status indicating the rollout failed with an internal error."""
|
|
313
|
+
return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
299
314
|
|
|
300
315
|
@classmethod
|
|
301
|
-
def
|
|
302
|
-
"""Create a status indicating
|
|
316
|
+
def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
317
|
+
"""Create a status indicating an internal error occurred."""
|
|
303
318
|
return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
|
|
304
319
|
|
|
320
|
+
# UNAVAILABLE = 14
|
|
321
|
+
@classmethod
|
|
322
|
+
def rollout_unavailable_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
323
|
+
"""Create a status indicating the rollout failed with an unavailable error."""
|
|
324
|
+
return cls.unavailable_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
325
|
+
|
|
326
|
+
@classmethod
|
|
327
|
+
def unavailable_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
328
|
+
"""Create a status indicating an unavailable error occurred."""
|
|
329
|
+
return cls(code=cls.Code.UNAVAILABLE, message=error_message, details=details or [])
|
|
330
|
+
|
|
331
|
+
# DATA_LOSS = 15
|
|
332
|
+
@classmethod
|
|
333
|
+
def rollout_data_loss_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
334
|
+
"""Create a status indicating the rollout failed with a data loss error."""
|
|
335
|
+
return cls.data_loss_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
336
|
+
|
|
337
|
+
@classmethod
|
|
338
|
+
def data_loss_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
339
|
+
"""Create a status indicating a data loss error occurred."""
|
|
340
|
+
return cls(code=cls.Code.DATA_LOSS, message=error_message, details=details or [])
|
|
341
|
+
|
|
342
|
+
# UNAUTHENTICATED = 16
|
|
343
|
+
@classmethod
|
|
344
|
+
def rollout_unauthenticated_error(
|
|
345
|
+
cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
|
|
346
|
+
) -> "Status":
|
|
347
|
+
"""Create a status indicating the rollout failed with an unauthenticated error."""
|
|
348
|
+
return cls.unauthenticated_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
349
|
+
|
|
350
|
+
@classmethod
|
|
351
|
+
def unauthenticated_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
352
|
+
"""Create a status indicating an unauthenticated error occurred."""
|
|
353
|
+
return cls(code=cls.Code.UNAUTHENTICATED, message=error_message, details=details or [])
|
|
354
|
+
|
|
305
355
|
@classmethod
|
|
306
356
|
def score_invalid(
|
|
307
357
|
cls, message: str = "Score is invalid", details: Optional[List[Dict[str, Any]]] = None
|
|
@@ -21,6 +21,16 @@ logger = logging.getLogger(__name__)
|
|
|
21
21
|
class SingleTurnRolloutProcessor(RolloutProcessor):
|
|
22
22
|
"""Single turn rollout processor for direct LLM calls."""
|
|
23
23
|
|
|
24
|
+
def __init__(self, *, drop_trailing_assistant_messages: bool = True) -> None:
|
|
25
|
+
"""
|
|
26
|
+
Args:
|
|
27
|
+
drop_trailing_assistant_messages: When True (default), strip any trailing
|
|
28
|
+
assistant messages from the input conversation before calling the model.
|
|
29
|
+
This helps when datasets include previous assistant turns and you want
|
|
30
|
+
the model to answer the latest user query.
|
|
31
|
+
"""
|
|
32
|
+
self.drop_trailing_assistant_messages = drop_trailing_assistant_messages
|
|
33
|
+
|
|
24
34
|
def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]:
|
|
25
35
|
"""Generate single turn rollout tasks and return them for external handling."""
|
|
26
36
|
# Do not modify global LiteLLM cache. Disable caching per-request instead.
|
|
@@ -32,7 +42,13 @@ class SingleTurnRolloutProcessor(RolloutProcessor):
|
|
|
32
42
|
if len(row.messages) == 0:
|
|
33
43
|
raise ValueError("Messages is empty. Please provide a non-empty dataset")
|
|
34
44
|
|
|
35
|
-
|
|
45
|
+
# Optionally drop trailing assistant messages for single-turn prompts
|
|
46
|
+
messages_for_request: List[Message] = list(row.messages)
|
|
47
|
+
if self.drop_trailing_assistant_messages:
|
|
48
|
+
while messages_for_request and messages_for_request[-1].role == "assistant":
|
|
49
|
+
messages_for_request.pop()
|
|
50
|
+
|
|
51
|
+
messages_payload = [message.model_dump() for message in messages_for_request]
|
|
36
52
|
|
|
37
53
|
request_params = {"messages": messages_payload, **config.completion_params}
|
|
38
54
|
# Ensure caching is disabled only for this request (review feedback)
|
|
@@ -114,7 +130,7 @@ class SingleTurnRolloutProcessor(RolloutProcessor):
|
|
|
114
130
|
except Exception:
|
|
115
131
|
pass
|
|
116
132
|
|
|
117
|
-
messages = list(
|
|
133
|
+
messages = list(messages_for_request) + [
|
|
118
134
|
Message(
|
|
119
135
|
role="assistant",
|
|
120
136
|
content=assistant_content,
|
{eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/exception_config.py
RENAMED
|
@@ -11,7 +11,9 @@ import backoff
|
|
|
11
11
|
import litellm
|
|
12
12
|
import requests
|
|
13
13
|
import httpx
|
|
14
|
-
|
|
14
|
+
|
|
15
|
+
import eval_protocol.exceptions
|
|
16
|
+
|
|
15
17
|
|
|
16
18
|
# Default exceptions that should be retried with backoff
|
|
17
19
|
DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = {
|
|
@@ -29,14 +31,22 @@ DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = {
|
|
|
29
31
|
httpx.TimeoutException,
|
|
30
32
|
httpx.NetworkError,
|
|
31
33
|
httpx.RemoteProtocolError,
|
|
34
|
+
# LiteLLM library exceptions
|
|
32
35
|
litellm.exceptions.RateLimitError,
|
|
33
36
|
litellm.exceptions.InternalServerError,
|
|
34
37
|
litellm.exceptions.Timeout,
|
|
35
38
|
litellm.exceptions.NotFoundError,
|
|
36
|
-
litellm.exceptions.BadRequestError, # remove this once we have a long term solution
|
|
39
|
+
# litellm.exceptions.BadRequestError, # remove this once we have a long term solution
|
|
37
40
|
litellm.exceptions.ServiceUnavailableError,
|
|
38
|
-
|
|
39
|
-
|
|
41
|
+
litellm.exceptions.APIError,
|
|
42
|
+
# Eval Protocol exceptions
|
|
43
|
+
eval_protocol.exceptions.UnknownError,
|
|
44
|
+
eval_protocol.exceptions.DeadlineExceededError,
|
|
45
|
+
eval_protocol.exceptions.NotFoundError,
|
|
46
|
+
eval_protocol.exceptions.PermissionDeniedError,
|
|
47
|
+
eval_protocol.exceptions.UnavailableError,
|
|
48
|
+
eval_protocol.exceptions.UnauthenticatedError,
|
|
49
|
+
eval_protocol.exceptions.ResourceExhaustedError,
|
|
40
50
|
}
|
|
41
51
|
|
|
42
52
|
|
|
@@ -10,6 +10,7 @@ from eval_protocol.types.remote_rollout_processor import (
|
|
|
10
10
|
DataLoaderConfig,
|
|
11
11
|
)
|
|
12
12
|
from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter
|
|
13
|
+
from eval_protocol.exceptions import exception_for_status_code
|
|
13
14
|
|
|
14
15
|
from .rollout_processor import RolloutProcessor
|
|
15
16
|
from .types import RolloutProcessorConfig
|
|
@@ -97,13 +98,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
97
98
|
r.raise_for_status()
|
|
98
99
|
except requests.exceptions.Timeout:
|
|
99
100
|
raise TimeoutError(
|
|
100
|
-
"The /init endpoint timed out after 30 seconds.
|
|
101
|
-
"CRITICAL: The /init endpoint must return immediately (within 30s) and NOT block on rollout execution. "
|
|
102
|
-
"Your remote server should:\n"
|
|
103
|
-
"1. Accept the /init request and return a 200 response immediately\n"
|
|
104
|
-
"2. Process the actual rollout asynchronously in the background\n"
|
|
105
|
-
"3. Use the /status endpoint to report progress\n"
|
|
106
|
-
"For Python/Node.js: Start a separate process per rollout to avoid blocking the /init response."
|
|
101
|
+
f"The /init endpoint tried {url} with {init_payload.model_dump()} but timed out after 30 seconds."
|
|
107
102
|
)
|
|
108
103
|
|
|
109
104
|
await asyncio.to_thread(_post_init)
|
|
@@ -166,7 +161,10 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
166
161
|
f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}"
|
|
167
162
|
)
|
|
168
163
|
|
|
169
|
-
|
|
164
|
+
# Create and raise exception if appropriate
|
|
165
|
+
exception = exception_for_status_code(status_code)
|
|
166
|
+
if exception is not None:
|
|
167
|
+
raise exception
|
|
170
168
|
|
|
171
169
|
row.rollout_status = Status(
|
|
172
170
|
code=Status.Code(status_code),
|
|
@@ -183,7 +181,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
183
181
|
f"Loop completed without breaking for {row.execution_metadata.rollout_id}, which means we timed out"
|
|
184
182
|
)
|
|
185
183
|
# Loop completed without breaking, which means we timed out
|
|
186
|
-
row.rollout_status = Status.
|
|
184
|
+
row.rollout_status = Status.rollout_deadline_exceeded_error(
|
|
187
185
|
f"Rollout {row.execution_metadata.rollout_id} timed out after {timeout_seconds} seconds"
|
|
188
186
|
)
|
|
189
187
|
|
{eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/tracing_utils.py
RENAMED
|
@@ -151,14 +151,14 @@ def update_row_with_remote_trace(
|
|
|
151
151
|
output_rows: List[EvaluationRow] = [r for result in results for r in result.rows]
|
|
152
152
|
|
|
153
153
|
if len(output_rows) == 0: # Fallback to original row if no remote data found
|
|
154
|
-
row.rollout_status = Status(
|
|
154
|
+
row.rollout_status = Status.rollout_not_found_error("No remote data found for rollout")
|
|
155
155
|
return None
|
|
156
156
|
elif len(output_rows) == 1: # Return the remote row
|
|
157
157
|
remote_row = output_rows[0]
|
|
158
158
|
|
|
159
159
|
# if the remote_row has the same number of messages as the original row, something went wrong
|
|
160
160
|
if len(remote_row.messages) == len(row.messages):
|
|
161
|
-
row.rollout_status = Status.
|
|
161
|
+
row.rollout_status = Status.rollout_internal_error(
|
|
162
162
|
"Rollout finished with the same number of messages as the original row"
|
|
163
163
|
)
|
|
164
164
|
return None
|