eval-protocol 0.2.71__tar.gz → 0.2.72__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.71/eval_protocol.egg-info → eval_protocol-0.2.72}/PKG-INFO +1 -1
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/exceptions.py +3 -2
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/models.py +7 -5
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/evaluation_test_utils.py +1 -1
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/exception_config.py +0 -1
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/remote_rollout_processor.py +6 -6
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +64 -39
- {eval_protocol-0.2.71 → eval_protocol-0.2.72/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_exceptions.py +21 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/LICENSE +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/README.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/fireworks_tracing.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/create_rft.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/fireworks_rft.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/tracing_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/pyproject.toml +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/setup.cfg +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/setup.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_config.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_format.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_length.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_math.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_models.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/versioneer.py +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.71
|
|
3
|
+
Version: 0.2.72
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-30T03:43:20-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.71"
|
|
14
|
+
"full-revisionid": "a71074ec111c9321e5cb2e8366dbb56504f2fc3a",
|
|
15
|
+
"version": "0.2.72"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -160,12 +160,13 @@ STATUS_CODE_TO_EXCEPTION = {
|
|
|
160
160
|
}
|
|
161
161
|
|
|
162
162
|
|
|
163
|
-
def exception_for_status_code(code: int) -> Optional[EvalProtocolError]:
|
|
163
|
+
def exception_for_status_code(code: int, message: str = "") -> Optional[EvalProtocolError]:
|
|
164
164
|
"""
|
|
165
165
|
Create an exception instance for a given status code.
|
|
166
166
|
|
|
167
167
|
Args:
|
|
168
168
|
code: Status code from Status.Code enum
|
|
169
|
+
message: Optional error message to include in the exception
|
|
169
170
|
|
|
170
171
|
Returns:
|
|
171
172
|
Exception instance or None if code is OK (0)
|
|
@@ -173,4 +174,4 @@ def exception_for_status_code(code: int) -> Optional[EvalProtocolError]:
|
|
|
173
174
|
exception_class = STATUS_CODE_TO_EXCEPTION.get(code)
|
|
174
175
|
if exception_class is None:
|
|
175
176
|
return None
|
|
176
|
-
return exception_class()
|
|
177
|
+
return exception_class(message) if message else exception_class()
|
|
@@ -21,9 +21,6 @@ from eval_protocol.human_id import generate_id
|
|
|
21
21
|
from eval_protocol.types import TerminationReason
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
logger = logging.getLogger(__name__)
|
|
25
|
-
|
|
26
|
-
|
|
27
24
|
class ErrorInfo(BaseModel):
|
|
28
25
|
"""
|
|
29
26
|
AIP-193 ErrorInfo model for structured error details.
|
|
@@ -312,6 +309,11 @@ class Status(BaseModel):
|
|
|
312
309
|
"""Create a status indicating the rollout failed with an internal error."""
|
|
313
310
|
return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
314
311
|
|
|
312
|
+
@classmethod
|
|
313
|
+
def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
314
|
+
"""Create a status indicating an internal error occurred."""
|
|
315
|
+
return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
|
|
316
|
+
|
|
315
317
|
# For backwards compatibility
|
|
316
318
|
@classmethod
|
|
317
319
|
def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
|
|
@@ -319,8 +321,8 @@ class Status(BaseModel):
|
|
|
319
321
|
return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info))
|
|
320
322
|
|
|
321
323
|
@classmethod
|
|
322
|
-
def
|
|
323
|
-
"""Create a status indicating an
|
|
324
|
+
def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
|
|
325
|
+
"""Create a status indicating an error occurred."""
|
|
324
326
|
return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
|
|
325
327
|
|
|
326
328
|
# UNAVAILABLE = 14
|
|
@@ -398,7 +398,7 @@ async def rollout_processor_with_retry(
|
|
|
398
398
|
else:
|
|
399
399
|
# Non-retryable exception - fail immediately
|
|
400
400
|
logging.error(f"❌ Rollout failed (non-retryable error encountered): {repr(e)}")
|
|
401
|
-
row.rollout_status = Status.rollout_error(
|
|
401
|
+
row.rollout_status = Status.rollout_error(str(e))
|
|
402
402
|
return row
|
|
403
403
|
|
|
404
404
|
async def execute_row_with_backoff_and_log(
|
|
@@ -36,7 +36,6 @@ DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = {
|
|
|
36
36
|
litellm.exceptions.InternalServerError,
|
|
37
37
|
litellm.exceptions.Timeout,
|
|
38
38
|
litellm.exceptions.NotFoundError,
|
|
39
|
-
# litellm.exceptions.BadRequestError, # remove this once we have a long term solution
|
|
40
39
|
litellm.exceptions.ServiceUnavailableError,
|
|
41
40
|
litellm.exceptions.APIError,
|
|
42
41
|
# Eval Protocol exceptions
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
@@ -94,7 +94,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
94
94
|
def _post_init() -> None:
|
|
95
95
|
url = f"{remote_base_url}/init"
|
|
96
96
|
try:
|
|
97
|
-
r = requests.post(url, json=init_payload.model_dump(), timeout=
|
|
97
|
+
r = requests.post(url, json=init_payload.model_dump(), timeout=300)
|
|
98
98
|
r.raise_for_status()
|
|
99
99
|
except requests.exceptions.Timeout:
|
|
100
100
|
raise TimeoutError(
|
|
@@ -133,9 +133,9 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
133
133
|
# For all other exceptions, raise them
|
|
134
134
|
raise
|
|
135
135
|
|
|
136
|
-
# Search Fireworks tracing logs for completion
|
|
137
|
-
completed_logs =
|
|
138
|
-
tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
|
|
136
|
+
# Search Fireworks tracing logs for completion (run in thread to avoid blocking event loop)
|
|
137
|
+
completed_logs = await asyncio.to_thread(
|
|
138
|
+
self._tracing_adapter.search_logs, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
|
|
139
139
|
)
|
|
140
140
|
# Filter for logs that actually have status information
|
|
141
141
|
status_logs = []
|
|
@@ -161,8 +161,8 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
161
161
|
f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}"
|
|
162
162
|
)
|
|
163
163
|
|
|
164
|
-
# Create and raise exception if appropriate
|
|
165
|
-
exception = exception_for_status_code(status_code)
|
|
164
|
+
# Create and raise exception if appropriate, preserving original message
|
|
165
|
+
exception = exception_for_status_code(status_code, status_message)
|
|
166
166
|
if exception is not None:
|
|
167
167
|
raise exception
|
|
168
168
|
|
|
@@ -12,6 +12,7 @@ import sys
|
|
|
12
12
|
import asyncio
|
|
13
13
|
from flask import Flask, request, jsonify
|
|
14
14
|
from openai import OpenAI
|
|
15
|
+
import openai
|
|
15
16
|
from dotenv import load_dotenv
|
|
16
17
|
|
|
17
18
|
from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter
|
|
@@ -49,56 +50,80 @@ root_logger.addHandler(FireworksTracingHttpHandler())
|
|
|
49
50
|
app = Flask(__name__)
|
|
50
51
|
|
|
51
52
|
|
|
52
|
-
async def execute_rollout_background(req, api_key):
|
|
53
|
+
async def execute_rollout_background(req: InitRequest, api_key: str):
|
|
53
54
|
"""Execute the OpenAI completion in background and log results"""
|
|
54
55
|
# Attach rollout_id filter to logger
|
|
55
56
|
logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
|
|
56
57
|
logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
59
|
+
model = req.completion_params.get("model")
|
|
60
|
+
# Uncomment if you need to strip fireworks_ai/ prefix
|
|
61
|
+
# if model and isinstance(model, str) and model.startswith("fireworks_ai/"):
|
|
62
|
+
# model = model[len("fireworks_ai/"):]
|
|
63
|
+
|
|
64
|
+
# Prepare completion arguments
|
|
65
|
+
completion_kwargs = {
|
|
66
|
+
"messages": req.messages,
|
|
67
|
+
# "messages": [{"role": "user", "content": "Hello, how are you?"}],
|
|
68
|
+
"model": model,
|
|
69
|
+
"temperature": req.completion_params.get("temperature"),
|
|
70
|
+
"max_tokens": req.completion_params.get("max_tokens"),
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
# Add tools if present
|
|
74
|
+
if req.tools:
|
|
75
|
+
completion_kwargs["tools"] = req.tools
|
|
76
|
+
|
|
77
|
+
logger.info(
|
|
78
|
+
f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}"
|
|
79
|
+
)
|
|
72
80
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
81
|
+
# Create AsyncOpenAI client
|
|
82
|
+
# client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key)
|
|
83
|
+
client = OpenAI(base_url=req.model_base_url, api_key=api_key)
|
|
76
84
|
|
|
77
|
-
|
|
78
|
-
f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}"
|
|
79
|
-
)
|
|
85
|
+
logger.info(f"Sending completion request to model {model}")
|
|
80
86
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
client = OpenAI(base_url=req.model_base_url, api_key=api_key)
|
|
87
|
+
# Make the async model call with timeout
|
|
88
|
+
import time
|
|
84
89
|
|
|
85
|
-
|
|
90
|
+
logger.info(f"timing start: {time.time()}")
|
|
86
91
|
|
|
87
|
-
|
|
88
|
-
import time
|
|
89
|
-
|
|
90
|
-
logger.info(f"timing start: {time.time()}")
|
|
92
|
+
try:
|
|
91
93
|
completion = client.chat.completions.create(**completion_kwargs)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
94
|
+
except (
|
|
95
|
+
openai.AuthenticationError,
|
|
96
|
+
openai.PermissionDeniedError,
|
|
97
|
+
) as e:
|
|
98
|
+
# These errors should be logged and will be retried by RemoteRolloutProcessor
|
|
99
|
+
logger.error(
|
|
100
|
+
f"Rollout {req.metadata.rollout_id} failed: {e}",
|
|
101
|
+
extra={"status": Status.rollout_permission_denied_error(str(e))},
|
|
102
|
+
)
|
|
103
|
+
return
|
|
104
|
+
except openai.NotFoundError as e:
|
|
105
|
+
logger.error(
|
|
106
|
+
f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_not_found_error(str(e))}
|
|
107
|
+
)
|
|
108
|
+
return
|
|
109
|
+
except openai.RateLimitError as e:
|
|
110
|
+
logger.error(
|
|
111
|
+
f"Rollout {req.metadata.rollout_id} failed: {e}",
|
|
112
|
+
extra={"status": Status.rollout_resource_exhausted_error(str(e))},
|
|
113
|
+
)
|
|
114
|
+
return
|
|
97
115
|
except Exception as e:
|
|
98
|
-
#
|
|
116
|
+
# Non-OpenAI errors (shouldn't normally happen but catch anyway)
|
|
99
117
|
logger.error(
|
|
100
|
-
f"Rollout {req.metadata.rollout_id} failed: {e}",
|
|
118
|
+
f"Rollout {req.metadata.rollout_id} failed with unexpected error: {e}",
|
|
119
|
+
extra={"status": Status.rollout_internal_error(str(e))},
|
|
101
120
|
)
|
|
121
|
+
return
|
|
122
|
+
|
|
123
|
+
logger.info(f"Completed response: {completion}")
|
|
124
|
+
logger.info(f"timing end: {time.time()}")
|
|
125
|
+
# Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR
|
|
126
|
+
logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})
|
|
102
127
|
|
|
103
128
|
|
|
104
129
|
@app.route("/init", methods=["POST"])
|
|
@@ -114,7 +139,7 @@ async def init():
|
|
|
114
139
|
# Validate required fields
|
|
115
140
|
if not req.messages:
|
|
116
141
|
error_msg = "messages is required"
|
|
117
|
-
logger.error(error_msg, extra={"status": Status.
|
|
142
|
+
logger.error(error_msg, extra={"status": Status.rollout_internal_error(error_msg)})
|
|
118
143
|
return jsonify({"error": error_msg}), 400
|
|
119
144
|
|
|
120
145
|
# Get API key (prefer request api_key, fallback to environment)
|
|
@@ -126,7 +151,7 @@ async def init():
|
|
|
126
151
|
api_key = os.environ.get("FIREWORKS_API_KEY")
|
|
127
152
|
else:
|
|
128
153
|
error_msg = "API key not provided in request or environment variable"
|
|
129
|
-
logger.error(error_msg, extra={"status": Status.
|
|
154
|
+
logger.error(error_msg, extra={"status": Status.rollout_internal_error(error_msg)})
|
|
130
155
|
return jsonify({"error": error_msg}), 401
|
|
131
156
|
|
|
132
157
|
# 🔥 FIRE: Return immediately with acceptance (within 30s requirement)
|
|
@@ -137,7 +162,7 @@ async def init():
|
|
|
137
162
|
}
|
|
138
163
|
|
|
139
164
|
# Fire and forget: Execute rollout asynchronously
|
|
140
|
-
asyncio.create_task(execute_rollout_background(req, api_key))
|
|
165
|
+
asyncio.create_task(execute_rollout_background(req, api_key or ""))
|
|
141
166
|
|
|
142
167
|
return jsonify(response_data), 200
|
|
143
168
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.71
|
|
3
|
+
Version: 0.2.72
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -348,3 +348,24 @@ def test_integration_with_retry_logic():
|
|
|
348
348
|
assert exception_class in DEFAULT_RETRYABLE_EXCEPTIONS, (
|
|
349
349
|
f"{exception_class.__name__} should be in DEFAULT_RETRYABLE_EXCEPTIONS for retry support"
|
|
350
350
|
)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def test_exception_message_preservation():
|
|
354
|
+
"""Test that error messages are properly preserved in exceptions."""
|
|
355
|
+
test_cases = [
|
|
356
|
+
(13, "test error", InternalError),
|
|
357
|
+
(5, "Model xyz not found", NotFoundError),
|
|
358
|
+
(7, "Invalid API key", PermissionDeniedError),
|
|
359
|
+
]
|
|
360
|
+
|
|
361
|
+
for status_code, message, expected_exception_class in test_cases:
|
|
362
|
+
# Test with message
|
|
363
|
+
exception = exception_for_status_code(status_code, message)
|
|
364
|
+
assert exception is not None
|
|
365
|
+
assert isinstance(exception, expected_exception_class)
|
|
366
|
+
assert str(exception) == message, f"Exception should preserve message '{message}'"
|
|
367
|
+
|
|
368
|
+
# Test without message (should still work)
|
|
369
|
+
exception_no_msg = exception_for_status_code(status_code)
|
|
370
|
+
assert exception_no_msg is not None
|
|
371
|
+
assert isinstance(exception_no_msg, expected_exception_class)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/math_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/posting_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_sim_api_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/docker_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/filesystem_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/python_state_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/data/airline_dataset.jsonl
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/data/retail_dataset.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_tau_bench_airline.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_tau_bench_retail.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/dynamic_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/factory_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/inline_data_loader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/dataset_logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/sqlite_event_bus_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_index_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|