eval-protocol 0.2.28__tar.gz → 0.2.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.28/eval_protocol.egg-info → eval_protocol-0.2.30}/PKG-INFO +1 -1
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/evaluation_test.py +5 -1
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/handle_persist_flow.py +3 -1
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/remote_rollout_processor.py +31 -34
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/types/remote_rollout_processor.py +8 -1
- {eval_protocol-0.2.28 → eval_protocol-0.2.30/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/LICENSE +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/README.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/development/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/pyproject.toml +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/setup.cfg +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/setup.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_config.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_format.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_length.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_math.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_models.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/versioneer.py +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vite-app/dist/assets/index-C8woq7EO.js +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vite-app/dist/assets/index-C8woq7EO.js.map +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vite-app/dist/assets/index-CSKGq1w7.css +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.28 → eval_protocol-0.2.30}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.28
|
|
3
|
+
Version: 0.2.30
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-09-
|
|
11
|
+
"date": "2025-09-26T14:10:14-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.28"
|
|
14
|
+
"full-revisionid": "626a125899fb42ed135a1f223b3e827f37e44ae0",
|
|
15
|
+
"version": "0.2.30"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -189,6 +189,7 @@ def evaluation_test(
|
|
|
189
189
|
completion_params = parse_ep_completion_params(completion_params)
|
|
190
190
|
original_completion_params = completion_params
|
|
191
191
|
passed_threshold = parse_ep_passed_threshold(passed_threshold)
|
|
192
|
+
custom_invocation_id = os.environ.get("EP_INVOCATION_ID", None)
|
|
192
193
|
|
|
193
194
|
def decorator(
|
|
194
195
|
test_func: TestFunction,
|
|
@@ -228,7 +229,10 @@ def evaluation_test(
|
|
|
228
229
|
# Create wrapper function with exact signature that pytest expects
|
|
229
230
|
def create_wrapper_with_signature() -> Callable[[], None]:
|
|
230
231
|
# Create the function body that will be used
|
|
231
|
-
|
|
232
|
+
if custom_invocation_id:
|
|
233
|
+
invocation_id = custom_invocation_id
|
|
234
|
+
else:
|
|
235
|
+
invocation_id = generate_id()
|
|
232
236
|
|
|
233
237
|
async def wrapper_body(**kwargs: Unpack[ParameterizedTestKwargs]) -> None:
|
|
234
238
|
# Store URL for viewing results (after all postprocessing is complete)
|
|
@@ -7,6 +7,7 @@ import pathlib
|
|
|
7
7
|
import re
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
+
from eval_protocol.directory_utils import find_eval_protocol_dir
|
|
10
11
|
from eval_protocol.models import EvaluationRow
|
|
11
12
|
from eval_protocol.pytest.store_experiment_link import store_experiment_link
|
|
12
13
|
import requests
|
|
@@ -25,7 +26,8 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
|
|
|
25
26
|
if row.execution_metadata and row.execution_metadata.experiment_id:
|
|
26
27
|
experiments[row.execution_metadata.experiment_id].append(row)
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
eval_protocol_dir = find_eval_protocol_dir()
|
|
30
|
+
exp_dir = pathlib.Path(eval_protocol_dir) / "experiment_results"
|
|
29
31
|
exp_dir.mkdir(parents=True, exist_ok=True)
|
|
30
32
|
|
|
31
33
|
# Create one JSONL file per experiment_id
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
@@ -6,6 +6,7 @@ import requests
|
|
|
6
6
|
|
|
7
7
|
from eval_protocol.models import EvaluationRow, Status
|
|
8
8
|
from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader
|
|
9
|
+
from eval_protocol.types.remote_rollout_processor import InitRequest, RolloutMetadata
|
|
9
10
|
from .rollout_processor import RolloutProcessor
|
|
10
11
|
from .types import RolloutProcessorConfig
|
|
11
12
|
|
|
@@ -14,25 +15,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
14
15
|
"""
|
|
15
16
|
Rollout processor that triggers a remote HTTP server to perform the rollout.
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
- POST {remote_base_url}/init
|
|
19
|
-
Body: {
|
|
20
|
-
"rollout_id": str,
|
|
21
|
-
"model": str,
|
|
22
|
-
"messages": list[dict],
|
|
23
|
-
"tools": list[dict] | null,
|
|
24
|
-
"metadata": {
|
|
25
|
-
"invocation_id": str,
|
|
26
|
-
"experiment_id": str,
|
|
27
|
-
"rollout_id": str,
|
|
28
|
-
"run_id": str | null,
|
|
29
|
-
"row_id": str | null
|
|
30
|
-
},
|
|
31
|
-
}
|
|
32
|
-
Returns: {"ok": true}
|
|
33
|
-
|
|
34
|
-
- GET {remote_base_url}/status?rollout_id=...
|
|
35
|
-
Returns: {"terminated": bool, "info": {...}?}
|
|
18
|
+
See https://evalprotocol.io/tutorial/remote-rollout-processor for documentation.
|
|
36
19
|
"""
|
|
37
20
|
|
|
38
21
|
def __init__(
|
|
@@ -71,14 +54,25 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
71
54
|
async def _process_row(row: EvaluationRow) -> EvaluationRow:
|
|
72
55
|
start_time = time.perf_counter()
|
|
73
56
|
|
|
57
|
+
if row.execution_metadata.invocation_id is None:
|
|
58
|
+
raise ValueError("Invocation ID is required in RemoteRolloutProcessor")
|
|
59
|
+
if row.execution_metadata.experiment_id is None:
|
|
60
|
+
raise ValueError("Experiment ID is required in RemoteRolloutProcessor")
|
|
61
|
+
if row.execution_metadata.rollout_id is None:
|
|
62
|
+
raise ValueError("Rollout ID is required in RemoteRolloutProcessor")
|
|
63
|
+
if row.execution_metadata.run_id is None:
|
|
64
|
+
raise ValueError("Run ID is required in RemoteRolloutProcessor")
|
|
65
|
+
if row.input_metadata.row_id is None:
|
|
66
|
+
raise ValueError("Row ID is required in RemoteRolloutProcessor")
|
|
67
|
+
|
|
74
68
|
# Build request metadata and payload
|
|
75
|
-
meta:
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
69
|
+
meta: RolloutMetadata = RolloutMetadata(
|
|
70
|
+
invocation_id=row.execution_metadata.invocation_id,
|
|
71
|
+
experiment_id=row.execution_metadata.experiment_id,
|
|
72
|
+
rollout_id=row.execution_metadata.rollout_id,
|
|
73
|
+
run_id=row.execution_metadata.run_id,
|
|
74
|
+
row_id=row.input_metadata.row_id,
|
|
75
|
+
)
|
|
82
76
|
|
|
83
77
|
model: Optional[str] = None
|
|
84
78
|
if row.input_metadata and row.input_metadata.completion_params:
|
|
@@ -110,18 +104,21 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
110
104
|
}
|
|
111
105
|
clean_messages.append({k: v for k, v in md.items() if k in allowed_message_fields and v is not None})
|
|
112
106
|
|
|
113
|
-
|
|
114
|
-
"
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
107
|
+
if row.execution_metadata.rollout_id is None:
|
|
108
|
+
raise ValueError("Rollout ID is required in RemoteRolloutProcessor")
|
|
109
|
+
|
|
110
|
+
init_payload: InitRequest = InitRequest(
|
|
111
|
+
model=model,
|
|
112
|
+
messages=clean_messages,
|
|
113
|
+
tools=row.tools,
|
|
114
|
+
metadata=meta,
|
|
115
|
+
model_base_url=config.kwargs.get("model_base_url", None),
|
|
116
|
+
)
|
|
120
117
|
|
|
121
118
|
# Fire-and-poll
|
|
122
119
|
def _post_init() -> None:
|
|
123
120
|
url = f"{remote_base_url}/init"
|
|
124
|
-
r = requests.post(url, json=init_payload, timeout=30)
|
|
121
|
+
r = requests.post(url, json=init_payload.model_dump(), timeout=30)
|
|
125
122
|
r.raise_for_status()
|
|
126
123
|
|
|
127
124
|
await asyncio.to_thread(_post_init)
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/types/remote_rollout_processor.py
RENAMED
|
@@ -20,10 +20,17 @@ class RolloutMetadata(BaseModel):
|
|
|
20
20
|
class InitRequest(BaseModel):
|
|
21
21
|
"""Request model for POST /init endpoint."""
|
|
22
22
|
|
|
23
|
-
rollout_id: str
|
|
24
23
|
model: str
|
|
25
24
|
messages: List[Message] = Field(min_length=1)
|
|
26
25
|
tools: Optional[List[Dict[str, Any]]] = None
|
|
26
|
+
|
|
27
|
+
model_base_url: Optional[str] = None
|
|
28
|
+
"""
|
|
29
|
+
A Base URL that the remote server can use to make LLM calls. This is useful
|
|
30
|
+
to configure on the eval-protocol side for flexibility in
|
|
31
|
+
development/traning.
|
|
32
|
+
"""
|
|
33
|
+
|
|
27
34
|
metadata: RolloutMetadata
|
|
28
35
|
|
|
29
36
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.28
|
|
3
|
+
Version: 0.2.30
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/math_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_envs/posting_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/bfcl_sim_api_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/docker_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/filesystem_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/agent/resources/python_state_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/data/airline_dataset.jsonl
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/data/retail_dataset.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_tau_bench_airline.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/benchmarks/test_tau_bench_retail.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/dynamic_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/factory_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/data_loader/inline_data_loader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/dataset_logger/dataset_logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/event_bus/sqlite_event_bus_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/orchestration/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_agent/orchestration/base_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/default_dataset_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/pytest/evaluation_test_postprocess.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.28 → eval_protocol-0.2.30}/eval_protocol/quickstart/llm_judge_braintrust.py
RENAMED
|
File without changes
|