eval-protocol 0.2.25__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.25/eval_protocol.egg-info → eval_protocol-0.2.26}/PKG-INFO +1 -1
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/__init__.py +4 -1
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/openai_responses.py +7 -9
- {eval_protocol-0.2.25 → eval_protocol-0.2.26/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/LICENSE +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/README.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/development/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/pyproject.toml +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/setup.cfg +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/setup.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_config.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_format.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_length.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_math.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_models.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/versioneer.py +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vite-app/dist/assets/index-C8woq7EO.js +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vite-app/dist/assets/index-C8woq7EO.js.map +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vite-app/dist/assets/index-CSKGq1w7.css +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.25 → eval_protocol-0.2.26}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.25
|
|
3
|
+
Version: 0.2.26
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -41,7 +41,10 @@ from .quickstart import aha_judge, multi_turn_assistant_to_ground_truth, assista
|
|
|
41
41
|
from .pytest import evaluation_test, SingleTurnRolloutProcessor
|
|
42
42
|
from .pytest.parameterize import DefaultParameterIdGenerator
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
try:
|
|
45
|
+
from .adapters import OpenAIResponsesAdapter
|
|
46
|
+
except ImportError:
|
|
47
|
+
OpenAIResponsesAdapter = None
|
|
45
48
|
|
|
46
49
|
try:
|
|
47
50
|
from .adapters import LangfuseAdapter, create_langfuse_adapter
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-09-
|
|
11
|
+
"date": "2025-09-23T16:59:02-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.25"
|
|
14
|
+
"full-revisionid": "bcc2d7c22085ccddf46d952e3481012250245d90",
|
|
15
|
+
"version": "0.2.26"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -10,12 +10,12 @@ from typing import List
|
|
|
10
10
|
from typing_extensions import Any
|
|
11
11
|
|
|
12
12
|
from openai.pagination import SyncCursorPage
|
|
13
|
-
from openai.types.chat.
|
|
13
|
+
from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
|
|
14
14
|
from openai.types.chat.chat_completion_message import FunctionCall
|
|
15
15
|
from openai.types.responses import Response
|
|
16
16
|
from openai.types.responses.response_item import ResponseItem
|
|
17
|
-
from openai.types.chat.
|
|
18
|
-
|
|
17
|
+
from openai.types.chat.chat_completion_message_tool_call import (
|
|
18
|
+
ChatCompletionMessageToolCall,
|
|
19
19
|
Function,
|
|
20
20
|
)
|
|
21
21
|
from openai.types.responses.tool import Tool
|
|
@@ -114,11 +114,9 @@ class OpenAIResponsesAdapter(BaseAdapter):
|
|
|
114
114
|
),
|
|
115
115
|
)
|
|
116
116
|
|
|
117
|
-
def _responses_tools_to_chat_completion_tools(
|
|
118
|
-
self, tools: List[Tool]
|
|
119
|
-
) -> Sequence[ChatCompletionFunctionToolParam]:
|
|
117
|
+
def _responses_tools_to_chat_completion_tools(self, tools: List[Tool]) -> Sequence[ChatCompletionToolParam]:
|
|
120
118
|
"""Convert OpenAI Responses API tools to chat completion message function tool calls."""
|
|
121
|
-
chat_completion_tools: List[
|
|
119
|
+
chat_completion_tools: List[ChatCompletionToolParam] = []
|
|
122
120
|
for tool in tools:
|
|
123
121
|
if tool.type == "function":
|
|
124
122
|
chat_completion_tools.append(
|
|
@@ -146,7 +144,7 @@ class OpenAIResponsesAdapter(BaseAdapter):
|
|
|
146
144
|
be added before the assistant message with tool calls.
|
|
147
145
|
"""
|
|
148
146
|
messages: list[Message] = []
|
|
149
|
-
current_tool_calls: list[
|
|
147
|
+
current_tool_calls: list[ChatCompletionMessageToolCall] = []
|
|
150
148
|
tool_call_outputs: list[Message] = []
|
|
151
149
|
|
|
152
150
|
for item in input_items:
|
|
@@ -173,7 +171,7 @@ class OpenAIResponsesAdapter(BaseAdapter):
|
|
|
173
171
|
# Collect tool call outputs to add before assistant message
|
|
174
172
|
tool_call_outputs.append(Message(role="tool", content=item.output, tool_call_id=item.call_id))
|
|
175
173
|
elif item.type == "function_call":
|
|
176
|
-
tool_call =
|
|
174
|
+
tool_call = ChatCompletionMessageToolCall(
|
|
177
175
|
id=item.call_id, type="function", function=Function(name=item.name, arguments=item.arguments)
|
|
178
176
|
)
|
|
179
177
|
current_tool_calls.append(tool_call)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.25
|
|
3
|
+
Version: 0.2.26
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/math_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_envs/posting_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/bfcl_sim_api_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/docker_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/filesystem_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/agent/resources/python_state_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/data/airline_dataset.jsonl
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/data/retail_dataset.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_tau_bench_airline.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/benchmarks/test_tau_bench_retail.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/dataset_logger/dataset_logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/event_bus/sqlite_event_bus_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/orchestration/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_agent/orchestration/base_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/default_dataset_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/evaluation_test_postprocess.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_braintrust.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_langfuse.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_langsmith.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/quickstart/llm_judge_openai_responses.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/list_comparison_math_reward.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.25 → eval_protocol-0.2.26}/eval_protocol/rewards/multiple_choice_math_reward.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|