eval-protocol 0.2.35.dev1__tar.gz → 0.2.35.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.35.dev1/eval_protocol.egg-info → eval_protocol-0.2.35.dev2}/PKG-INFO +1 -1
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/__init__.py +2 -1
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli.py +1 -7
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -4
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +2 -1
- eval_protocol-0.2.35.dev2/eval_protocol/logging/elasticsearch_client.py +286 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/logging/elasticsearch_direct_http_handler.py +58 -20
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/logging/elasticsearch_index_manager.py +47 -66
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/elasticsearch_setup.py +8 -8
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/remote_rollout_processor.py +8 -3
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/remote_rollout_processor.py +9 -2
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/SOURCES.txt +1 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/LICENSE +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/README.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/pyproject.toml +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/setup.cfg +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/setup.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_format.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_math.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/versioneer.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/index-C8woq7EO.js +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/index-C8woq7EO.js.map +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/index-CSKGq1w7.css +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.35.dev1
|
|
3
|
+
Version: 0.2.35.dev2
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -24,7 +24,7 @@ from .mcp_env import (
|
|
|
24
24
|
)
|
|
25
25
|
from .data_loader import DynamicDataLoader, InlineDataLoader
|
|
26
26
|
from . import mcp, rewards
|
|
27
|
-
from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata
|
|
27
|
+
from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata, Status
|
|
28
28
|
from .playback_policy import PlaybackPolicyBase
|
|
29
29
|
from .resources import create_llm_resource
|
|
30
30
|
from .reward_function import RewardFunction
|
|
@@ -63,6 +63,7 @@ except ImportError:
|
|
|
63
63
|
warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
|
|
64
64
|
|
|
65
65
|
__all__ = [
|
|
66
|
+
"Status",
|
|
66
67
|
"RemoteRolloutProcessor",
|
|
67
68
|
"InputMetadata",
|
|
68
69
|
"EvaluationRow",
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-02T12:04:07-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.35-dev1"
|
|
14
|
+
"full-revisionid": "52178b3b90bb27a7f53fcbbba0bfbb50e7ebb416",
|
|
15
|
+
"version": "0.2.35-dev2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -15,14 +15,8 @@ from pathlib import Path
|
|
|
15
15
|
logger = logging.getLogger(__name__)
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
from eval_protocol.evaluation import create_evaluation, preview_evaluation
|
|
19
|
-
|
|
20
18
|
from .cli_commands.agent_eval_cmd import agent_eval_command
|
|
21
|
-
from .cli_commands.common import (
|
|
22
|
-
check_agent_environment,
|
|
23
|
-
check_environment,
|
|
24
|
-
setup_logging,
|
|
25
|
-
)
|
|
19
|
+
from .cli_commands.common import setup_logging
|
|
26
20
|
from .cli_commands.deploy import deploy_command
|
|
27
21
|
from .cli_commands.deploy_mcp import deploy_mcp_command
|
|
28
22
|
from .cli_commands.logs import logs_command
|
{eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py
RENAMED
|
@@ -27,11 +27,7 @@ import logging # For logger instance
|
|
|
27
27
|
import os # For environment variables
|
|
28
28
|
from pathlib import Path
|
|
29
29
|
|
|
30
|
-
from pydantic import ValidationError
|
|
31
|
-
|
|
32
|
-
from eval_protocol.agent import Orchestrator
|
|
33
30
|
from eval_protocol.agent.task_manager import TaskManager
|
|
34
|
-
from eval_protocol.models import TaskDefinitionModel # Import the new Pydantic model
|
|
35
31
|
|
|
36
32
|
# setup_logging is already called in cli.py's main, but good for standalone use if any
|
|
37
33
|
# from .common import setup_logging
|
{eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/run_eval_cmd.py
RENAMED
|
@@ -17,7 +17,6 @@ from omegaconf import ( # Ensure MISSING is imported if used in configs
|
|
|
17
17
|
OmegaConf,
|
|
18
18
|
)
|
|
19
19
|
|
|
20
|
-
from eval_protocol.execution.pipeline import EvaluationPipeline
|
|
21
20
|
|
|
22
21
|
logger = logging.getLogger(__name__)
|
|
23
22
|
|
|
@@ -26,6 +25,8 @@ def run_evaluation_command_logic(cfg: DictConfig) -> None:
|
|
|
26
25
|
"""
|
|
27
26
|
Main logic for the 'run-evaluation' command.
|
|
28
27
|
"""
|
|
28
|
+
from eval_protocol.execution.pipeline import EvaluationPipeline
|
|
29
|
+
|
|
29
30
|
logger.info("Starting 'run-evaluation' command with resolved Hydra config.")
|
|
30
31
|
|
|
31
32
|
# Make Hydra's runtime output directory available to the pipeline if needed
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized Elasticsearch client for all Elasticsearch API operations.
|
|
3
|
+
|
|
4
|
+
This module provides a unified interface for all Elasticsearch operations
|
|
5
|
+
used throughout the codebase, including index management, document operations,
|
|
6
|
+
and search functionality.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import requests
|
|
11
|
+
from typing import Any, Dict, List, Optional, Union
|
|
12
|
+
from urllib.parse import urlparse
|
|
13
|
+
from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ElasticsearchClient:
    """Centralized client for all Elasticsearch operations.

    All public methods are best-effort: transport failures and unexpected
    responses are reported through the return value (``False`` or ``None``)
    instead of being raised to the caller.
    """

    def __init__(self, config: ElasticsearchConfig):
        """Initialize the Elasticsearch client.

        Args:
            config: Elasticsearch configuration. ``url``, ``index_name`` and
                ``api_key`` are read here; ``verify_ssl`` is read on every
                request.
        """
        self.config = config
        # Drop trailing slashes so URL joins below never produce "//".
        self.base_url = config.url.rstrip("/")
        self.index_url = f"{self.base_url}/{config.index_name}"
        self._headers = {"Content-Type": "application/json", "Authorization": f"ApiKey {config.api_key}"}

    def _make_request(
        self,
        method: str,
        url: str,
        json_data: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        params: Optional[Dict[str, Any]] = None,
        timeout: int = 30,
        data: Optional[Union[str, bytes]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> requests.Response:
        """Make an HTTP request to Elasticsearch.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE, HEAD)
            url: Full URL for the request
            json_data: JSON data to send in request body
            params: Query parameters
            timeout: Request timeout in seconds
            data: Pre-encoded request body, for payloads that are not a single
                JSON value (e.g. NDJSON). Do not combine with ``json_data``.
            headers: Extra headers merged over the client's default headers
                for this request only.

        Returns:
            requests.Response object

        Raises:
            requests.RequestException: If the request fails
        """
        request_headers = self._headers if headers is None else {**self._headers, **headers}
        return requests.request(
            method=method,
            url=url,
            headers=request_headers,
            json=json_data,
            data=data,
            params=params,
            verify=self.config.verify_ssl,
            timeout=timeout,
        )

    def _request_succeeds(
        self,
        method: str,
        url: str,
        ok_statuses: tuple,
        json_data: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """Return True when the request completes with a status in ``ok_statuses``."""
        try:
            response = self._make_request(method, url, json_data=json_data)
            return response.status_code in ok_statuses
        except Exception:
            # Deliberate best-effort: any failure is reported as False.
            return False

    def _request_json(
        self,
        method: str,
        url: str,
        json_data: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """Return the parsed JSON body of a 200 response, or None on any failure."""
        try:
            response = self._make_request(method, url, json_data=json_data)
            if response.status_code == 200:
                return response.json()
            return None
        except Exception:
            # Deliberate best-effort: any failure is reported as None.
            return None

    # Index Management Operations

    def create_index(self, mapping: Dict[str, Any]) -> bool:
        """Create an index with the specified mapping.

        Args:
            mapping: Index mapping configuration

        Returns:
            bool: True if successful, False otherwise
        """
        return self._request_succeeds("PUT", self.index_url, (200, 201), json_data=mapping)

    def index_exists(self) -> bool:
        """Check if the index exists.

        Returns:
            bool: True if index exists, False otherwise
        """
        return self._request_succeeds("HEAD", self.index_url, (200,))

    def delete_index(self) -> bool:
        """Delete the index.

        Returns:
            bool: True if successful, False otherwise
        """
        # 404 means the index doesn't exist, which is the desired end state.
        return self._request_succeeds("DELETE", self.index_url, (200, 404))

    def get_mapping(self) -> Optional[Dict[str, Any]]:
        """Get the index mapping.

        Returns:
            Dict containing mapping data, or None if failed
        """
        return self._request_json("GET", f"{self.index_url}/_mapping")

    def get_index_stats(self) -> Optional[Dict[str, Any]]:
        """Get index statistics.

        Returns:
            Dict containing index statistics, or None if failed
        """
        return self._request_json("GET", f"{self.index_url}/_stats")

    # Document Operations

    def index_document(self, document: Dict[str, Any], doc_id: Optional[str] = None) -> bool:
        """Index a document.

        Args:
            document: Document to index
            doc_id: Optional document ID; when omitted no ID is put in the URL

        Returns:
            bool: True if successful, False otherwise
        """
        if doc_id:
            url = f"{self.index_url}/_doc/{doc_id}"
        else:
            url = f"{self.index_url}/_doc"
        return self._request_succeeds("POST", url, (200, 201), json_data=document)

    @staticmethod
    def _build_bulk_body(documents: List[Dict[str, Any]]) -> str:
        """Serialize documents as the NDJSON payload expected by ``_bulk``.

        Each document is preceded by an ``index`` action line and every line,
        including the last one, is terminated by a newline.
        """
        action_line = json.dumps({"index": {}})
        lines: List[str] = []
        for doc in documents:
            lines.append(action_line)
            lines.append(json.dumps(doc))
        return "".join(f"{line}\n" for line in lines)

    def bulk_index_documents(self, documents: List[Dict[str, Any]]) -> bool:
        """Bulk index multiple documents.

        Args:
            documents: List of documents to index

        Returns:
            bool: True if every document was indexed (or there was nothing to
            index), False otherwise
        """
        if not documents:
            # Nothing to send; an empty bulk body would be rejected by the server.
            return True
        try:
            # The bulk endpoint takes newline-delimited JSON, not a JSON array,
            # so the payload is sent as a raw body instead of via ``json=``.
            payload = self._build_bulk_body(documents).encode("utf-8")
            response = self._make_request(
                "POST",
                f"{self.index_url}/_bulk",
                data=payload,
                headers={"Content-Type": "application/x-ndjson"},
            )
            if response.status_code != 200:
                return False
            # A 200 response can still carry per-item failures.
            return not response.json().get("errors", False)
        except Exception:
            return False

    # Search Operations

    def search(
        self, query: Dict[str, Any], size: int = 10, from_: int = 0, sort: Optional[List[Dict[str, Any]]] = None
    ) -> Optional[Dict[str, Any]]:
        """Search documents in the index.

        Args:
            query: Elasticsearch query
            size: Number of results to return
            from_: Starting offset
            sort: Sort specification

        Returns:
            Dict containing search results, or None if failed
        """
        search_body: Dict[str, Any] = {"query": query, "size": size, "from": from_}
        if sort:
            search_body["sort"] = sort
        return self._request_json("POST", f"{self.index_url}/_search", json_data=search_body)

    def search_by_term(self, field: str, value: Any, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by exact term match.

        Args:
            field: Field name to search
            value: Value to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query = {"term": {field: value}}
        return self.search(query, size=size)

    def search_by_match(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by text match.

        Args:
            field: Field name to search
            value: Text to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query = {"match": {field: value}}
        return self.search(query, size=size)

    def search_by_match_phrase_prefix(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by phrase prefix match.

        Args:
            field: Field name to search
            value: Phrase prefix to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query = {"match_phrase_prefix": {field: value}}
        return self.search(query, size=size)

    def search_all(self, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search all documents in the index.

        Args:
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query: Dict[str, Any] = {"match_all": {}}
        return self.search(query, size=size)

    # Health and Status Operations

    def health_check(self) -> bool:
        """Check if Elasticsearch is reachable.

        Only the HTTP status of the ``_cluster/health`` request is inspected;
        the body of the response is not examined.

        Returns:
            bool: True if the endpoint answered with 200, False otherwise
        """
        return self._request_succeeds("GET", f"{self.base_url}/_cluster/health", (200,))

    def get_cluster_info(self) -> Optional[Dict[str, Any]]:
        """Get the response body of the ``_cluster/health`` endpoint.

        Returns:
            Dict containing the parsed response, or None if failed
        """
        return self._request_json("GET", f"{self.base_url}/_cluster/health")
|
|
@@ -1,50 +1,92 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import asyncio
|
|
4
|
+
import os
|
|
4
5
|
import threading
|
|
5
6
|
from concurrent.futures import ThreadPoolExecutor
|
|
6
7
|
from typing import Optional, Tuple, Any, Dict
|
|
7
8
|
from datetime import datetime
|
|
8
|
-
from urllib.parse import urlparse
|
|
9
|
-
import requests
|
|
10
9
|
|
|
11
|
-
from eval_protocol.types.remote_rollout_processor import
|
|
10
|
+
from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
|
|
11
|
+
from .elasticsearch_client import ElasticsearchClient
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class ElasticsearchDirectHttpHandler(logging.Handler):
|
|
15
|
-
def __init__(self, elasticsearch_config:
|
|
15
|
+
def __init__(self, elasticsearch_config: ElasticsearchConfig) -> None:
|
|
16
16
|
super().__init__()
|
|
17
|
-
self.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
self.config = ElasticsearchConfig(
|
|
18
|
+
url=elasticsearch_config.url,
|
|
19
|
+
api_key=elasticsearch_config.api_key,
|
|
20
|
+
index_name=elasticsearch_config.index_name,
|
|
21
|
+
)
|
|
22
|
+
self.client = ElasticsearchClient(self.config)
|
|
21
23
|
self.formatter: logging.Formatter = logging.Formatter()
|
|
22
24
|
self._executor = None
|
|
23
25
|
|
|
24
|
-
# Parse URL to determine if we should verify SSL
|
|
25
|
-
parsed_url = urlparse(elasticsearch_config.url)
|
|
26
|
-
self.verify_ssl = parsed_url.scheme == "https"
|
|
27
|
-
|
|
28
26
|
def emit(self, record: logging.LogRecord) -> None:
    """Emit a log record by scheduling it for async transmission.

    Builds the document (timestamp, level, message, logger name, rollout ID
    and, when present, status fields) and hands it to
    ``_schedule_async_send``. Any exception raised while preparing the
    document is passed to ``handleError`` and printed; it is not re-raised.

    Args:
        record: The log record to ship to Elasticsearch.
    """
    # Local import keeps this method self-contained.
    from datetime import timezone

    try:
        # ``record.created`` is seconds since the epoch. Convert it in UTC so
        # the literal "Z" suffix in the format string is accurate; a naive
        # conversion would emit local wall-clock time labelled as UTC.
        timestamp = datetime.fromtimestamp(record.created, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

        rollout_id = self._get_rollout_id(record)
        status_info = self._get_status_info(record)

        data: Dict[str, Any] = {
            "@timestamp": timestamp,
            "level": record.levelname,
            "message": record.getMessage(),
            "logger_name": record.name,
            "rollout_id": rollout_id,
        }

        # Add status information if present
        if status_info:
            data.update(status_info)

        # Schedule the HTTP request to run asynchronously
        self._schedule_async_send(data, record)
    except Exception as e:
        self.handleError(record)
        print(f"Error preparing log for Elasticsearch: {e}")
|
|
47
52
|
|
|
53
|
+
def _get_rollout_id(self, record: logging.LogRecord) -> str:
|
|
54
|
+
"""Get the rollout ID from environment variables."""
|
|
55
|
+
rollout_id = os.getenv("EP_ROLLOUT_ID")
|
|
56
|
+
if rollout_id is None:
|
|
57
|
+
raise ValueError(
|
|
58
|
+
"EP_ROLLOUT_ID environment variable is not set but needed for ElasticsearchDirectHttpHandler"
|
|
59
|
+
)
|
|
60
|
+
return rollout_id
|
|
61
|
+
|
|
62
|
+
def _get_status_info(self, record: logging.LogRecord) -> Optional[Dict[str, Any]]:
|
|
63
|
+
"""Extract status information from the log record's extra data."""
|
|
64
|
+
# Check if 'status' is in the extra data (passed via extra parameter)
|
|
65
|
+
if hasattr(record, "status") and record.status is not None: # type: ignore
|
|
66
|
+
status = record.status # type: ignore
|
|
67
|
+
|
|
68
|
+
# Handle Status class instances (Pydantic BaseModel)
|
|
69
|
+
if hasattr(status, "code") and hasattr(status, "message"):
|
|
70
|
+
# Status object - extract code and message
|
|
71
|
+
status_code = status.code
|
|
72
|
+
# Handle both enum values and direct integer values
|
|
73
|
+
if hasattr(status_code, "value"):
|
|
74
|
+
status_code = status_code.value
|
|
75
|
+
|
|
76
|
+
return {
|
|
77
|
+
"status_code": status_code,
|
|
78
|
+
"status_message": status.message,
|
|
79
|
+
"status_details": getattr(status, "details", []),
|
|
80
|
+
}
|
|
81
|
+
elif isinstance(status, dict):
|
|
82
|
+
# Dictionary representation of status
|
|
83
|
+
return {
|
|
84
|
+
"status_code": status.get("code"),
|
|
85
|
+
"status_message": status.get("message"),
|
|
86
|
+
"status_details": status.get("details", []),
|
|
87
|
+
}
|
|
88
|
+
return None
|
|
89
|
+
|
|
48
90
|
def _schedule_async_send(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
|
|
49
91
|
"""Schedule an async task to send the log data to Elasticsearch."""
|
|
50
92
|
if self._executor is None:
|
|
@@ -59,13 +101,9 @@ class ElasticsearchDirectHttpHandler(logging.Handler):
|
|
|
59
101
|
def _send_to_elasticsearch(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
|
|
60
102
|
"""Send data to Elasticsearch (runs in thread pool)."""
|
|
61
103
|
try:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
data=json.dumps(data),
|
|
66
|
-
verify=self.verify_ssl, # If using HTTPS, verify SSL certificate
|
|
67
|
-
)
|
|
68
|
-
response.raise_for_status() # Raise an exception for HTTP errors
|
|
104
|
+
success = self.client.index_document(data)
|
|
105
|
+
if not success:
|
|
106
|
+
raise Exception("Failed to index document to Elasticsearch")
|
|
69
107
|
except Exception as e:
|
|
70
108
|
# Re-raise to be handled by the callback
|
|
71
109
|
raise e
|