eval-protocol 0.2.35.dev1__tar.gz → 0.2.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.35.dev1/eval_protocol.egg-info → eval_protocol-0.2.36}/PKG-INFO +1 -1
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/__init__.py +2 -1
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/_version.py +3 -3
- eval_protocol-0.2.36/eval_protocol/logging/elasticsearch_client.py +286 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/logging/elasticsearch_direct_http_handler.py +58 -20
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/logging/elasticsearch_index_manager.py +47 -66
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/elasticsearch_setup.py +8 -8
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/remote_rollout_processor.py +8 -3
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/remote_rollout_processor.py +9 -2
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/SOURCES.txt +1 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/LICENSE +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/README.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/pyproject.toml +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/setup.cfg +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/setup.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_format.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_length.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_math.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_models.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/versioneer.py +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/index-C8woq7EO.js +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/index-C8woq7EO.js.map +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/index-CSKGq1w7.css +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.36
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -24,7 +24,7 @@ from .mcp_env import (
|
|
|
24
24
|
)
|
|
25
25
|
from .data_loader import DynamicDataLoader, InlineDataLoader
|
|
26
26
|
from . import mcp, rewards
|
|
27
|
-
from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata
|
|
27
|
+
from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata, Status
|
|
28
28
|
from .playback_policy import PlaybackPolicyBase
|
|
29
29
|
from .resources import create_llm_resource
|
|
30
30
|
from .reward_function import RewardFunction
|
|
@@ -63,6 +63,7 @@ except ImportError:
|
|
|
63
63
|
warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
|
|
64
64
|
|
|
65
65
|
__all__ = [
|
|
66
|
+
"Status",
|
|
66
67
|
"RemoteRolloutProcessor",
|
|
67
68
|
"InputMetadata",
|
|
68
69
|
"EvaluationRow",
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-02T09:18:41-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.
|
|
14
|
+
"full-revisionid": "1283ce226147f55336cd4fe56c744640a0a9ba9b",
|
|
15
|
+
"version": "0.2.36"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Centralized Elasticsearch client for all Elasticsearch API operations.
|
|
3
|
+
|
|
4
|
+
This module provides a unified interface for all Elasticsearch operations
|
|
5
|
+
used throughout the codebase, including index management, document operations,
|
|
6
|
+
and search functionality.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import requests
|
|
11
|
+
from typing import Any, Dict, List, Optional, Union
|
|
12
|
+
from urllib.parse import urlparse
|
|
13
|
+
from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ElasticsearchClient:
|
|
17
|
+
"""Centralized client for all Elasticsearch operations."""
|
|
18
|
+
|
|
19
|
+
def __init__(self, config: ElasticsearchConfig):
|
|
20
|
+
"""Initialize the Elasticsearch client.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
config: Elasticsearch configuration
|
|
24
|
+
"""
|
|
25
|
+
self.config = config
|
|
26
|
+
self.base_url = config.url.rstrip("/")
|
|
27
|
+
self.index_url = f"{self.base_url}/{config.index_name}"
|
|
28
|
+
self._headers = {"Content-Type": "application/json", "Authorization": f"ApiKey {config.api_key}"}
|
|
29
|
+
|
|
30
|
+
def _make_request(
|
|
31
|
+
self,
|
|
32
|
+
method: str,
|
|
33
|
+
url: str,
|
|
34
|
+
json_data: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
|
|
35
|
+
params: Optional[Dict[str, Any]] = None,
|
|
36
|
+
timeout: int = 30,
|
|
37
|
+
) -> requests.Response:
|
|
38
|
+
"""Make an HTTP request to Elasticsearch.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
method: HTTP method (GET, POST, PUT, DELETE, HEAD)
|
|
42
|
+
url: Full URL for the request
|
|
43
|
+
json_data: JSON data to send in request body
|
|
44
|
+
params: Query parameters
|
|
45
|
+
timeout: Request timeout in seconds
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
requests.Response object
|
|
49
|
+
|
|
50
|
+
Raises:
|
|
51
|
+
requests.RequestException: If the request fails
|
|
52
|
+
"""
|
|
53
|
+
return requests.request(
|
|
54
|
+
method=method,
|
|
55
|
+
url=url,
|
|
56
|
+
headers=self._headers,
|
|
57
|
+
json=json_data,
|
|
58
|
+
params=params,
|
|
59
|
+
verify=self.config.verify_ssl,
|
|
60
|
+
timeout=timeout,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Index Management Operations
|
|
64
|
+
|
|
65
|
+
def create_index(self, mapping: Dict[str, Any]) -> bool:
|
|
66
|
+
"""Create an index with the specified mapping.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
mapping: Index mapping configuration
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
bool: True if successful, False otherwise
|
|
73
|
+
"""
|
|
74
|
+
try:
|
|
75
|
+
response = self._make_request("PUT", self.index_url, json_data=mapping)
|
|
76
|
+
return response.status_code in [200, 201]
|
|
77
|
+
except Exception:
|
|
78
|
+
return False
|
|
79
|
+
|
|
80
|
+
def index_exists(self) -> bool:
|
|
81
|
+
"""Check if the index exists.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
bool: True if index exists, False otherwise
|
|
85
|
+
"""
|
|
86
|
+
try:
|
|
87
|
+
response = self._make_request("HEAD", self.index_url)
|
|
88
|
+
return response.status_code == 200
|
|
89
|
+
except Exception:
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
def delete_index(self) -> bool:
|
|
93
|
+
"""Delete the index.
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
bool: True if successful, False otherwise
|
|
97
|
+
"""
|
|
98
|
+
try:
|
|
99
|
+
response = self._make_request("DELETE", self.index_url)
|
|
100
|
+
return response.status_code in [200, 404] # 404 means index doesn't exist
|
|
101
|
+
except Exception:
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
def get_mapping(self) -> Optional[Dict[str, Any]]:
|
|
105
|
+
"""Get the index mapping.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
Dict containing mapping data, or None if failed
|
|
109
|
+
"""
|
|
110
|
+
try:
|
|
111
|
+
response = self._make_request("GET", f"{self.index_url}/_mapping")
|
|
112
|
+
if response.status_code == 200:
|
|
113
|
+
return response.json()
|
|
114
|
+
return None
|
|
115
|
+
except Exception:
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
def get_index_stats(self) -> Optional[Dict[str, Any]]:
|
|
119
|
+
"""Get index statistics.
|
|
120
|
+
|
|
121
|
+
Returns:
|
|
122
|
+
Dict containing index statistics, or None if failed
|
|
123
|
+
"""
|
|
124
|
+
try:
|
|
125
|
+
response = self._make_request("GET", f"{self.index_url}/_stats")
|
|
126
|
+
if response.status_code == 200:
|
|
127
|
+
return response.json()
|
|
128
|
+
return None
|
|
129
|
+
except Exception:
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
# Document Operations
|
|
133
|
+
|
|
134
|
+
def index_document(self, document: Dict[str, Any], doc_id: Optional[str] = None) -> bool:
|
|
135
|
+
"""Index a document.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
document: Document to index
|
|
139
|
+
doc_id: Optional document ID
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
bool: True if successful, False otherwise
|
|
143
|
+
"""
|
|
144
|
+
try:
|
|
145
|
+
if doc_id:
|
|
146
|
+
url = f"{self.index_url}/_doc/{doc_id}"
|
|
147
|
+
else:
|
|
148
|
+
url = f"{self.index_url}/_doc"
|
|
149
|
+
|
|
150
|
+
response = self._make_request("POST", url, json_data=document)
|
|
151
|
+
return response.status_code in [200, 201]
|
|
152
|
+
except Exception:
|
|
153
|
+
return False
|
|
154
|
+
|
|
155
|
+
def bulk_index_documents(self, documents: List[Dict[str, Any]]) -> bool:
|
|
156
|
+
"""Bulk index multiple documents.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
documents: List of documents to index
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
bool: True if successful, False otherwise
|
|
163
|
+
"""
|
|
164
|
+
try:
|
|
165
|
+
# Prepare bulk request body
|
|
166
|
+
bulk_body = []
|
|
167
|
+
for doc in documents:
|
|
168
|
+
bulk_body.append({"index": {}})
|
|
169
|
+
bulk_body.append(doc)
|
|
170
|
+
|
|
171
|
+
response = self._make_request("POST", f"{self.index_url}/_bulk", json_data=bulk_body)
|
|
172
|
+
return response.status_code == 200
|
|
173
|
+
except Exception:
|
|
174
|
+
return False
|
|
175
|
+
|
|
176
|
+
# Search Operations
|
|
177
|
+
|
|
178
|
+
def search(
|
|
179
|
+
self, query: Dict[str, Any], size: int = 10, from_: int = 0, sort: Optional[List[Dict[str, Any]]] = None
|
|
180
|
+
) -> Optional[Dict[str, Any]]:
|
|
181
|
+
"""Search documents in the index.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
query: Elasticsearch query
|
|
185
|
+
size: Number of results to return
|
|
186
|
+
from_: Starting offset
|
|
187
|
+
sort: Sort specification
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
Dict containing search results, or None if failed
|
|
191
|
+
"""
|
|
192
|
+
try:
|
|
193
|
+
search_body = {"query": query, "size": size, "from": from_}
|
|
194
|
+
|
|
195
|
+
if sort:
|
|
196
|
+
search_body["sort"] = sort
|
|
197
|
+
|
|
198
|
+
response = self._make_request("POST", f"{self.index_url}/_search", json_data=search_body)
|
|
199
|
+
|
|
200
|
+
if response.status_code == 200:
|
|
201
|
+
return response.json()
|
|
202
|
+
return None
|
|
203
|
+
except Exception:
|
|
204
|
+
return None
|
|
205
|
+
|
|
206
|
+
def search_by_term(self, field: str, value: Any, size: int = 10) -> Optional[Dict[str, Any]]:
|
|
207
|
+
"""Search documents by exact term match.
|
|
208
|
+
|
|
209
|
+
Args:
|
|
210
|
+
field: Field name to search
|
|
211
|
+
value: Value to match
|
|
212
|
+
size: Number of results to return
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
Dict containing search results, or None if failed
|
|
216
|
+
"""
|
|
217
|
+
query = {"term": {field: value}}
|
|
218
|
+
return self.search(query, size=size)
|
|
219
|
+
|
|
220
|
+
def search_by_match(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
|
|
221
|
+
"""Search documents by text match.
|
|
222
|
+
|
|
223
|
+
Args:
|
|
224
|
+
field: Field name to search
|
|
225
|
+
value: Text to match
|
|
226
|
+
size: Number of results to return
|
|
227
|
+
|
|
228
|
+
Returns:
|
|
229
|
+
Dict containing search results, or None if failed
|
|
230
|
+
"""
|
|
231
|
+
query = {"match": {field: value}}
|
|
232
|
+
return self.search(query, size=size)
|
|
233
|
+
|
|
234
|
+
def search_by_match_phrase_prefix(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
|
|
235
|
+
"""Search documents by phrase prefix match.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
field: Field name to search
|
|
239
|
+
value: Phrase prefix to match
|
|
240
|
+
size: Number of results to return
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
Dict containing search results, or None if failed
|
|
244
|
+
"""
|
|
245
|
+
query = {"match_phrase_prefix": {field: value}}
|
|
246
|
+
return self.search(query, size=size)
|
|
247
|
+
|
|
248
|
+
def search_all(self, size: int = 10) -> Optional[Dict[str, Any]]:
|
|
249
|
+
"""Search all documents in the index.
|
|
250
|
+
|
|
251
|
+
Args:
|
|
252
|
+
size: Number of results to return
|
|
253
|
+
|
|
254
|
+
Returns:
|
|
255
|
+
Dict containing search results, or None if failed
|
|
256
|
+
"""
|
|
257
|
+
query = {"match_all": {}}
|
|
258
|
+
return self.search(query, size=size)
|
|
259
|
+
|
|
260
|
+
# Health and Status Operations
|
|
261
|
+
|
|
262
|
+
def health_check(self) -> bool:
|
|
263
|
+
"""Check if Elasticsearch is healthy.
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
bool: True if healthy, False otherwise
|
|
267
|
+
"""
|
|
268
|
+
try:
|
|
269
|
+
response = self._make_request("GET", f"{self.base_url}/_cluster/health")
|
|
270
|
+
return response.status_code == 200
|
|
271
|
+
except Exception:
|
|
272
|
+
return False
|
|
273
|
+
|
|
274
|
+
def get_cluster_info(self) -> Optional[Dict[str, Any]]:
|
|
275
|
+
"""Get cluster information.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
Dict containing cluster info, or None if failed
|
|
279
|
+
"""
|
|
280
|
+
try:
|
|
281
|
+
response = self._make_request("GET", f"{self.base_url}/_cluster/health")
|
|
282
|
+
if response.status_code == 200:
|
|
283
|
+
return response.json()
|
|
284
|
+
return None
|
|
285
|
+
except Exception:
|
|
286
|
+
return None
|
|
@@ -1,50 +1,92 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import asyncio
|
|
4
|
+
import os
|
|
4
5
|
import threading
|
|
5
6
|
from concurrent.futures import ThreadPoolExecutor
|
|
6
7
|
from typing import Optional, Tuple, Any, Dict
|
|
7
8
|
from datetime import datetime
|
|
8
|
-
from urllib.parse import urlparse
|
|
9
|
-
import requests
|
|
10
9
|
|
|
11
|
-
from eval_protocol.types.remote_rollout_processor import
|
|
10
|
+
from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
|
|
11
|
+
from .elasticsearch_client import ElasticsearchClient
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class ElasticsearchDirectHttpHandler(logging.Handler):
|
|
15
|
-
def __init__(self, elasticsearch_config:
|
|
15
|
+
def __init__(self, elasticsearch_config: ElasticsearchConfig) -> None:
|
|
16
16
|
super().__init__()
|
|
17
|
-
self.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
self.config = ElasticsearchConfig(
|
|
18
|
+
url=elasticsearch_config.url,
|
|
19
|
+
api_key=elasticsearch_config.api_key,
|
|
20
|
+
index_name=elasticsearch_config.index_name,
|
|
21
|
+
)
|
|
22
|
+
self.client = ElasticsearchClient(self.config)
|
|
21
23
|
self.formatter: logging.Formatter = logging.Formatter()
|
|
22
24
|
self._executor = None
|
|
23
25
|
|
|
24
|
-
# Parse URL to determine if we should verify SSL
|
|
25
|
-
parsed_url = urlparse(elasticsearch_config.url)
|
|
26
|
-
self.verify_ssl = parsed_url.scheme == "https"
|
|
27
|
-
|
|
28
26
|
def emit(self, record: logging.LogRecord) -> None:
|
|
29
27
|
"""Emit a log record by scheduling it for async transmission."""
|
|
30
28
|
try:
|
|
31
29
|
# Create proper ISO 8601 timestamp
|
|
32
30
|
timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
|
|
33
31
|
|
|
32
|
+
rollout_id = self._get_rollout_id(record)
|
|
33
|
+
status_info = self._get_status_info(record)
|
|
34
|
+
|
|
34
35
|
data: Dict[str, Any] = {
|
|
35
36
|
"@timestamp": timestamp,
|
|
36
37
|
"level": record.levelname,
|
|
37
38
|
"message": record.getMessage(),
|
|
38
39
|
"logger_name": record.name,
|
|
39
|
-
|
|
40
|
+
"rollout_id": rollout_id,
|
|
40
41
|
}
|
|
41
42
|
|
|
43
|
+
# Add status information if present
|
|
44
|
+
if status_info:
|
|
45
|
+
data.update(status_info)
|
|
46
|
+
|
|
42
47
|
# Schedule the HTTP request to run asynchronously
|
|
43
48
|
self._schedule_async_send(data, record)
|
|
44
49
|
except Exception as e:
|
|
45
50
|
self.handleError(record)
|
|
46
51
|
print(f"Error preparing log for Elasticsearch: {e}")
|
|
47
52
|
|
|
53
|
+
def _get_rollout_id(self, record: logging.LogRecord) -> str:
|
|
54
|
+
"""Get the rollout ID from environment variables."""
|
|
55
|
+
rollout_id = os.getenv("EP_ROLLOUT_ID")
|
|
56
|
+
if rollout_id is None:
|
|
57
|
+
raise ValueError(
|
|
58
|
+
"EP_ROLLOUT_ID environment variable is not set but needed for ElasticsearchDirectHttpHandler"
|
|
59
|
+
)
|
|
60
|
+
return rollout_id
|
|
61
|
+
|
|
62
|
+
def _get_status_info(self, record: logging.LogRecord) -> Optional[Dict[str, Any]]:
|
|
63
|
+
"""Extract status information from the log record's extra data."""
|
|
64
|
+
# Check if 'status' is in the extra data (passed via extra parameter)
|
|
65
|
+
if hasattr(record, "status") and record.status is not None: # type: ignore
|
|
66
|
+
status = record.status # type: ignore
|
|
67
|
+
|
|
68
|
+
# Handle Status class instances (Pydantic BaseModel)
|
|
69
|
+
if hasattr(status, "code") and hasattr(status, "message"):
|
|
70
|
+
# Status object - extract code and message
|
|
71
|
+
status_code = status.code
|
|
72
|
+
# Handle both enum values and direct integer values
|
|
73
|
+
if hasattr(status_code, "value"):
|
|
74
|
+
status_code = status_code.value
|
|
75
|
+
|
|
76
|
+
return {
|
|
77
|
+
"status_code": status_code,
|
|
78
|
+
"status_message": status.message,
|
|
79
|
+
"status_details": getattr(status, "details", []),
|
|
80
|
+
}
|
|
81
|
+
elif isinstance(status, dict):
|
|
82
|
+
# Dictionary representation of status
|
|
83
|
+
return {
|
|
84
|
+
"status_code": status.get("code"),
|
|
85
|
+
"status_message": status.get("message"),
|
|
86
|
+
"status_details": status.get("details", []),
|
|
87
|
+
}
|
|
88
|
+
return None
|
|
89
|
+
|
|
48
90
|
def _schedule_async_send(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
|
|
49
91
|
"""Schedule an async task to send the log data to Elasticsearch."""
|
|
50
92
|
if self._executor is None:
|
|
@@ -59,13 +101,9 @@ class ElasticsearchDirectHttpHandler(logging.Handler):
|
|
|
59
101
|
def _send_to_elasticsearch(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
|
|
60
102
|
"""Send data to Elasticsearch (runs in thread pool)."""
|
|
61
103
|
try:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
data=json.dumps(data),
|
|
66
|
-
verify=self.verify_ssl, # If using HTTPS, verify SSL certificate
|
|
67
|
-
)
|
|
68
|
-
response.raise_for_status() # Raise an exception for HTTP errors
|
|
104
|
+
success = self.client.index_document(data)
|
|
105
|
+
if not success:
|
|
106
|
+
raise Exception("Failed to index document to Elasticsearch")
|
|
69
107
|
except Exception as e:
|
|
70
108
|
# Re-raise to be handled by the callback
|
|
71
109
|
raise e
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import requests
|
|
2
1
|
from typing import Dict, Any, Optional
|
|
3
|
-
from
|
|
2
|
+
from .elasticsearch_client import ElasticsearchClient
|
|
3
|
+
from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class ElasticsearchIndexManager:
|
|
@@ -14,16 +14,10 @@ class ElasticsearchIndexManager:
|
|
|
14
14
|
index_name: Name of the index to manage
|
|
15
15
|
api_key: API key for authentication
|
|
16
16
|
"""
|
|
17
|
-
self.base_url
|
|
18
|
-
self.
|
|
19
|
-
self.api_key: str = api_key
|
|
20
|
-
self.index_url: str = f"{self.base_url}/{self.index_name}"
|
|
17
|
+
self.config = ElasticsearchConfig(url=base_url, api_key=api_key, index_name=index_name)
|
|
18
|
+
self.client = ElasticsearchClient(self.config)
|
|
21
19
|
self._mapping_created: bool = False
|
|
22
20
|
|
|
23
|
-
# Parse URL to determine if we should verify SSL
|
|
24
|
-
parsed_url = urlparse(base_url)
|
|
25
|
-
self.verify_ssl = parsed_url.scheme == "https"
|
|
26
|
-
|
|
27
21
|
def create_logging_index_mapping(self) -> bool:
|
|
28
22
|
"""Create index with proper mapping for logging data.
|
|
29
23
|
|
|
@@ -41,25 +35,22 @@ class ElasticsearchIndexManager:
|
|
|
41
35
|
|
|
42
36
|
# If index exists but has wrong mapping, delete and recreate it
|
|
43
37
|
if self.index_exists():
|
|
44
|
-
print(
|
|
38
|
+
print(
|
|
39
|
+
f"Warning: Index {self.config.index_name} exists with incorrect mapping. Deleting and recreating..."
|
|
40
|
+
)
|
|
45
41
|
if not self.delete_index():
|
|
46
|
-
print(f"Warning: Failed to delete existing index {self.index_name}")
|
|
42
|
+
print(f"Warning: Failed to delete existing index {self.config.index_name}")
|
|
47
43
|
return False
|
|
48
44
|
|
|
49
45
|
# Create index with proper mapping
|
|
50
46
|
mapping = self._get_logging_mapping()
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
json=mapping,
|
|
55
|
-
verify=self.verify_ssl,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
if response.status_code in [200, 201]:
|
|
47
|
+
success = self.client.create_index(mapping)
|
|
48
|
+
|
|
49
|
+
if success:
|
|
59
50
|
self._mapping_created = True
|
|
60
51
|
return True
|
|
61
52
|
else:
|
|
62
|
-
print(
|
|
53
|
+
print("Warning: Failed to create index mapping")
|
|
63
54
|
return False
|
|
64
55
|
|
|
65
56
|
except Exception as e:
|
|
@@ -74,46 +65,50 @@ class ElasticsearchIndexManager:
|
|
|
74
65
|
"""
|
|
75
66
|
try:
|
|
76
67
|
# Check if index exists
|
|
77
|
-
|
|
78
|
-
self.index_url, headers={"Authorization": f"ApiKey {self.api_key}"}, verify=self.verify_ssl
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
if response.status_code != 200:
|
|
68
|
+
if not self.client.index_exists():
|
|
82
69
|
return False
|
|
83
70
|
|
|
84
71
|
# Check if mapping is correct
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
headers={"Authorization": f"ApiKey {self.api_key}"},
|
|
88
|
-
verify=self.verify_ssl,
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
if mapping_response.status_code != 200:
|
|
72
|
+
mapping_data = self.client.get_mapping()
|
|
73
|
+
if mapping_data is None:
|
|
92
74
|
return False
|
|
93
75
|
|
|
94
|
-
mapping_data = mapping_response.json()
|
|
95
76
|
return self._has_correct_timestamp_mapping(mapping_data)
|
|
96
77
|
|
|
97
78
|
except Exception:
|
|
98
79
|
return False
|
|
99
80
|
|
|
100
81
|
def _has_correct_timestamp_mapping(self, mapping_data: Dict[str, Any]) -> bool:
|
|
101
|
-
"""Check if the mapping has @timestamp as a date field.
|
|
82
|
+
"""Check if the mapping has @timestamp as a date field, rollout_id as a keyword field, and status fields.
|
|
102
83
|
|
|
103
84
|
Args:
|
|
104
85
|
mapping_data: Elasticsearch mapping response data
|
|
105
86
|
|
|
106
87
|
Returns:
|
|
107
|
-
bool: True if
|
|
88
|
+
bool: True if all required fields are correctly mapped
|
|
108
89
|
"""
|
|
109
90
|
try:
|
|
110
|
-
|
|
111
|
-
self.index_name in mapping_data
|
|
112
|
-
and "mappings" in mapping_data[self.index_name]
|
|
113
|
-
and "properties" in mapping_data[self.index_name]["mappings"]
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
91
|
+
if not (
|
|
92
|
+
self.config.index_name in mapping_data
|
|
93
|
+
and "mappings" in mapping_data[self.config.index_name]
|
|
94
|
+
and "properties" in mapping_data[self.config.index_name]["mappings"]
|
|
95
|
+
):
|
|
96
|
+
return False
|
|
97
|
+
|
|
98
|
+
properties = mapping_data[self.config.index_name]["mappings"]["properties"]
|
|
99
|
+
|
|
100
|
+
# Check @timestamp is mapped as date
|
|
101
|
+
timestamp_ok = "@timestamp" in properties and properties["@timestamp"].get("type") == "date"
|
|
102
|
+
|
|
103
|
+
# Check rollout_id is mapped as keyword
|
|
104
|
+
rollout_id_ok = "rollout_id" in properties and properties["rollout_id"].get("type") == "keyword"
|
|
105
|
+
|
|
106
|
+
# Check status fields are mapped correctly
|
|
107
|
+
status_code_ok = "status_code" in properties and properties["status_code"].get("type") == "integer"
|
|
108
|
+
status_message_ok = "status_message" in properties and properties["status_message"].get("type") == "text"
|
|
109
|
+
status_details_ok = "status_details" in properties and properties["status_details"].get("type") == "object"
|
|
110
|
+
|
|
111
|
+
return timestamp_ok and rollout_id_ok and status_code_ok and status_message_ok and status_details_ok
|
|
117
112
|
except (KeyError, TypeError):
|
|
118
113
|
return False
|
|
119
114
|
|
|
@@ -130,6 +125,10 @@ class ElasticsearchIndexManager:
|
|
|
130
125
|
"level": {"type": "keyword"},
|
|
131
126
|
"message": {"type": "text"},
|
|
132
127
|
"logger_name": {"type": "keyword"},
|
|
128
|
+
"rollout_id": {"type": "keyword"},
|
|
129
|
+
"status_code": {"type": "integer"},
|
|
130
|
+
"status_message": {"type": "text"},
|
|
131
|
+
"status_details": {"type": "object"},
|
|
133
132
|
}
|
|
134
133
|
}
|
|
135
134
|
}
|
|
@@ -141,14 +140,12 @@ class ElasticsearchIndexManager:
|
|
|
141
140
|
bool: True if index was deleted successfully, False otherwise.
|
|
142
141
|
"""
|
|
143
142
|
try:
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
)
|
|
147
|
-
if response.status_code in [200, 404]: # 404 means index doesn't exist, which is fine
|
|
143
|
+
success = self.client.delete_index()
|
|
144
|
+
if success:
|
|
148
145
|
self._mapping_created = False
|
|
149
146
|
return True
|
|
150
147
|
else:
|
|
151
|
-
print(
|
|
148
|
+
print("Warning: Failed to delete index")
|
|
152
149
|
return False
|
|
153
150
|
except Exception as e:
|
|
154
151
|
print(f"Warning: Failed to delete index: {e}")
|
|
@@ -160,13 +157,7 @@ class ElasticsearchIndexManager:
|
|
|
160
157
|
Returns:
|
|
161
158
|
bool: True if index exists, False otherwise.
|
|
162
159
|
"""
|
|
163
|
-
|
|
164
|
-
response = requests.head(
|
|
165
|
-
self.index_url, headers={"Authorization": f"ApiKey {self.api_key}"}, verify=self.verify_ssl
|
|
166
|
-
)
|
|
167
|
-
return response.status_code == 200
|
|
168
|
-
except Exception:
|
|
169
|
-
return False
|
|
160
|
+
return self.client.index_exists()
|
|
170
161
|
|
|
171
162
|
def get_index_stats(self) -> Optional[Dict[str, Any]]:
|
|
172
163
|
"""Get statistics about the index.
|
|
@@ -174,14 +165,4 @@ class ElasticsearchIndexManager:
|
|
|
174
165
|
Returns:
|
|
175
166
|
Dict containing index statistics, or None if failed
|
|
176
167
|
"""
|
|
177
|
-
|
|
178
|
-
response = requests.get(
|
|
179
|
-
f"{self.index_url}/_stats",
|
|
180
|
-
headers={"Authorization": f"ApiKey {self.api_key}"},
|
|
181
|
-
verify=self.verify_ssl,
|
|
182
|
-
)
|
|
183
|
-
if response.status_code == 200:
|
|
184
|
-
return response.json()
|
|
185
|
-
return None
|
|
186
|
-
except Exception:
|
|
187
|
-
return None
|
|
168
|
+
return self.client.get_index_stats()
|