eval-protocol 0.2.35.dev1__tar.gz → 0.2.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403)
  1. {eval_protocol-0.2.35.dev1/eval_protocol.egg-info → eval_protocol-0.2.36}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/__init__.py +2 -1
  3. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/_version.py +3 -3
  4. eval_protocol-0.2.36/eval_protocol/logging/elasticsearch_client.py +286 -0
  5. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/logging/elasticsearch_direct_http_handler.py +58 -20
  6. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/logging/elasticsearch_index_manager.py +47 -66
  7. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/elasticsearch_setup.py +8 -8
  8. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/remote_rollout_processor.py +8 -3
  9. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/remote_rollout_processor.py +9 -2
  10. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36/eval_protocol.egg-info}/PKG-INFO +1 -1
  11. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/SOURCES.txt +1 -0
  12. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/LICENSE +0 -0
  13. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/README.md +0 -0
  14. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/__init__.py +0 -0
  15. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/normalize_sandbox_fusion.py +0 -0
  16. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/utils/__init__.py +0 -0
  17. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/utils/generate_api_key.py +0 -0
  18. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/development/utils/subprocess_manager.py +0 -0
  19. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/__main__.py +0 -0
  20. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/__init__.py +0 -0
  21. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/base.py +0 -0
  22. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/bigquery.py +0 -0
  23. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/braintrust.py +0 -0
  24. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/huggingface.py +0 -0
  25. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/langchain.py +0 -0
  26. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/langfuse.py +0 -0
  27. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/langsmith.py +0 -0
  28. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/openai_responses.py +0 -0
  29. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/trl.py +0 -0
  30. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/adapters/utils.py +0 -0
  31. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/__init__.py +0 -0
  32. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/models.py +0 -0
  33. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/orchestrator.py +0 -0
  34. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resource_abc.py +0 -0
  35. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resource_pool.py +0 -0
  36. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/__init__.py +0 -0
  37. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  38. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  39. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  40. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  41. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  42. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/docker_resource.py +0 -0
  43. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  44. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  45. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/resources/sql_resource.py +0 -0
  46. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/task_manager.py +0 -0
  47. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/agent/tool_registry.py +0 -0
  48. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/auth.py +0 -0
  49. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/__init__.py +0 -0
  50. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  51. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  52. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_aime25.py +0 -0
  53. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  54. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  55. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  56. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  57. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli.py +0 -0
  58. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/__init__.py +0 -0
  59. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  60. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/common.py +0 -0
  61. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/deploy.py +0 -0
  62. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  63. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/logs.py +0 -0
  64. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/preview.py +0 -0
  65. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  66. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/cli_commands/upload.py +0 -0
  67. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/common_utils.py +0 -0
  68. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/config.py +0 -0
  69. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/__init__.py +0 -0
  70. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  71. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  72. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  73. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/data_loader/models.py +0 -0
  74. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/__init__.py +0 -0
  75. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  76. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  77. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  78. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  79. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/datasets/__init__.py +0 -0
  80. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/datasets/loader.py +0 -0
  81. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/directory_utils.py +0 -0
  82. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/evaluation.py +0 -0
  83. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/__init__.py +0 -0
  84. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/event_bus.py +0 -0
  85. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/logger.py +0 -0
  86. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  87. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  88. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/execution/__init__.py +0 -0
  89. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/execution/pipeline.py +0 -0
  90. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/gcp_tools.py +0 -0
  91. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generation/cache.py +0 -0
  92. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generation/clients/base.py +0 -0
  93. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generation/clients.py +0 -0
  94. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/generic_server.py +0 -0
  95. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/get_pep440_version.py +0 -0
  96. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/human_id/__init__.py +0 -0
  97. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/human_id/dictionary.py +0 -0
  98. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/__init__.py +0 -0
  99. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/deepeval.py +0 -0
  100. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/openeval.py +0 -0
  101. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/integrations/trl.py +0 -0
  102. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/logging_utils.py +0 -0
  103. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/__init__.py +0 -0
  104. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/adapter.py +0 -0
  105. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/client/__init__.py +0 -0
  106. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/client/connection.py +0 -0
  107. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/clients.py +0 -0
  108. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/__init__.py +0 -0
  109. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/base_policy.py +0 -0
  110. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/manager.py +0 -0
  111. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/execution/policy.py +0 -0
  112. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/grid_renderer.py +0 -0
  113. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  114. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/mcpgym.py +0 -0
  115. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/process_manager.py +0 -0
  116. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/session/__init__.py +0 -0
  117. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/session/manager.py +0 -0
  118. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/simple_process_manager.py +0 -0
  119. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp/simulation_server.py +0 -0
  120. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/__init__.py +0 -0
  121. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/config.py +0 -0
  122. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/main.py +0 -0
  123. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  124. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  125. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  126. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  127. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_env.py +0 -0
  128. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/__init__.py +0 -0
  129. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  130. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  131. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  132. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  133. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  134. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  135. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  136. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  137. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  138. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  139. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  140. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/models.py +0 -0
  141. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/packaging.py +0 -0
  142. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/platform_api.py +0 -0
  143. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/playback_policy.py +0 -0
  144. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/__init__.py +0 -0
  145. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  146. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  147. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  148. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  149. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  150. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  151. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  152. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  153. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/evaluation_test.py +0 -0
  154. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  155. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/exception_config.py +0 -0
  156. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/execution.py +0 -0
  157. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  158. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  159. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/parameterize.py +0 -0
  160. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/plugin.py +0 -0
  161. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/rollout_processor.py +0 -0
  162. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/store_experiment_link.py +0 -0
  163. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/store_results_url.py +0 -0
  164. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/types.py +0 -0
  165. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/utils.py +0 -0
  166. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/pytest/validate_signature.py +0 -0
  167. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/__init__.py +0 -0
  168. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge.py +0 -0
  169. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  170. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  171. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  172. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  173. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/quickstart/utils.py +0 -0
  174. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/resources.py +0 -0
  175. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/reward_function.py +0 -0
  176. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/__init__.py +0 -0
  177. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/accuracy.py +0 -0
  178. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/accuracy_length.py +0 -0
  179. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  180. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  181. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/apps_testing_util.py +0 -0
  182. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/bfcl_reward.py +0 -0
  183. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/code_execution.py +0 -0
  184. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/code_execution_utils.py +0 -0
  185. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/cpp_code.py +0 -0
  186. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  187. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/format.py +0 -0
  188. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/function_calling.py +0 -0
  189. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/json_schema.py +0 -0
  190. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/language_consistency.py +0 -0
  191. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/lean_prover.py +0 -0
  192. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/length.py +0 -0
  193. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  194. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/math.py +0 -0
  195. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  196. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/reasoning_steps.py +0 -0
  197. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/repetition.py +0 -0
  198. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rewards/tag_count.py +0 -0
  199. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/rl_processing.py +0 -0
  200. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/server.py +0 -0
  201. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/stats/__init__.py +0 -0
  202. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/stats/confidence_intervals.py +0 -0
  203. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/typed_interface.py +0 -0
  204. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/__init__.py +0 -0
  205. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/errors.py +0 -0
  206. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/types/types.py +0 -0
  207. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/__init__.py +0 -0
  208. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/batch_evaluation.py +0 -0
  209. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/batch_transformation.py +0 -0
  210. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/check_server_status.py +0 -0
  211. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/dataset_helpers.py +0 -0
  212. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/logs_server.py +0 -0
  213. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/module_loader.py +0 -0
  214. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/packaging_utils.py +0 -0
  215. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/show_results_url.py +0 -0
  216. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/static_policy.py +0 -0
  217. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/subprocess_utils.py +0 -0
  218. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol/utils/vite_server.py +0 -0
  219. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/dependency_links.txt +0 -0
  220. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/entry_points.txt +0 -0
  221. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/requires.txt +0 -0
  222. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/eval_protocol.egg-info/top_level.txt +0 -0
  223. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/pyproject.toml +0 -0
  224. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/setup.cfg +0 -0
  225. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/setup.py +0 -0
  226. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_accuracy.py +0 -0
  227. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_accuracy_length.py +0 -0
  228. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_adapters_e2e.py +0 -0
  229. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_agent_orchestrator.py +0 -0
  230. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_agent_resources.py +0 -0
  231. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_auth.py +0 -0
  232. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_batch_evaluation.py +0 -0
  233. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cli.py +0 -0
  234. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cli_agent.py +0 -0
  235. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cli_args.py +0 -0
  236. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_code_execution.py +0 -0
  237. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_config.py +0 -0
  238. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_control_plane_separation.py +0 -0
  239. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_cpp_code.py +0 -0
  240. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_data_driven_task_manager.py +0 -0
  241. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_deepcoder_reward.py +0 -0
  242. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_deepeval_integration.py +0 -0
  243. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_deploy_integration.py +0 -0
  244. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_e2b_integration.py +0 -0
  245. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_e2b_js_integration.py +0 -0
  246. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_edge_cases.py +0 -0
  247. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_eval_protocol_import.py +0 -0
  248. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation.py +0 -0
  249. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation_integration.py +0 -0
  250. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation_postprocess.py +0 -0
  251. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_evaluation_preview_integration.py +0 -0
  252. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_event_bus.py +0 -0
  253. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_examples_end_to_end.py +0 -0
  254. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_fireworks_api.py +0 -0
  255. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_format.py +0 -0
  256. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_fractional_code.py +0 -0
  257. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_function_calling.py +0 -0
  258. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_gcp_tools.py +0 -0
  259. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_generic_server.py +0 -0
  260. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_human_id.py +0 -0
  261. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_integration.py +0 -0
  262. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_json_schema.py +0 -0
  263. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_kwargs_validation.py +0 -0
  264. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_language_consistency.py +0 -0
  265. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_lean_prover.py +0 -0
  266. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_lean_prover_runner.py +0 -0
  267. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_length.py +0 -0
  268. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_list_comparison_math_reward.py +0 -0
  269. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_logs_server.py +0 -0
  270. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_logs_server_simple.py +0 -0
  271. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_math.py +0 -0
  272. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_minimal.py +0 -0
  273. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_models.py +0 -0
  274. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_models_rl.py +0 -0
  275. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_multiple_choice_math_reward.py +0 -0
  276. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_n_variant_batch_integration.py +0 -0
  277. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_n_variant_integration.py +0 -0
  278. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_openai_compatibility.py +0 -0
  279. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_openeval_integration.py +0 -0
  280. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_packaging.py +0 -0
  281. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_parallel_rollouts.py +0 -0
  282. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_platform_api.py +0 -0
  283. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_quickstart_utils.py +0 -0
  284. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_readiness.py +0 -0
  285. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_reasoning_steps.py +0 -0
  286. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_repetition.py +0 -0
  287. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_repetition_debug.py +0 -0
  288. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_retry_mechanism.py +0 -0
  289. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_reward_function.py +0 -0
  290. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_reward_protocol_import.py +0 -0
  291. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_rl_processing.py +0 -0
  292. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_rollout_control_plane_integration.py +0 -0
  293. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_server.py +0 -0
  294. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_show_results_url.py +0 -0
  295. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_status_migration_changes.py +0 -0
  296. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_status_migration_integration.py +0 -0
  297. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_status_model.py +0 -0
  298. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_tag_count.py +0 -0
  299. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_tau_bench_airline_smoke.py +0 -0
  300. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_typed_interface.py +0 -0
  301. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_typed_interface_rl.py +0 -0
  302. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_url_handling.py +0 -0
  303. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/tests/test_vite_server.py +0 -0
  304. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/__init__.py +0 -0
  305. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/agent/__init__.py +0 -0
  306. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/agent/base.py +0 -0
  307. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/agent/llm_agent.py +0 -0
  308. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/__init__.py +0 -0
  309. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/api_config.py +0 -0
  310. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/data_model.py +0 -0
  311. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/api_service/simulation_service.py +0 -0
  312. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/cli.py +0 -0
  313. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/config.py +0 -0
  314. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/airline/policy.md +0 -0
  315. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/mock/policy.md +0 -0
  316. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  317. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/retail/policy.md +0 -0
  318. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  319. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  320. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  321. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  322. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  323. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  324. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  325. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/__init__.py +0 -0
  326. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/message.py +0 -0
  327. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/simulation.py +0 -0
  328. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/data_model/tasks.py +0 -0
  329. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/__init__.py +0 -0
  330. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/__init__.py +0 -0
  331. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/data_model.py +0 -0
  332. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/environment.py +0 -0
  333. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/tools.py +0 -0
  334. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/airline/utils.py +0 -0
  335. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/__init__.py +0 -0
  336. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/data_model.py +0 -0
  337. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/environment.py +0 -0
  338. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/tools.py +0 -0
  339. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/mock/utils.py +0 -0
  340. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/__init__.py +0 -0
  341. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/data_model.py +0 -0
  342. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/environment.py +0 -0
  343. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/tools.py +0 -0
  344. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/retail/utils.py +0 -0
  345. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/__init__.py +0 -0
  346. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/data_model.py +0 -0
  347. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/environment.py +0 -0
  348. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  349. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  350. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  351. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  352. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  353. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  354. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  355. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  356. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/tools.py +0 -0
  357. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  358. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  359. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/domains/telecom/utils.py +0 -0
  360. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/__init__.py +0 -0
  361. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/db.py +0 -0
  362. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/environment.py +0 -0
  363. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/server.py +0 -0
  364. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/tool.py +0 -0
  365. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/toolkit.py +0 -0
  366. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  367. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/__init__.py +0 -0
  368. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator.py +0 -0
  369. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  370. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  371. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  372. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  373. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  374. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/metrics/__init__.py +0 -0
  375. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/metrics/agent_metrics.py +0 -0
  376. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  377. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/__init__.py +0 -0
  378. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  379. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  380. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/orchestrator/utils.py +0 -0
  381. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/registry.py +0 -0
  382. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/run.py +0 -0
  383. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/__init__.py +0 -0
  384. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/check_data.py +0 -0
  385. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  386. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/start_servers.py +0 -0
  387. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/scripts/view_simulations.py +0 -0
  388. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/user/__init__.py +0 -0
  389. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/user/base.py +0 -0
  390. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/user/user_simulator.py +0 -0
  391. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/__init__.py +0 -0
  392. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/display.py +0 -0
  393. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/io_utils.py +0 -0
  394. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/llm_utils.py +0 -0
  395. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/pydantic_utils.py +0 -0
  396. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vendor/tau2/utils/utils.py +0 -0
  397. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/versioneer.py +0 -0
  398. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  399. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/index-C8woq7EO.js +0 -0
  400. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/index-C8woq7EO.js.map +0 -0
  401. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/index-CSKGq1w7.css +0 -0
  402. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  403. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.36}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.35.dev1
3
+ Version: 0.2.36
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -24,7 +24,7 @@ from .mcp_env import (
24
24
  )
25
25
  from .data_loader import DynamicDataLoader, InlineDataLoader
26
26
  from . import mcp, rewards
27
- from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata
27
+ from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata, Status
28
28
  from .playback_policy import PlaybackPolicyBase
29
29
  from .resources import create_llm_resource
30
30
  from .reward_function import RewardFunction
@@ -63,6 +63,7 @@ except ImportError:
63
63
  warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
64
64
 
65
65
  __all__ = [
66
+ "Status",
66
67
  "RemoteRolloutProcessor",
67
68
  "InputMetadata",
68
69
  "EvaluationRow",
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-02T01:54:12-0700",
11
+ "date": "2025-10-02T09:18:41-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "c4108ba1d87ba6fb76100c63e6ee16f48bc06598",
15
- "version": "0.2.35-dev1"
14
+ "full-revisionid": "1283ce226147f55336cd4fe56c744640a0a9ba9b",
15
+ "version": "0.2.36"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -0,0 +1,286 @@
1
+ """
2
+ Centralized Elasticsearch client for all Elasticsearch API operations.
3
+
4
+ This module provides a unified interface for all Elasticsearch operations
5
+ used throughout the codebase, including index management, document operations,
6
+ and search functionality.
7
+ """
8
+
9
+ import json
10
+ import requests
11
+ from typing import Any, Dict, List, Optional, Union
12
+ from urllib.parse import urlparse
13
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
14
+
15
+
16
class ElasticsearchClient:
    """Centralized client for all Elasticsearch operations.

    Every public method is best-effort: transport errors and unexpected HTTP
    statuses are reported through the return value (``False`` / ``None``)
    rather than by raising, so callers such as logging handlers never crash
    because Elasticsearch is unreachable.
    """

    def __init__(self, config: "ElasticsearchConfig"):
        """Initialize the Elasticsearch client.

        Args:
            config: Elasticsearch configuration. ``url``, ``index_name`` and
                ``api_key`` are read here; ``verify_ssl`` is read on every request.
        """
        self.config = config
        self.base_url = config.url.rstrip("/")
        self.index_url = f"{self.base_url}/{config.index_name}"
        self._headers = {"Content-Type": "application/json", "Authorization": f"ApiKey {config.api_key}"}

    def _make_request(
        self,
        method: str,
        url: str,
        json_data: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        params: Optional[Dict[str, Any]] = None,
        timeout: int = 30,
        data: Optional[Union[str, bytes]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> "requests.Response":
        """Make an HTTP request to Elasticsearch.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE, HEAD)
            url: Full URL for the request
            json_data: JSON data to send in request body
            params: Query parameters
            timeout: Request timeout in seconds
            data: Pre-encoded request body, for endpoints that do not accept a
                single JSON document (e.g. the newline-delimited ``_bulk`` API)
            headers: Extra headers merged over the client's default headers

        Returns:
            requests.Response object

        Raises:
            requests.RequestException: If the request fails
        """
        # Copy so per-request overrides never leak into the shared defaults.
        request_headers = dict(self._headers)
        if headers:
            request_headers.update(headers)

        return requests.request(
            method=method,
            url=url,
            headers=request_headers,
            json=json_data,
            data=data,
            params=params,
            verify=self.config.verify_ssl,
            timeout=timeout,
        )

    def _request_succeeds(self, method: str, url: str, ok_statuses, **request_kwargs: Any) -> bool:
        """Return True when the request completes with a status in ``ok_statuses``."""
        try:
            response = self._make_request(method, url, **request_kwargs)
            return response.status_code in ok_statuses
        except Exception:
            # Best-effort contract: any failure is reported as False.
            return False

    def _fetch_json(self, method: str, url: str, **request_kwargs: Any) -> Optional[Dict[str, Any]]:
        """Return the decoded JSON body of a 200 response, or None on any failure."""
        try:
            response = self._make_request(method, url, **request_kwargs)
            if response.status_code == 200:
                return response.json()
            return None
        except Exception:
            # Best-effort contract: any failure is reported as None.
            return None

    # Index Management Operations

    def create_index(self, mapping: Dict[str, Any]) -> bool:
        """Create an index with the specified mapping.

        Args:
            mapping: Index mapping configuration

        Returns:
            bool: True if successful, False otherwise
        """
        return self._request_succeeds("PUT", self.index_url, (200, 201), json_data=mapping)

    def index_exists(self) -> bool:
        """Check if the index exists.

        Returns:
            bool: True if index exists, False otherwise
        """
        return self._request_succeeds("HEAD", self.index_url, (200,))

    def delete_index(self) -> bool:
        """Delete the index.

        Returns:
            bool: True if successful, False otherwise
        """
        # 404 means the index doesn't exist, which counts as deleted.
        return self._request_succeeds("DELETE", self.index_url, (200, 404))

    def get_mapping(self) -> Optional[Dict[str, Any]]:
        """Get the index mapping.

        Returns:
            Dict containing mapping data, or None if failed
        """
        return self._fetch_json("GET", f"{self.index_url}/_mapping")

    def get_index_stats(self) -> Optional[Dict[str, Any]]:
        """Get index statistics.

        Returns:
            Dict containing index statistics, or None if failed
        """
        return self._fetch_json("GET", f"{self.index_url}/_stats")

    # Document Operations

    def index_document(self, document: Dict[str, Any], doc_id: Optional[str] = None) -> bool:
        """Index a document.

        Args:
            document: Document to index
            doc_id: Optional document ID; when omitted (or empty) Elasticsearch
                assigns one

        Returns:
            bool: True if successful, False otherwise
        """
        from urllib.parse import quote

        url = f"{self.index_url}/_doc"
        if doc_id:
            # Percent-encode the id so characters such as "/", "?" or spaces
            # cannot change which endpoint the request is routed to.
            url = f"{url}/{quote(str(doc_id), safe='')}"

        return self._request_succeeds("POST", url, (200, 201), json_data=document)

    def bulk_index_documents(self, documents: List[Dict[str, Any]]) -> bool:
        """Bulk index multiple documents.

        The ``_bulk`` endpoint only accepts newline-delimited JSON (one action
        line followed by one source line per document, terminated by a final
        newline), so the body is serialized here instead of being sent as a
        single JSON array.

        Args:
            documents: List of documents to index

        Returns:
            bool: True if every document was indexed (or there was nothing to
            index), False otherwise
        """
        if not documents:
            # Nothing to send; an empty bulk body would be rejected by the server.
            return True

        try:
            lines = []
            for doc in documents:
                lines.append(json.dumps({"index": {}}))
                lines.append(json.dumps(doc))
            payload = "\n".join(lines) + "\n"

            response = self._make_request(
                "POST",
                f"{self.index_url}/_bulk",
                data=payload.encode("utf-8"),
                headers={"Content-Type": "application/x-ndjson"},
            )
            if response.status_code != 200:
                return False

            # _bulk answers 200 even when individual items fail; the top-level
            # "errors" flag is the real success indicator.
            return not response.json().get("errors", False)
        except Exception:
            return False

    # Search Operations

    def search(
        self, query: Dict[str, Any], size: int = 10, from_: int = 0, sort: Optional[List[Dict[str, Any]]] = None
    ) -> Optional[Dict[str, Any]]:
        """Search documents in the index.

        Args:
            query: Elasticsearch query
            size: Number of results to return
            from_: Starting offset
            sort: Sort specification

        Returns:
            Dict containing search results, or None if failed
        """
        search_body: Dict[str, Any] = {"query": query, "size": size, "from": from_}
        if sort:
            search_body["sort"] = sort

        return self._fetch_json("POST", f"{self.index_url}/_search", json_data=search_body)

    def search_by_term(self, field: str, value: Any, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by exact term match.

        Args:
            field: Field name to search
            value: Value to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        return self.search({"term": {field: value}}, size=size)

    def search_by_match(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by text match.

        Args:
            field: Field name to search
            value: Text to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        return self.search({"match": {field: value}}, size=size)

    def search_by_match_phrase_prefix(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by phrase prefix match.

        Args:
            field: Field name to search
            value: Phrase prefix to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        return self.search({"match_phrase_prefix": {field: value}}, size=size)

    def search_all(self, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search all documents in the index.

        Args:
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        return self.search({"match_all": {}}, size=size)

    # Health and Status Operations

    def health_check(self) -> bool:
        """Check if Elasticsearch is healthy.

        Only the HTTP status of ``/_cluster/health`` is inspected; the reported
        cluster status inside the body is not examined.

        Returns:
            bool: True if the health endpoint answered 200, False otherwise
        """
        return self._request_succeeds("GET", f"{self.base_url}/_cluster/health", (200,))

    def get_cluster_info(self) -> Optional[Dict[str, Any]]:
        """Get cluster information.

        The payload returned is the body of the ``/_cluster/health`` endpoint.

        Returns:
            Dict containing cluster info, or None if failed
        """
        return self._fetch_json("GET", f"{self.base_url}/_cluster/health")
@@ -1,50 +1,92 @@
1
1
  import json
2
2
  import logging
3
3
  import asyncio
4
+ import os
4
5
  import threading
5
6
  from concurrent.futures import ThreadPoolExecutor
6
7
  from typing import Optional, Tuple, Any, Dict
7
8
  from datetime import datetime
8
- from urllib.parse import urlparse
9
- import requests
10
9
 
11
- from eval_protocol.types.remote_rollout_processor import ElasticSearchConfig
10
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
11
+ from .elasticsearch_client import ElasticsearchClient
12
12
 
13
13
 
14
14
  class ElasticsearchDirectHttpHandler(logging.Handler):
15
- def __init__(self, elasticsearch_config: ElasticSearchConfig) -> None:
15
+ def __init__(self, elasticsearch_config: ElasticsearchConfig) -> None:
16
16
  super().__init__()
17
- self.base_url: str = elasticsearch_config.url.rstrip("/")
18
- self.index_name: str = elasticsearch_config.index_name
19
- self.api_key: str = elasticsearch_config.api_key
20
- self.url: str = f"{self.base_url}/{self.index_name}/_doc"
17
+ self.config = ElasticsearchConfig(
18
+ url=elasticsearch_config.url,
19
+ api_key=elasticsearch_config.api_key,
20
+ index_name=elasticsearch_config.index_name,
21
+ )
22
+ self.client = ElasticsearchClient(self.config)
21
23
  self.formatter: logging.Formatter = logging.Formatter()
22
24
  self._executor = None
23
25
 
24
- # Parse URL to determine if we should verify SSL
25
- parsed_url = urlparse(elasticsearch_config.url)
26
- self.verify_ssl = parsed_url.scheme == "https"
27
-
28
26
  def emit(self, record: logging.LogRecord) -> None:
29
27
  """Emit a log record by scheduling it for async transmission."""
30
28
  try:
31
29
  # Create proper ISO 8601 timestamp
32
30
  timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
33
31
 
32
+ rollout_id = self._get_rollout_id(record)
33
+ status_info = self._get_status_info(record)
34
+
34
35
  data: Dict[str, Any] = {
35
36
  "@timestamp": timestamp,
36
37
  "level": record.levelname,
37
38
  "message": record.getMessage(),
38
39
  "logger_name": record.name,
39
- # Add other relevant record attributes if needed
40
+ "rollout_id": rollout_id,
40
41
  }
41
42
 
43
+ # Add status information if present
44
+ if status_info:
45
+ data.update(status_info)
46
+
42
47
  # Schedule the HTTP request to run asynchronously
43
48
  self._schedule_async_send(data, record)
44
49
  except Exception as e:
45
50
  self.handleError(record)
46
51
  print(f"Error preparing log for Elasticsearch: {e}")
47
52
 
53
+ def _get_rollout_id(self, record: logging.LogRecord) -> str:
54
+ """Get the rollout ID from environment variables."""
55
+ rollout_id = os.getenv("EP_ROLLOUT_ID")
56
+ if rollout_id is None:
57
+ raise ValueError(
58
+ "EP_ROLLOUT_ID environment variable is not set but needed for ElasticsearchDirectHttpHandler"
59
+ )
60
+ return rollout_id
61
+
62
+ def _get_status_info(self, record: logging.LogRecord) -> Optional[Dict[str, Any]]:
63
+ """Extract status information from the log record's extra data."""
64
+ # Check if 'status' is in the extra data (passed via extra parameter)
65
+ if hasattr(record, "status") and record.status is not None: # type: ignore
66
+ status = record.status # type: ignore
67
+
68
+ # Handle Status class instances (Pydantic BaseModel)
69
+ if hasattr(status, "code") and hasattr(status, "message"):
70
+ # Status object - extract code and message
71
+ status_code = status.code
72
+ # Handle both enum values and direct integer values
73
+ if hasattr(status_code, "value"):
74
+ status_code = status_code.value
75
+
76
+ return {
77
+ "status_code": status_code,
78
+ "status_message": status.message,
79
+ "status_details": getattr(status, "details", []),
80
+ }
81
+ elif isinstance(status, dict):
82
+ # Dictionary representation of status
83
+ return {
84
+ "status_code": status.get("code"),
85
+ "status_message": status.get("message"),
86
+ "status_details": status.get("details", []),
87
+ }
88
+ return None
89
+
48
90
  def _schedule_async_send(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
49
91
  """Schedule an async task to send the log data to Elasticsearch."""
50
92
  if self._executor is None:
@@ -59,13 +101,9 @@ class ElasticsearchDirectHttpHandler(logging.Handler):
59
101
  def _send_to_elasticsearch(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
60
102
  """Send data to Elasticsearch (runs in thread pool)."""
61
103
  try:
62
- response: requests.Response = requests.post(
63
- self.url,
64
- headers={"Content-Type": "application/json", "Authorization": f"ApiKey {self.api_key}"},
65
- data=json.dumps(data),
66
- verify=self.verify_ssl, # If using HTTPS, verify SSL certificate
67
- )
68
- response.raise_for_status() # Raise an exception for HTTP errors
104
+ success = self.client.index_document(data)
105
+ if not success:
106
+ raise Exception("Failed to index document to Elasticsearch")
69
107
  except Exception as e:
70
108
  # Re-raise to be handled by the callback
71
109
  raise e
@@ -1,6 +1,6 @@
1
- import requests
2
1
  from typing import Dict, Any, Optional
3
- from urllib.parse import urlparse
2
+ from .elasticsearch_client import ElasticsearchClient
3
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
4
4
 
5
5
 
6
6
  class ElasticsearchIndexManager:
@@ -14,16 +14,10 @@ class ElasticsearchIndexManager:
14
14
  index_name: Name of the index to manage
15
15
  api_key: API key for authentication
16
16
  """
17
- self.base_url: str = base_url.rstrip("/")
18
- self.index_name: str = index_name
19
- self.api_key: str = api_key
20
- self.index_url: str = f"{self.base_url}/{self.index_name}"
17
+ self.config = ElasticsearchConfig(url=base_url, api_key=api_key, index_name=index_name)
18
+ self.client = ElasticsearchClient(self.config)
21
19
  self._mapping_created: bool = False
22
20
 
23
- # Parse URL to determine if we should verify SSL
24
- parsed_url = urlparse(base_url)
25
- self.verify_ssl = parsed_url.scheme == "https"
26
-
27
21
  def create_logging_index_mapping(self) -> bool:
28
22
  """Create index with proper mapping for logging data.
29
23
 
@@ -41,25 +35,22 @@ class ElasticsearchIndexManager:
41
35
 
42
36
  # If index exists but has wrong mapping, delete and recreate it
43
37
  if self.index_exists():
44
- print(f"Warning: Index {self.index_name} exists with incorrect mapping. Deleting and recreating...")
38
+ print(
39
+ f"Warning: Index {self.config.index_name} exists with incorrect mapping. Deleting and recreating..."
40
+ )
45
41
  if not self.delete_index():
46
- print(f"Warning: Failed to delete existing index {self.index_name}")
42
+ print(f"Warning: Failed to delete existing index {self.config.index_name}")
47
43
  return False
48
44
 
49
45
  # Create index with proper mapping
50
46
  mapping = self._get_logging_mapping()
51
- response = requests.put(
52
- self.index_url,
53
- headers={"Content-Type": "application/json", "Authorization": f"ApiKey {self.api_key}"},
54
- json=mapping,
55
- verify=self.verify_ssl,
56
- )
57
-
58
- if response.status_code in [200, 201]:
47
+ success = self.client.create_index(mapping)
48
+
49
+ if success:
59
50
  self._mapping_created = True
60
51
  return True
61
52
  else:
62
- print(f"Warning: Failed to create index mapping: {response.status_code} - {response.text}")
53
+ print("Warning: Failed to create index mapping")
63
54
  return False
64
55
 
65
56
  except Exception as e:
@@ -74,46 +65,50 @@ class ElasticsearchIndexManager:
74
65
  """
75
66
  try:
76
67
  # Check if index exists
77
- response = requests.head(
78
- self.index_url, headers={"Authorization": f"ApiKey {self.api_key}"}, verify=self.verify_ssl
79
- )
80
-
81
- if response.status_code != 200:
68
+ if not self.client.index_exists():
82
69
  return False
83
70
 
84
71
  # Check if mapping is correct
85
- mapping_response = requests.get(
86
- f"{self.index_url}/_mapping",
87
- headers={"Authorization": f"ApiKey {self.api_key}"},
88
- verify=self.verify_ssl,
89
- )
90
-
91
- if mapping_response.status_code != 200:
72
+ mapping_data = self.client.get_mapping()
73
+ if mapping_data is None:
92
74
  return False
93
75
 
94
- mapping_data = mapping_response.json()
95
76
  return self._has_correct_timestamp_mapping(mapping_data)
96
77
 
97
78
  except Exception:
98
79
  return False
99
80
 
100
81
  def _has_correct_timestamp_mapping(self, mapping_data: Dict[str, Any]) -> bool:
101
- """Check if the mapping has @timestamp as a date field.
82
+ """Check if the mapping has @timestamp as a date field, rollout_id as a keyword field, and status fields.
102
83
 
103
84
  Args:
104
85
  mapping_data: Elasticsearch mapping response data
105
86
 
106
87
  Returns:
107
- bool: True if @timestamp is correctly mapped as date field
88
+ bool: True if all required fields are correctly mapped
108
89
  """
109
90
  try:
110
- return (
111
- self.index_name in mapping_data
112
- and "mappings" in mapping_data[self.index_name]
113
- and "properties" in mapping_data[self.index_name]["mappings"]
114
- and "@timestamp" in mapping_data[self.index_name]["mappings"]["properties"]
115
- and mapping_data[self.index_name]["mappings"]["properties"]["@timestamp"].get("type") == "date"
116
- )
91
+ if not (
92
+ self.config.index_name in mapping_data
93
+ and "mappings" in mapping_data[self.config.index_name]
94
+ and "properties" in mapping_data[self.config.index_name]["mappings"]
95
+ ):
96
+ return False
97
+
98
+ properties = mapping_data[self.config.index_name]["mappings"]["properties"]
99
+
100
+ # Check @timestamp is mapped as date
101
+ timestamp_ok = "@timestamp" in properties and properties["@timestamp"].get("type") == "date"
102
+
103
+ # Check rollout_id is mapped as keyword
104
+ rollout_id_ok = "rollout_id" in properties and properties["rollout_id"].get("type") == "keyword"
105
+
106
+ # Check status fields are mapped correctly
107
+ status_code_ok = "status_code" in properties and properties["status_code"].get("type") == "integer"
108
+ status_message_ok = "status_message" in properties and properties["status_message"].get("type") == "text"
109
+ status_details_ok = "status_details" in properties and properties["status_details"].get("type") == "object"
110
+
111
+ return timestamp_ok and rollout_id_ok and status_code_ok and status_message_ok and status_details_ok
117
112
  except (KeyError, TypeError):
118
113
  return False
119
114
 
@@ -130,6 +125,10 @@ class ElasticsearchIndexManager:
130
125
  "level": {"type": "keyword"},
131
126
  "message": {"type": "text"},
132
127
  "logger_name": {"type": "keyword"},
128
+ "rollout_id": {"type": "keyword"},
129
+ "status_code": {"type": "integer"},
130
+ "status_message": {"type": "text"},
131
+ "status_details": {"type": "object"},
133
132
  }
134
133
  }
135
134
  }
@@ -141,14 +140,12 @@ class ElasticsearchIndexManager:
141
140
  bool: True if index was deleted successfully, False otherwise.
142
141
  """
143
142
  try:
144
- response = requests.delete(
145
- self.index_url, headers={"Authorization": f"ApiKey {self.api_key}"}, verify=self.verify_ssl
146
- )
147
- if response.status_code in [200, 404]: # 404 means index doesn't exist, which is fine
143
+ success = self.client.delete_index()
144
+ if success:
148
145
  self._mapping_created = False
149
146
  return True
150
147
  else:
151
- print(f"Warning: Failed to delete index: {response.status_code} - {response.text}")
148
+ print("Warning: Failed to delete index")
152
149
  return False
153
150
  except Exception as e:
154
151
  print(f"Warning: Failed to delete index: {e}")
@@ -160,13 +157,7 @@ class ElasticsearchIndexManager:
160
157
  Returns:
161
158
  bool: True if index exists, False otherwise.
162
159
  """
163
- try:
164
- response = requests.head(
165
- self.index_url, headers={"Authorization": f"ApiKey {self.api_key}"}, verify=self.verify_ssl
166
- )
167
- return response.status_code == 200
168
- except Exception:
169
- return False
160
+ return self.client.index_exists()
170
161
 
171
162
  def get_index_stats(self) -> Optional[Dict[str, Any]]:
172
163
  """Get statistics about the index.
@@ -174,14 +165,4 @@ class ElasticsearchIndexManager:
174
165
  Returns:
175
166
  Dict containing index statistics, or None if failed
176
167
  """
177
- try:
178
- response = requests.get(
179
- f"{self.index_url}/_stats",
180
- headers={"Authorization": f"ApiKey {self.api_key}"},
181
- verify=self.verify_ssl,
182
- )
183
- if response.status_code == 200:
184
- return response.json()
185
- return None
186
- except Exception:
187
- return None
168
+ return self.client.get_index_stats()