eval-protocol 0.2.35.dev1__tar.gz → 0.2.35.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (403):
  1. {eval_protocol-0.2.35.dev1/eval_protocol.egg-info → eval_protocol-0.2.35.dev2}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/__init__.py +2 -1
  3. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/_version.py +3 -3
  4. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli.py +1 -7
  5. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -4
  6. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +2 -1
  7. eval_protocol-0.2.35.dev2/eval_protocol/logging/elasticsearch_client.py +286 -0
  8. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/logging/elasticsearch_direct_http_handler.py +58 -20
  9. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/logging/elasticsearch_index_manager.py +47 -66
  10. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/elasticsearch_setup.py +8 -8
  11. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/remote_rollout_processor.py +8 -3
  12. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/remote_rollout_processor.py +9 -2
  13. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2/eval_protocol.egg-info}/PKG-INFO +1 -1
  14. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/SOURCES.txt +1 -0
  15. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/LICENSE +0 -0
  16. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/README.md +0 -0
  17. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/__init__.py +0 -0
  18. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/normalize_sandbox_fusion.py +0 -0
  19. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/utils/__init__.py +0 -0
  20. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/utils/generate_api_key.py +0 -0
  21. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/development/utils/subprocess_manager.py +0 -0
  22. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/__main__.py +0 -0
  23. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/__init__.py +0 -0
  24. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/base.py +0 -0
  25. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/bigquery.py +0 -0
  26. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/braintrust.py +0 -0
  27. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/huggingface.py +0 -0
  28. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/langchain.py +0 -0
  29. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/langfuse.py +0 -0
  30. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/langsmith.py +0 -0
  31. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/openai_responses.py +0 -0
  32. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/trl.py +0 -0
  33. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/adapters/utils.py +0 -0
  34. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/__init__.py +0 -0
  35. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/models.py +0 -0
  36. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/orchestrator.py +0 -0
  37. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resource_abc.py +0 -0
  38. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resource_pool.py +0 -0
  39. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
  40. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  41. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  42. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  43. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  44. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  45. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
  46. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  47. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  48. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
  49. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/task_manager.py +0 -0
  50. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/agent/tool_registry.py +0 -0
  51. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/auth.py +0 -0
  52. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/__init__.py +0 -0
  53. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  54. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  55. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_aime25.py +0 -0
  56. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  57. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  58. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  59. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  60. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
  61. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/common.py +0 -0
  62. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/deploy.py +0 -0
  63. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  64. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/logs.py +0 -0
  65. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/preview.py +0 -0
  66. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/cli_commands/upload.py +0 -0
  67. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/common_utils.py +0 -0
  68. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/config.py +0 -0
  69. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/__init__.py +0 -0
  70. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  71. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  72. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  73. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/data_loader/models.py +0 -0
  74. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/__init__.py +0 -0
  75. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  76. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  77. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  78. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  79. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/datasets/__init__.py +0 -0
  80. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/datasets/loader.py +0 -0
  81. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/directory_utils.py +0 -0
  82. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/evaluation.py +0 -0
  83. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/__init__.py +0 -0
  84. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/event_bus.py +0 -0
  85. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/logger.py +0 -0
  86. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  87. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  88. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/execution/__init__.py +0 -0
  89. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/execution/pipeline.py +0 -0
  90. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/gcp_tools.py +0 -0
  91. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generation/cache.py +0 -0
  92. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generation/clients/base.py +0 -0
  93. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generation/clients.py +0 -0
  94. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/generic_server.py +0 -0
  95. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/get_pep440_version.py +0 -0
  96. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/human_id/__init__.py +0 -0
  97. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/human_id/dictionary.py +0 -0
  98. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/__init__.py +0 -0
  99. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/deepeval.py +0 -0
  100. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/openeval.py +0 -0
  101. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/integrations/trl.py +0 -0
  102. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/logging_utils.py +0 -0
  103. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/__init__.py +0 -0
  104. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/adapter.py +0 -0
  105. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
  106. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/client/connection.py +0 -0
  107. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/clients.py +0 -0
  108. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
  109. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/base_policy.py +0 -0
  110. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/manager.py +0 -0
  111. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
  112. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
  113. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  114. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/mcpgym.py +0 -0
  115. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/process_manager.py +0 -0
  116. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
  117. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/session/manager.py +0 -0
  118. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
  119. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
  120. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
  121. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/config.py +0 -0
  122. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/main.py +0 -0
  123. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  124. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  125. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  126. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  127. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_env.py +0 -0
  128. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/__init__.py +0 -0
  129. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  130. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  131. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  132. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  133. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  134. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  135. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  136. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  137. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  138. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  139. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  140. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/models.py +0 -0
  141. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/packaging.py +0 -0
  142. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/platform_api.py +0 -0
  143. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/playback_policy.py +0 -0
  144. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/__init__.py +0 -0
  145. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  146. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  147. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  148. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  149. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  150. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  151. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  152. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  153. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/evaluation_test.py +0 -0
  154. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  155. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/exception_config.py +0 -0
  156. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/execution.py +0 -0
  157. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  158. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  159. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/parameterize.py +0 -0
  160. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/plugin.py +0 -0
  161. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/rollout_processor.py +0 -0
  162. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/store_experiment_link.py +0 -0
  163. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/store_results_url.py +0 -0
  164. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/types.py +0 -0
  165. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/utils.py +0 -0
  166. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/pytest/validate_signature.py +0 -0
  167. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/__init__.py +0 -0
  168. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge.py +0 -0
  169. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  170. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  171. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  172. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  173. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/quickstart/utils.py +0 -0
  174. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/resources.py +0 -0
  175. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/reward_function.py +0 -0
  176. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/__init__.py +0 -0
  177. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/accuracy.py +0 -0
  178. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
  179. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  180. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  181. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
  182. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
  183. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/code_execution.py +0 -0
  184. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
  185. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
  186. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  187. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/format.py +0 -0
  188. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/function_calling.py +0 -0
  189. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/json_schema.py +0 -0
  190. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
  191. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
  192. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/length.py +0 -0
  193. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  194. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/math.py +0 -0
  195. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  196. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
  197. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/repetition.py +0 -0
  198. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rewards/tag_count.py +0 -0
  199. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/rl_processing.py +0 -0
  200. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/server.py +0 -0
  201. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/stats/__init__.py +0 -0
  202. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/stats/confidence_intervals.py +0 -0
  203. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/typed_interface.py +0 -0
  204. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/__init__.py +0 -0
  205. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/errors.py +0 -0
  206. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/types/types.py +0 -0
  207. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/__init__.py +0 -0
  208. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
  209. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
  210. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/check_server_status.py +0 -0
  211. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
  212. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/logs_server.py +0 -0
  213. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/module_loader.py +0 -0
  214. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
  215. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/show_results_url.py +0 -0
  216. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/static_policy.py +0 -0
  217. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/subprocess_utils.py +0 -0
  218. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol/utils/vite_server.py +0 -0
  219. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
  220. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/entry_points.txt +0 -0
  221. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/requires.txt +0 -0
  222. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
  223. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/pyproject.toml +0 -0
  224. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/setup.cfg +0 -0
  225. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/setup.py +0 -0
  226. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_accuracy.py +0 -0
  227. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_accuracy_length.py +0 -0
  228. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_adapters_e2e.py +0 -0
  229. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_agent_orchestrator.py +0 -0
  230. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_agent_resources.py +0 -0
  231. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_auth.py +0 -0
  232. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_batch_evaluation.py +0 -0
  233. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cli.py +0 -0
  234. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cli_agent.py +0 -0
  235. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cli_args.py +0 -0
  236. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_code_execution.py +0 -0
  237. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_config.py +0 -0
  238. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_control_plane_separation.py +0 -0
  239. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_cpp_code.py +0 -0
  240. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_data_driven_task_manager.py +0 -0
  241. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_deepcoder_reward.py +0 -0
  242. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_deepeval_integration.py +0 -0
  243. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_deploy_integration.py +0 -0
  244. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_e2b_integration.py +0 -0
  245. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_e2b_js_integration.py +0 -0
  246. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_edge_cases.py +0 -0
  247. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_eval_protocol_import.py +0 -0
  248. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation.py +0 -0
  249. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation_integration.py +0 -0
  250. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation_postprocess.py +0 -0
  251. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_evaluation_preview_integration.py +0 -0
  252. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_event_bus.py +0 -0
  253. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_examples_end_to_end.py +0 -0
  254. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_fireworks_api.py +0 -0
  255. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_format.py +0 -0
  256. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_fractional_code.py +0 -0
  257. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_function_calling.py +0 -0
  258. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_gcp_tools.py +0 -0
  259. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_generic_server.py +0 -0
  260. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_human_id.py +0 -0
  261. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_integration.py +0 -0
  262. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_json_schema.py +0 -0
  263. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_kwargs_validation.py +0 -0
  264. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_language_consistency.py +0 -0
  265. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_lean_prover.py +0 -0
  266. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_lean_prover_runner.py +0 -0
  267. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_length.py +0 -0
  268. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_list_comparison_math_reward.py +0 -0
  269. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_logs_server.py +0 -0
  270. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_logs_server_simple.py +0 -0
  271. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_math.py +0 -0
  272. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_minimal.py +0 -0
  273. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_models.py +0 -0
  274. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_models_rl.py +0 -0
  275. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
  276. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_n_variant_batch_integration.py +0 -0
  277. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_n_variant_integration.py +0 -0
  278. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_openai_compatibility.py +0 -0
  279. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_openeval_integration.py +0 -0
  280. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_packaging.py +0 -0
  281. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_parallel_rollouts.py +0 -0
  282. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_platform_api.py +0 -0
  283. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_quickstart_utils.py +0 -0
  284. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_readiness.py +0 -0
  285. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_reasoning_steps.py +0 -0
  286. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_repetition.py +0 -0
  287. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_repetition_debug.py +0 -0
  288. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_retry_mechanism.py +0 -0
  289. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_reward_function.py +0 -0
  290. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_reward_protocol_import.py +0 -0
  291. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_rl_processing.py +0 -0
  292. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
  293. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_server.py +0 -0
  294. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_show_results_url.py +0 -0
  295. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_status_migration_changes.py +0 -0
  296. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_status_migration_integration.py +0 -0
  297. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_status_model.py +0 -0
  298. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_tag_count.py +0 -0
  299. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_tau_bench_airline_smoke.py +0 -0
  300. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_typed_interface.py +0 -0
  301. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_typed_interface_rl.py +0 -0
  302. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_url_handling.py +0 -0
  303. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/tests/test_vite_server.py +0 -0
  304. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/__init__.py +0 -0
  305. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/agent/__init__.py +0 -0
  306. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/agent/base.py +0 -0
  307. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/agent/llm_agent.py +0 -0
  308. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/__init__.py +0 -0
  309. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/api_config.py +0 -0
  310. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/data_model.py +0 -0
  311. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
  312. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/cli.py +0 -0
  313. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/config.py +0 -0
  314. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/airline/policy.md +0 -0
  315. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/mock/policy.md +0 -0
  316. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  317. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/retail/policy.md +0 -0
  318. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  319. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  320. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  321. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  322. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  323. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  324. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  325. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/__init__.py +0 -0
  326. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/message.py +0 -0
  327. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/simulation.py +0 -0
  328. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/data_model/tasks.py +0 -0
  329. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/__init__.py +0 -0
  330. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
  331. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
  332. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
  333. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
  334. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
  335. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
  336. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
  337. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
  338. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
  339. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
  340. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
  341. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
  342. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
  343. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
  344. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
  345. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
  346. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
  347. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
  348. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  349. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  350. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  351. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  352. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  353. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  354. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  355. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  356. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
  357. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  358. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  359. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
  360. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/__init__.py +0 -0
  361. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/db.py +0 -0
  362. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/environment.py +0 -0
  363. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/server.py +0 -0
  364. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/tool.py +0 -0
  365. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/toolkit.py +0 -0
  366. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  367. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
  368. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
  369. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  370. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  371. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  372. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  373. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  374. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/metrics/__init__.py +0 -0
  375. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
  376. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  377. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
  378. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  379. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  380. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
  381. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/registry.py +0 -0
  382. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/run.py +0 -0
  383. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/__init__.py +0 -0
  384. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/check_data.py +0 -0
  385. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  386. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
  387. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
  388. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/user/__init__.py +0 -0
  389. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/user/base.py +0 -0
  390. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/user/user_simulator.py +0 -0
  391. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/__init__.py +0 -0
  392. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/display.py +0 -0
  393. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/io_utils.py +0 -0
  394. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/llm_utils.py +0 -0
  395. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
  396. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vendor/tau2/utils/utils.py +0 -0
  397. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/versioneer.py +0 -0
  398. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  399. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/index-C8woq7EO.js +0 -0
  400. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/index-C8woq7EO.js.map +0 -0
  401. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/index-CSKGq1w7.css +0 -0
  402. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  403. {eval_protocol-0.2.35.dev1 → eval_protocol-0.2.35.dev2}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.35.dev1
3
+ Version: 0.2.35.dev2
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -24,7 +24,7 @@ from .mcp_env import (
24
24
  )
25
25
  from .data_loader import DynamicDataLoader, InlineDataLoader
26
26
  from . import mcp, rewards
27
- from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata
27
+ from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata, Status
28
28
  from .playback_policy import PlaybackPolicyBase
29
29
  from .resources import create_llm_resource
30
30
  from .reward_function import RewardFunction
@@ -63,6 +63,7 @@ except ImportError:
63
63
  warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
64
64
 
65
65
  __all__ = [
66
+ "Status",
66
67
  "RemoteRolloutProcessor",
67
68
  "InputMetadata",
68
69
  "EvaluationRow",
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-02T01:54:12-0700",
11
+ "date": "2025-10-02T12:04:07-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "c4108ba1d87ba6fb76100c63e6ee16f48bc06598",
15
- "version": "0.2.35-dev1"
14
+ "full-revisionid": "52178b3b90bb27a7f53fcbbba0bfbb50e7ebb416",
15
+ "version": "0.2.35-dev2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -15,14 +15,8 @@ from pathlib import Path
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
17
 
18
- from eval_protocol.evaluation import create_evaluation, preview_evaluation
19
-
20
18
  from .cli_commands.agent_eval_cmd import agent_eval_command
21
- from .cli_commands.common import (
22
- check_agent_environment,
23
- check_environment,
24
- setup_logging,
25
- )
19
+ from .cli_commands.common import setup_logging
26
20
  from .cli_commands.deploy import deploy_command
27
21
  from .cli_commands.deploy_mcp import deploy_mcp_command
28
22
  from .cli_commands.logs import logs_command
@@ -27,11 +27,7 @@ import logging # For logger instance
27
27
  import os # For environment variables
28
28
  from pathlib import Path
29
29
 
30
- from pydantic import ValidationError
31
-
32
- from eval_protocol.agent import Orchestrator
33
30
  from eval_protocol.agent.task_manager import TaskManager
34
- from eval_protocol.models import TaskDefinitionModel # Import the new Pydantic model
35
31
 
36
32
  # setup_logging is already called in cli.py's main, but good for standalone use if any
37
33
  # from .common import setup_logging
@@ -17,7 +17,6 @@ from omegaconf import ( # Ensure MISSING is imported if used in configs
17
17
  OmegaConf,
18
18
  )
19
19
 
20
- from eval_protocol.execution.pipeline import EvaluationPipeline
21
20
 
22
21
  logger = logging.getLogger(__name__)
23
22
 
@@ -26,6 +25,8 @@ def run_evaluation_command_logic(cfg: DictConfig) -> None:
26
25
  """
27
26
  Main logic for the 'run-evaluation' command.
28
27
  """
28
+ from eval_protocol.execution.pipeline import EvaluationPipeline
29
+
29
30
  logger.info("Starting 'run-evaluation' command with resolved Hydra config.")
30
31
 
31
32
  # Make Hydra's runtime output directory available to the pipeline if needed
@@ -0,0 +1,286 @@
1
+ """
2
+ Centralized Elasticsearch client for all Elasticsearch API operations.
3
+
4
+ This module provides a unified interface for all Elasticsearch operations
5
+ used throughout the codebase, including index management, document operations,
6
+ and search functionality.
7
+ """
8
+
9
+ import json
10
+ import requests
11
+ from typing import Any, Dict, List, Optional, Union
12
+ from urllib.parse import urlparse
13
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
14
+
15
+
16
class ElasticsearchClient:
    """Centralized client for all Elasticsearch operations.

    Every public method is best-effort: transport errors, non-success HTTP
    statuses and malformed responses are reported through the return value
    (``False`` or ``None``) instead of raising. Failures are deliberately not
    logged here, because this client is used by a logging handler and logging
    from inside it could feed back into that handler.
    """

    def __init__(self, config: "ElasticsearchConfig"):
        """Initialize the Elasticsearch client.

        Args:
            config: Elasticsearch configuration. ``url``, ``index_name``,
                ``api_key`` and ``verify_ssl`` are read from it.
        """
        self.config = config
        self.base_url = config.url.rstrip("/")
        self.index_url = f"{self.base_url}/{config.index_name}"
        self._headers = {"Content-Type": "application/json", "Authorization": f"ApiKey {config.api_key}"}

    def _make_request(
        self,
        method: str,
        url: str,
        json_data: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        params: Optional[Dict[str, Any]] = None,
        timeout: int = 30,
        data: Optional[Union[str, bytes]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> "requests.Response":
        """Make an HTTP request to Elasticsearch.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE, HEAD)
            url: Full URL for the request
            json_data: JSON data to send in request body
            params: Query parameters
            timeout: Request timeout in seconds
            data: Pre-serialized request body, used instead of ``json_data``
                for endpoints that do not accept a single JSON document
                (for example ``_bulk``)
            headers: Extra headers; they override the client defaults for
                this request only

        Returns:
            requests.Response object

        Raises:
            requests.RequestException: If the request fails
        """
        # Copy so that per-request overrides never leak into the shared defaults.
        request_headers = dict(self._headers)
        if headers:
            request_headers.update(headers)

        return requests.request(
            method=method,
            url=url,
            headers=request_headers,
            json=json_data,
            data=data,
            params=params,
            verify=self.config.verify_ssl,
            timeout=timeout,
        )

    def _request_json(
        self,
        method: str,
        url: str,
        json_data: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
    ) -> Optional[Dict[str, Any]]:
        """Issue a request and return the decoded JSON body.

        Returns:
            The parsed body when the response status is 200, otherwise None
            (including when the request raises or the body is not JSON).
        """
        try:
            response = self._make_request(method, url, json_data=json_data)
            if response.status_code == 200:
                return response.json()
            return None
        except Exception:
            return None

    def _document_url(self, doc_id: Optional[str] = None) -> str:
        """Build the ``_doc`` URL for this index, optionally for one document ID."""
        if doc_id:
            from urllib.parse import quote

            # Percent-encode the ID so characters such as "/", "?" or spaces
            # cannot change the path that Elasticsearch sees.
            return f"{self.index_url}/_doc/{quote(str(doc_id), safe='')}"
        return f"{self.index_url}/_doc"

    @staticmethod
    def _build_bulk_payload(documents: List[Dict[str, Any]]) -> bytes:
        """Serialize documents into the NDJSON body expected by ``_bulk``.

        Each document is preceded by an ``{"index": {}}`` action line and the
        payload ends with a newline, as the bulk endpoint requires.
        """
        lines = []
        for doc in documents:
            lines.append(json.dumps({"index": {}}))
            lines.append(json.dumps(doc))
        return ("\n".join(lines) + "\n").encode("utf-8")

    # Index Management Operations

    def create_index(self, mapping: Dict[str, Any]) -> bool:
        """Create an index with the specified mapping.

        Args:
            mapping: Index mapping configuration

        Returns:
            bool: True if the server answered 200 or 201, False otherwise
        """
        try:
            response = self._make_request("PUT", self.index_url, json_data=mapping)
            return response.status_code in [200, 201]
        except Exception:
            return False

    def index_exists(self) -> bool:
        """Check if the index exists.

        Returns:
            bool: True if a HEAD request on the index returns 200, False otherwise
        """
        try:
            response = self._make_request("HEAD", self.index_url)
            return response.status_code == 200
        except Exception:
            return False

    def delete_index(self) -> bool:
        """Delete the index.

        Returns:
            bool: True if the index was deleted or did not exist, False otherwise
        """
        try:
            response = self._make_request("DELETE", self.index_url)
            return response.status_code in [200, 404]  # 404 means index doesn't exist
        except Exception:
            return False

    def get_mapping(self) -> Optional[Dict[str, Any]]:
        """Get the index mapping.

        Returns:
            Dict containing mapping data, or None if failed
        """
        return self._request_json("GET", f"{self.index_url}/_mapping")

    def get_index_stats(self) -> Optional[Dict[str, Any]]:
        """Get index statistics.

        Returns:
            Dict containing index statistics, or None if failed
        """
        return self._request_json("GET", f"{self.index_url}/_stats")

    # Document Operations

    def index_document(self, document: Dict[str, Any], doc_id: Optional[str] = None) -> bool:
        """Index a document.

        Args:
            document: Document to index
            doc_id: Optional document ID; when omitted (or empty) the request
                goes to the ID-less ``_doc`` endpoint

        Returns:
            bool: True if the server answered 200 or 201, False otherwise
        """
        try:
            response = self._make_request("POST", self._document_url(doc_id), json_data=document)
            return response.status_code in [200, 201]
        except Exception:
            return False

    def bulk_index_documents(self, documents: List[Dict[str, Any]]) -> bool:
        """Bulk index multiple documents.

        Args:
            documents: List of documents to index

        Returns:
            bool: True if every document was indexed (or there was nothing to
            index), False otherwise
        """
        if not documents:
            # Nothing to send; an empty bulk body would be rejected by the server.
            return True

        try:
            response = self._make_request(
                "POST",
                f"{self.index_url}/_bulk",
                data=self._build_bulk_payload(documents),
                headers={"Content-Type": "application/x-ndjson"},
            )
            if response.status_code != 200:
                return False
            # The bulk endpoint answers 200 even when individual items fail;
            # per-item failures are signalled by the top-level "errors" flag.
            return not response.json().get("errors", False)
        except Exception:
            return False

    # Search Operations

    def search(
        self, query: Dict[str, Any], size: int = 10, from_: int = 0, sort: Optional[List[Dict[str, Any]]] = None
    ) -> Optional[Dict[str, Any]]:
        """Search documents in the index.

        Args:
            query: Elasticsearch query
            size: Number of results to return
            from_: Starting offset
            sort: Sort specification

        Returns:
            Dict containing search results, or None if failed
        """
        search_body: Dict[str, Any] = {"query": query, "size": size, "from": from_}
        if sort:
            search_body["sort"] = sort

        return self._request_json("POST", f"{self.index_url}/_search", json_data=search_body)

    def search_by_term(self, field: str, value: Any, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by exact term match.

        Args:
            field: Field name to search
            value: Value to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query = {"term": {field: value}}
        return self.search(query, size=size)

    def search_by_match(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by text match.

        Args:
            field: Field name to search
            value: Text to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query = {"match": {field: value}}
        return self.search(query, size=size)

    def search_by_match_phrase_prefix(self, field: str, value: str, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search documents by phrase prefix match.

        Args:
            field: Field name to search
            value: Phrase prefix to match
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query = {"match_phrase_prefix": {field: value}}
        return self.search(query, size=size)

    def search_all(self, size: int = 10) -> Optional[Dict[str, Any]]:
        """Search all documents in the index.

        Args:
            size: Number of results to return

        Returns:
            Dict containing search results, or None if failed
        """
        query: Dict[str, Any] = {"match_all": {}}
        return self.search(query, size=size)

    # Health and Status Operations

    def health_check(self) -> bool:
        """Check if Elasticsearch is reachable.

        Only the HTTP status of ``_cluster/health`` is inspected; the health
        colour reported in the body is not.

        Returns:
            bool: True if the endpoint answered 200, False otherwise
        """
        try:
            response = self._make_request("GET", f"{self.base_url}/_cluster/health")
            return response.status_code == 200
        except Exception:
            return False

    def get_cluster_info(self) -> Optional[Dict[str, Any]]:
        """Get cluster information.

        Returns:
            Dict containing the ``_cluster/health`` response, or None if failed
        """
        return self._request_json("GET", f"{self.base_url}/_cluster/health")
@@ -1,50 +1,92 @@
1
1
  import json
2
2
  import logging
3
3
  import asyncio
4
+ import os
4
5
  import threading
5
6
  from concurrent.futures import ThreadPoolExecutor
6
7
  from typing import Optional, Tuple, Any, Dict
7
8
  from datetime import datetime
8
- from urllib.parse import urlparse
9
- import requests
10
9
 
11
- from eval_protocol.types.remote_rollout_processor import ElasticSearchConfig
10
+ from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
11
+ from .elasticsearch_client import ElasticsearchClient
12
12
 
13
13
 
14
14
  class ElasticsearchDirectHttpHandler(logging.Handler):
15
- def __init__(self, elasticsearch_config: ElasticSearchConfig) -> None:
15
+ def __init__(self, elasticsearch_config: ElasticsearchConfig) -> None:
16
16
  super().__init__()
17
- self.base_url: str = elasticsearch_config.url.rstrip("/")
18
- self.index_name: str = elasticsearch_config.index_name
19
- self.api_key: str = elasticsearch_config.api_key
20
- self.url: str = f"{self.base_url}/{self.index_name}/_doc"
17
+ self.config = ElasticsearchConfig(
18
+ url=elasticsearch_config.url,
19
+ api_key=elasticsearch_config.api_key,
20
+ index_name=elasticsearch_config.index_name,
21
+ )
22
+ self.client = ElasticsearchClient(self.config)
21
23
  self.formatter: logging.Formatter = logging.Formatter()
22
24
  self._executor = None
23
25
 
24
- # Parse URL to determine if we should verify SSL
25
- parsed_url = urlparse(elasticsearch_config.url)
26
- self.verify_ssl = parsed_url.scheme == "https"
27
-
28
26
  def emit(self, record: logging.LogRecord) -> None:
29
27
  """Emit a log record by scheduling it for async transmission."""
30
28
  try:
31
29
  # Create proper ISO 8601 timestamp
32
30
  timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
33
31
 
32
+ rollout_id = self._get_rollout_id(record)
33
+ status_info = self._get_status_info(record)
34
+
34
35
  data: Dict[str, Any] = {
35
36
  "@timestamp": timestamp,
36
37
  "level": record.levelname,
37
38
  "message": record.getMessage(),
38
39
  "logger_name": record.name,
39
- # Add other relevant record attributes if needed
40
+ "rollout_id": rollout_id,
40
41
  }
41
42
 
43
+ # Add status information if present
44
+ if status_info:
45
+ data.update(status_info)
46
+
42
47
  # Schedule the HTTP request to run asynchronously
43
48
  self._schedule_async_send(data, record)
44
49
  except Exception as e:
45
50
  self.handleError(record)
46
51
  print(f"Error preparing log for Elasticsearch: {e}")
47
52
 
53
+ def _get_rollout_id(self, record: logging.LogRecord) -> str:
54
+ """Get the rollout ID from environment variables."""
55
+ rollout_id = os.getenv("EP_ROLLOUT_ID")
56
+ if rollout_id is None:
57
+ raise ValueError(
58
+ "EP_ROLLOUT_ID environment variable is not set but needed for ElasticsearchDirectHttpHandler"
59
+ )
60
+ return rollout_id
61
+
62
+ def _get_status_info(self, record: logging.LogRecord) -> Optional[Dict[str, Any]]:
63
+ """Extract status information from the log record's extra data."""
64
+ # Check if 'status' is in the extra data (passed via extra parameter)
65
+ if hasattr(record, "status") and record.status is not None: # type: ignore
66
+ status = record.status # type: ignore
67
+
68
+ # Handle Status class instances (Pydantic BaseModel)
69
+ if hasattr(status, "code") and hasattr(status, "message"):
70
+ # Status object - extract code and message
71
+ status_code = status.code
72
+ # Handle both enum values and direct integer values
73
+ if hasattr(status_code, "value"):
74
+ status_code = status_code.value
75
+
76
+ return {
77
+ "status_code": status_code,
78
+ "status_message": status.message,
79
+ "status_details": getattr(status, "details", []),
80
+ }
81
+ elif isinstance(status, dict):
82
+ # Dictionary representation of status
83
+ return {
84
+ "status_code": status.get("code"),
85
+ "status_message": status.get("message"),
86
+ "status_details": status.get("details", []),
87
+ }
88
+ return None
89
+
48
90
  def _schedule_async_send(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
49
91
  """Schedule an async task to send the log data to Elasticsearch."""
50
92
  if self._executor is None:
@@ -59,13 +101,9 @@ class ElasticsearchDirectHttpHandler(logging.Handler):
59
101
  def _send_to_elasticsearch(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
60
102
  """Send data to Elasticsearch (runs in thread pool)."""
61
103
  try:
62
- response: requests.Response = requests.post(
63
- self.url,
64
- headers={"Content-Type": "application/json", "Authorization": f"ApiKey {self.api_key}"},
65
- data=json.dumps(data),
66
- verify=self.verify_ssl, # If using HTTPS, verify SSL certificate
67
- )
68
- response.raise_for_status() # Raise an exception for HTTP errors
104
+ success = self.client.index_document(data)
105
+ if not success:
106
+ raise Exception("Failed to index document to Elasticsearch")
69
107
  except Exception as e:
70
108
  # Re-raise to be handled by the callback
71
109
  raise e