eval-protocol 0.2.45.dev0__tar.gz → 0.2.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (425) hide show
  1. {eval_protocol-0.2.45.dev0/eval_protocol.egg-info → eval_protocol-0.2.46}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/fireworks_tracing.py +2 -4
  4. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/evaluation_test.py +22 -1
  5. eval_protocol-0.2.46/eval_protocol/utils/browser_utils.py +114 -0
  6. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/logs_server.py +9 -1
  7. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46/eval_protocol.egg-info}/PKG-INFO +1 -1
  8. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol.egg-info/SOURCES.txt +1 -8
  9. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_show_results_url.py +141 -0
  10. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/__init__.py +0 -10
  11. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/app.py +0 -259
  12. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/auth.py +0 -12
  13. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/langfuse.py +0 -358
  14. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/litellm.py +0 -168
  15. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/main.py +0 -10
  16. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/models.py +0 -51
  17. eval_protocol-0.2.45.dev0/eval_protocol/proxy/proxy_core/redis_utils.py +0 -48
  18. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/LICENSE +0 -0
  19. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/README.md +0 -0
  20. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/development/__init__.py +0 -0
  21. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/development/normalize_sandbox_fusion.py +0 -0
  22. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/development/utils/__init__.py +0 -0
  23. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/development/utils/generate_api_key.py +0 -0
  24. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/development/utils/subprocess_manager.py +0 -0
  25. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/__init__.py +0 -0
  26. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/__main__.py +0 -0
  27. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/__init__.py +0 -0
  28. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/base.py +0 -0
  29. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/bigquery.py +0 -0
  30. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/braintrust.py +0 -0
  31. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/huggingface.py +0 -0
  32. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/langchain.py +0 -0
  33. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/langfuse.py +0 -0
  34. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/langsmith.py +0 -0
  35. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/openai_responses.py +0 -0
  36. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/trl.py +0 -0
  37. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/utils.py +0 -0
  38. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/adapters/weave.py +0 -0
  39. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/__init__.py +0 -0
  40. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/models.py +0 -0
  41. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/orchestrator.py +0 -0
  42. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resource_abc.py +0 -0
  43. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resource_pool.py +0 -0
  44. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/__init__.py +0 -0
  45. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  46. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  47. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  48. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  49. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  50. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/docker_resource.py +0 -0
  51. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  52. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  53. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/resources/sql_resource.py +0 -0
  54. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/task_manager.py +0 -0
  55. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/agent/tool_registry.py +0 -0
  56. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/auth.py +0 -0
  57. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/__init__.py +0 -0
  58. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  59. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  60. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/test_aime25.py +0 -0
  61. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  62. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  63. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  64. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  65. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  66. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli.py +0 -0
  67. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/__init__.py +0 -0
  68. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  69. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/common.py +0 -0
  70. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/deploy.py +0 -0
  71. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  72. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/logs.py +0 -0
  73. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/preview.py +0 -0
  74. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  75. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/cli_commands/upload.py +0 -0
  76. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/common_utils.py +0 -0
  77. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/config.py +0 -0
  78. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/data_loader/__init__.py +0 -0
  79. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  80. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  81. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  82. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/data_loader/models.py +0 -0
  83. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/dataset_logger/__init__.py +0 -0
  84. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  85. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  86. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  87. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  88. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/datasets/__init__.py +0 -0
  89. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/datasets/loader.py +0 -0
  90. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/directory_utils.py +0 -0
  91. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/evaluation.py +0 -0
  92. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/event_bus/__init__.py +0 -0
  93. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/event_bus/event_bus.py +0 -0
  94. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/event_bus/logger.py +0 -0
  95. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  96. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  97. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/execution/__init__.py +0 -0
  98. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/execution/pipeline.py +0 -0
  99. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/gcp_tools.py +0 -0
  100. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/generation/cache.py +0 -0
  101. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/generation/clients/base.py +0 -0
  102. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/generation/clients.py +0 -0
  103. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/generic_server.py +0 -0
  104. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/get_pep440_version.py +0 -0
  105. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/human_id/__init__.py +0 -0
  106. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/human_id/dictionary.py +0 -0
  107. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/integrations/__init__.py +0 -0
  108. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/integrations/deepeval.py +0 -0
  109. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/integrations/openeval.py +0 -0
  110. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/integrations/trl.py +0 -0
  111. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/log_utils/__init__.py +0 -0
  112. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  113. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  114. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  115. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  116. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/log_utils/util.py +0 -0
  117. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/logging_utils.py +0 -0
  118. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/__init__.py +0 -0
  119. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/adapter.py +0 -0
  120. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/client/__init__.py +0 -0
  121. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/client/connection.py +0 -0
  122. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/clients.py +0 -0
  123. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/execution/__init__.py +0 -0
  124. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/execution/base_policy.py +0 -0
  125. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/execution/manager.py +0 -0
  126. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/execution/policy.py +0 -0
  127. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/grid_renderer.py +0 -0
  128. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  129. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/mcpgym.py +0 -0
  130. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/process_manager.py +0 -0
  131. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/session/__init__.py +0 -0
  132. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/session/manager.py +0 -0
  133. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/simple_process_manager.py +0 -0
  134. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp/simulation_server.py +0 -0
  135. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/__init__.py +0 -0
  136. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/config.py +0 -0
  137. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/main.py +0 -0
  138. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  139. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  140. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  141. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  142. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_env.py +0 -0
  143. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/__init__.py +0 -0
  144. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  145. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  146. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  147. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  148. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  149. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  150. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  151. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  152. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  153. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  154. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  155. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  156. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  157. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  158. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/models.py +0 -0
  159. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/packaging.py +0 -0
  160. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/platform_api.py +0 -0
  161. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/playback_policy.py +0 -0
  162. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/__init__.py +0 -0
  163. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  164. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  165. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  166. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  167. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  168. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  169. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  170. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  171. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  172. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  173. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/exception_config.py +0 -0
  174. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/execution.py +0 -0
  175. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  176. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  177. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/parameterize.py +0 -0
  178. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/plugin.py +0 -0
  179. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  180. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/rollout_processor.py +0 -0
  181. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/store_experiment_link.py +0 -0
  182. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/store_results_url.py +0 -0
  183. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/types.py +0 -0
  184. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/utils.py +0 -0
  185. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/pytest/validate_signature.py +0 -0
  186. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/__init__.py +0 -0
  187. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/llm_judge.py +0 -0
  188. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  189. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
  190. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
  191. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
  192. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/quickstart/utils.py +0 -0
  193. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/resources.py +0 -0
  194. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/reward_function.py +0 -0
  195. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/__init__.py +0 -0
  196. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/accuracy.py +0 -0
  197. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/accuracy_length.py +0 -0
  198. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  199. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  200. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/apps_testing_util.py +0 -0
  201. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/bfcl_reward.py +0 -0
  202. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/code_execution.py +0 -0
  203. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/code_execution_utils.py +0 -0
  204. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/cpp_code.py +0 -0
  205. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  206. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/format.py +0 -0
  207. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/function_calling.py +0 -0
  208. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/json_schema.py +0 -0
  209. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/language_consistency.py +0 -0
  210. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/lean_prover.py +0 -0
  211. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/length.py +0 -0
  212. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  213. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/math.py +0 -0
  214. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  215. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/reasoning_steps.py +0 -0
  216. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/repetition.py +0 -0
  217. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rewards/tag_count.py +0 -0
  218. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/rl_processing.py +0 -0
  219. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/server.py +0 -0
  220. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/stats/__init__.py +0 -0
  221. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/stats/confidence_intervals.py +0 -0
  222. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/typed_interface.py +0 -0
  223. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/types/__init__.py +0 -0
  224. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/types/errors.py +0 -0
  225. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/types/remote_rollout_processor.py +0 -0
  226. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/types/types.py +0 -0
  227. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/__init__.py +0 -0
  228. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/batch_evaluation.py +0 -0
  229. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/batch_transformation.py +0 -0
  230. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/check_server_status.py +0 -0
  231. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/dataset_helpers.py +0 -0
  232. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/logs_models.py +0 -0
  233. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/module_loader.py +0 -0
  234. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/packaging_utils.py +0 -0
  235. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/show_results_url.py +0 -0
  236. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/static_policy.py +0 -0
  237. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/subprocess_utils.py +0 -0
  238. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol/utils/vite_server.py +0 -0
  239. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol.egg-info/dependency_links.txt +0 -0
  240. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol.egg-info/entry_points.txt +0 -0
  241. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol.egg-info/requires.txt +0 -0
  242. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/eval_protocol.egg-info/top_level.txt +0 -0
  243. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/pyproject.toml +0 -0
  244. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/setup.cfg +0 -0
  245. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/setup.py +0 -0
  246. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_accuracy.py +0 -0
  247. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_accuracy_length.py +0 -0
  248. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_adapters_e2e.py +0 -0
  249. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_agent_orchestrator.py +0 -0
  250. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_agent_resources.py +0 -0
  251. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_auth.py +0 -0
  252. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_batch_evaluation.py +0 -0
  253. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_cli.py +0 -0
  254. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_cli_agent.py +0 -0
  255. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_cli_args.py +0 -0
  256. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_code_execution.py +0 -0
  257. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_config.py +0 -0
  258. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_control_plane_separation.py +0 -0
  259. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_cpp_code.py +0 -0
  260. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_data_driven_task_manager.py +0 -0
  261. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_deepcoder_reward.py +0 -0
  262. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_deepeval_integration.py +0 -0
  263. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_deploy_integration.py +0 -0
  264. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_directory_utils.py +0 -0
  265. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_e2b_integration.py +0 -0
  266. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_e2b_js_integration.py +0 -0
  267. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_edge_cases.py +0 -0
  268. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_eval_protocol_import.py +0 -0
  269. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_evaluation.py +0 -0
  270. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_evaluation_integration.py +0 -0
  271. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_evaluation_postprocess.py +0 -0
  272. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_evaluation_preview_integration.py +0 -0
  273. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_event_bus.py +0 -0
  274. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_event_bus_helper.py +0 -0
  275. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_examples_end_to_end.py +0 -0
  276. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_fireworks_api.py +0 -0
  277. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_format.py +0 -0
  278. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_fractional_code.py +0 -0
  279. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_function_calling.py +0 -0
  280. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_gcp_tools.py +0 -0
  281. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_generic_server.py +0 -0
  282. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_human_id.py +0 -0
  283. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_integration.py +0 -0
  284. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_json_schema.py +0 -0
  285. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_kwargs_validation.py +0 -0
  286. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_language_consistency.py +0 -0
  287. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_lean_prover.py +0 -0
  288. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_lean_prover_runner.py +0 -0
  289. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_length.py +0 -0
  290. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_list_comparison_math_reward.py +0 -0
  291. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_logs_server.py +0 -0
  292. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_logs_server_simple.py +0 -0
  293. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_math.py +0 -0
  294. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_minimal.py +0 -0
  295. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_models.py +0 -0
  296. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_models_rl.py +0 -0
  297. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_multiple_choice_math_reward.py +0 -0
  298. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_n_variant_batch_integration.py +0 -0
  299. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_n_variant_integration.py +0 -0
  300. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_openai_compatibility.py +0 -0
  301. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_openeval_integration.py +0 -0
  302. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_packaging.py +0 -0
  303. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_parallel_rollouts.py +0 -0
  304. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_platform_api.py +0 -0
  305. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_quickstart_utils.py +0 -0
  306. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_readiness.py +0 -0
  307. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_reasoning_steps.py +0 -0
  308. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_repetition.py +0 -0
  309. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_repetition_debug.py +0 -0
  310. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_retry_mechanism.py +0 -0
  311. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_reward_function.py +0 -0
  312. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_reward_protocol_import.py +0 -0
  313. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_rl_processing.py +0 -0
  314. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_rollout_control_plane_integration.py +0 -0
  315. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_server.py +0 -0
  316. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_status_migration_changes.py +0 -0
  317. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_status_migration_integration.py +0 -0
  318. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_status_model.py +0 -0
  319. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_tag_count.py +0 -0
  320. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_tau_bench_airline_smoke.py +0 -0
  321. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_typed_interface.py +0 -0
  322. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_typed_interface_rl.py +0 -0
  323. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_upload_entrypoint.py +0 -0
  324. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_url_handling.py +0 -0
  325. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/tests/test_vite_server.py +0 -0
  326. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/__init__.py +0 -0
  327. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/agent/__init__.py +0 -0
  328. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/agent/base.py +0 -0
  329. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/agent/llm_agent.py +0 -0
  330. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/api_service/__init__.py +0 -0
  331. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/api_service/api_config.py +0 -0
  332. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/api_service/data_model.py +0 -0
  333. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/api_service/simulation_service.py +0 -0
  334. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/cli.py +0 -0
  335. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/config.py +0 -0
  336. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/airline/policy.md +0 -0
  337. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/mock/policy.md +0 -0
  338. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  339. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/retail/policy.md +0 -0
  340. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  341. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  342. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  343. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  344. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  345. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  346. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  347. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data_model/__init__.py +0 -0
  348. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data_model/message.py +0 -0
  349. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data_model/simulation.py +0 -0
  350. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/data_model/tasks.py +0 -0
  351. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/__init__.py +0 -0
  352. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/airline/__init__.py +0 -0
  353. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/airline/data_model.py +0 -0
  354. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/airline/environment.py +0 -0
  355. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/airline/tools.py +0 -0
  356. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/airline/utils.py +0 -0
  357. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/mock/__init__.py +0 -0
  358. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/mock/data_model.py +0 -0
  359. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/mock/environment.py +0 -0
  360. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/mock/tools.py +0 -0
  361. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/mock/utils.py +0 -0
  362. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/retail/__init__.py +0 -0
  363. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/retail/data_model.py +0 -0
  364. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/retail/environment.py +0 -0
  365. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/retail/tools.py +0 -0
  366. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/retail/utils.py +0 -0
  367. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/__init__.py +0 -0
  368. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/data_model.py +0 -0
  369. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/environment.py +0 -0
  370. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  371. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  372. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  373. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  374. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  375. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  376. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  377. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  378. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/tools.py +0 -0
  379. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  380. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  381. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/domains/telecom/utils.py +0 -0
  382. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/__init__.py +0 -0
  383. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/db.py +0 -0
  384. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/environment.py +0 -0
  385. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/server.py +0 -0
  386. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/tool.py +0 -0
  387. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/toolkit.py +0 -0
  388. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  389. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/__init__.py +0 -0
  390. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/evaluator.py +0 -0
  391. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  392. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  393. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  394. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  395. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  396. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/metrics/__init__.py +0 -0
  397. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/metrics/agent_metrics.py +0 -0
  398. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  399. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/orchestrator/__init__.py +0 -0
  400. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  401. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  402. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/orchestrator/utils.py +0 -0
  403. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/registry.py +0 -0
  404. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/run.py +0 -0
  405. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/scripts/__init__.py +0 -0
  406. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/scripts/check_data.py +0 -0
  407. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  408. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/scripts/start_servers.py +0 -0
  409. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/scripts/view_simulations.py +0 -0
  410. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/user/__init__.py +0 -0
  411. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/user/base.py +0 -0
  412. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/user/user_simulator.py +0 -0
  413. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/utils/__init__.py +0 -0
  414. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/utils/display.py +0 -0
  415. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/utils/io_utils.py +0 -0
  416. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/utils/llm_utils.py +0 -0
  417. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/utils/pydantic_utils.py +0 -0
  418. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vendor/tau2/utils/utils.py +0 -0
  419. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/versioneer.py +0 -0
  420. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  421. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vite-app/dist/assets/index-C81y9r9l.js +0 -0
  422. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vite-app/dist/assets/index-C81y9r9l.js.map +0 -0
  423. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
  424. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  425. {eval_protocol-0.2.45.dev0 → eval_protocol-0.2.46}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.45.dev0
3
+ Version: 0.2.46
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-09T01:23:30-0700",
11
+ "date": "2025-10-09T13:58:14-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "c2ec0c8bb3f927b3c7f77c8a0e4fb955c7685ea6",
15
- "version": "0.2.45-dev"
14
+ "full-revisionid": "e066febd15f9056f74c40c8f4c34d3c68768fd59",
15
+ "version": "0.2.46"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -7,9 +7,9 @@ to pull data from Langfuse deployments with simplified retry logic handling.
7
7
  from __future__ import annotations
8
8
  import logging
9
9
  import requests
10
+ import time
10
11
  from datetime import datetime
11
12
  from typing import Any, Dict, List, Optional, Protocol
12
- import os
13
13
 
14
14
  from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
15
15
  from .base import BaseAdapter
@@ -349,11 +349,9 @@ class FireworksTracingAdapter(BaseAdapter):
349
349
  else:
350
350
  url = f"{self.base_url}/v1/traces"
351
351
 
352
- headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
353
-
354
352
  result = None
355
353
  try:
356
- response = requests.get(url, params=params, timeout=self.timeout, headers=headers)
354
+ response = requests.get(url, params=params, timeout=self.timeout)
357
355
  response.raise_for_status()
358
356
  result = response.json()
359
357
  except requests.exceptions.HTTPError as e:
@@ -62,7 +62,8 @@ from eval_protocol.pytest.utils import (
62
62
  run_tasks_with_eval_progress,
63
63
  run_tasks_with_run_progress,
64
64
  )
65
- from eval_protocol.utils.show_results_url import store_local_ui_results_url
65
+ from eval_protocol.utils.show_results_url import store_local_ui_results_url, generate_invocation_filter_url
66
+ from eval_protocol.utils.browser_utils import is_logs_server_running, open_browser_tab
66
67
 
67
68
  from ..common_utils import load_jsonl
68
69
 
@@ -80,6 +81,7 @@ def evaluation_test(
80
81
  rollout_processor_kwargs: RolloutProcessorInputParam | None = None,
81
82
  aggregation_method: AggregationMethod = "mean",
82
83
  passed_threshold: EvaluationThreshold | float | EvaluationThresholdDict | None = None,
84
+ disable_browser_open: bool = False,
83
85
  num_runs: int = 1,
84
86
  filtered_row_ids: Sequence[str] | None = None,
85
87
  max_dataset_rows: int | None = None,
@@ -246,10 +248,29 @@ def evaluation_test(
246
248
  else:
247
249
  invocation_id = generate_id()
248
250
 
251
+ # Track whether we've opened browser for this invocation
252
+ browser_opened_for_invocation = False
253
+
249
254
  async def wrapper_body(**kwargs: Unpack[ParameterizedTestKwargs]) -> None:
255
+ nonlocal browser_opened_for_invocation
256
+
250
257
  # Store URL for viewing results (after all postprocessing is complete)
251
258
  store_local_ui_results_url(invocation_id)
252
259
 
260
+ # Auto-open browser if server is running and not disabled (only once per invocation)
261
+ if (
262
+ not browser_opened_for_invocation
263
+ and not disable_browser_open
264
+ and os.environ.get("EP_DISABLE_AUTO_BROWSER") is None
265
+ ):
266
+ is_running, port = is_logs_server_running()
267
+ if is_running:
268
+ # Generate URL for table view with invocation filter
269
+ base_url = f"http://localhost:{port}" if port else "http://localhost:8000"
270
+ table_url = generate_invocation_filter_url(invocation_id, f"{base_url}/table")
271
+ open_browser_tab(table_url)
272
+ browser_opened_for_invocation = True
273
+
253
274
  eval_metadata = None
254
275
 
255
276
  all_results: list[list[EvaluationRow]] = [[] for _ in range(num_runs)]
@@ -0,0 +1,114 @@
1
+ """
2
+ Browser utilities for auto-opening evaluation results in the local UI.
3
+ """
4
+
5
+ import json
6
+ import os
7
+ import threading
8
+ import time
9
+ import webbrowser
10
+ from pathlib import Path
11
+ from typing import Tuple, Optional
12
+
13
+ try:
14
+ import psutil
15
+
16
+ PSUTIL_AVAILABLE = True
17
+ except ImportError:
18
+ PSUTIL_AVAILABLE = False
19
+
20
+
21
+ def _get_pid_file_path() -> Path:
22
+ """Get the path to the logs server PID file."""
23
+ from eval_protocol.directory_utils import find_eval_protocol_dir
24
+
25
+ return Path(find_eval_protocol_dir()) / "logs_server.pid"
26
+
27
+
28
+ def write_pid_file(pid: int, port: int) -> None:
29
+ """
30
+ Write the server PID and port to a file for external processes to check.
31
+
32
+ Args:
33
+ pid: The process ID of the logs server
34
+ port: The port the server is running on
35
+ """
36
+ try:
37
+ pid_file = _get_pid_file_path()
38
+
39
+ data = {"pid": pid, "port": port}
40
+
41
+ with open(pid_file, "w") as f:
42
+ json.dump(data, f)
43
+
44
+ # Use print instead of logger to avoid circular imports
45
+ print(f"Wrote PID file: {pid_file} with PID {pid} and port {port}")
46
+ except Exception as e:
47
+ print(f"Warning: Failed to write PID file: {e}")
48
+
49
+
50
+ def is_logs_server_running() -> Tuple[bool, Optional[int]]:
51
+ """
52
+ Check if the logs server is running by reading the PID file and verifying the process.
53
+
54
+ Returns:
55
+ Tuple of (is_running, port) where:
56
+ - is_running: True if server is running, False otherwise
57
+ - port: The port the server is running on, or None if not running
58
+ """
59
+ if not PSUTIL_AVAILABLE:
60
+ return False, None
61
+
62
+ pid_file = _get_pid_file_path()
63
+ if not pid_file.exists():
64
+ return False, None
65
+
66
+ try:
67
+ with open(pid_file, "r") as f:
68
+ data = json.load(f)
69
+ pid = data.get("pid")
70
+ port = data.get("port")
71
+ except (json.JSONDecodeError, KeyError, FileNotFoundError):
72
+ return False, None
73
+
74
+ if pid is None:
75
+ return False, None
76
+
77
+ try:
78
+ # Check if the process is still running
79
+ process = psutil.Process(pid)
80
+ if not process.is_running():
81
+ return False, None
82
+
83
+ # Optionally verify it's listening on the expected port
84
+ if port is not None:
85
+ try:
86
+ connections = process.net_connections()
87
+ for conn in connections:
88
+ if conn.laddr.port == port and conn.status == "LISTEN":
89
+ return True, port
90
+ except (psutil.AccessDenied, psutil.NoSuchProcess):
91
+ # If we can't check connections, assume it's running if process exists
92
+ pass
93
+
94
+ return True, port
95
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
96
+ return False, None
97
+
98
+
99
+ def open_browser_tab(url: str, delay: float = 0.5) -> None:
100
+ """
101
+ Open a URL in a new browser tab with an optional delay.
102
+
103
+ Args:
104
+ url: The URL to open
105
+ delay: Delay in seconds before opening browser (default: 0.5)
106
+ """
107
+
108
+ def _open():
109
+ time.sleep(delay) # Give the server time to start
110
+ webbrowser.open_new_tab(url)
111
+
112
+ thread = threading.Thread(target=_open)
113
+ thread.daemon = True
114
+ thread.start()
@@ -6,6 +6,7 @@ import threading
6
6
  import time
7
7
  from datetime import datetime
8
8
  from contextlib import asynccontextmanager
9
+ from pathlib import Path
9
10
  from queue import Queue
10
11
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
11
12
 
@@ -23,6 +24,7 @@ from eval_protocol.utils.vite_server import ViteServer
23
24
  from eval_protocol.log_utils.elasticsearch_client import ElasticsearchClient
24
25
  from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
25
26
  from eval_protocol.utils.logs_models import LogEntry, LogsResponse
27
+ from eval_protocol.utils.browser_utils import write_pid_file
26
28
 
27
29
  if TYPE_CHECKING:
28
30
  from eval_protocol.models import EvaluationRow
@@ -378,7 +380,7 @@ class LogsServer(ViteServer):
378
380
  event_bus.subscribe(self._handle_event)
379
381
  logger.debug("[LOGS_SERVER_INIT] Successfully subscribed to event bus")
380
382
 
381
- logger.info(f"[LOGS_SERVER_INIT] LogsServer initialized on {host}:{port}")
383
+ logger.info(f"[LOGS_SERVER_INIT] LogsServer initialized on {self.host}:{self.port}")
382
384
 
383
385
  def _setup_websocket_routes(self):
384
386
  """Set up WebSocket routes for real-time communication."""
@@ -541,6 +543,12 @@ class LogsServer(ViteServer):
541
543
  )
542
544
 
543
545
  server = uvicorn.Server(config)
546
+
547
+ # Write PID file after server is configured but before serving
548
+ logger.debug(f"[LOGS_SERVER_RUN_ASYNC] Writing PID file for port {self.port}")
549
+ write_pid_file(os.getpid(), self.port)
550
+ logger.debug(f"[LOGS_SERVER_RUN_ASYNC] Successfully wrote PID file for port {self.port}")
551
+
544
552
  await server.serve()
545
553
 
546
554
  except KeyboardInterrupt:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.45.dev0
3
+ Version: 0.2.46
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -165,14 +165,6 @@ eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py
165
165
  eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md
166
166
  eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md
167
167
  eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md
168
- eval_protocol/proxy/proxy_core/__init__.py
169
- eval_protocol/proxy/proxy_core/app.py
170
- eval_protocol/proxy/proxy_core/auth.py
171
- eval_protocol/proxy/proxy_core/langfuse.py
172
- eval_protocol/proxy/proxy_core/litellm.py
173
- eval_protocol/proxy/proxy_core/main.py
174
- eval_protocol/proxy/proxy_core/models.py
175
- eval_protocol/proxy/proxy_core/redis_utils.py
176
168
  eval_protocol/pytest/__init__.py
177
169
  eval_protocol/pytest/default_agent_rollout_processor.py
178
170
  eval_protocol/pytest/default_dataset_adapter.py
@@ -237,6 +229,7 @@ eval_protocol/types/types.py
237
229
  eval_protocol/utils/__init__.py
238
230
  eval_protocol/utils/batch_evaluation.py
239
231
  eval_protocol/utils/batch_transformation.py
232
+ eval_protocol/utils/browser_utils.py
240
233
  eval_protocol/utils/check_server_status.py
241
234
  eval_protocol/utils/dataset_helpers.py
242
235
  eval_protocol/utils/logs_models.py
@@ -6,6 +6,13 @@ import socket
6
6
  from unittest.mock import patch, MagicMock
7
7
  import pytest
8
8
 
9
+ try:
10
+ import psutil
11
+
12
+ PSUTIL_AVAILABLE = True
13
+ except ImportError:
14
+ PSUTIL_AVAILABLE = False
15
+
9
16
  from eval_protocol.utils.show_results_url import (
10
17
  is_server_running,
11
18
  generate_invocation_filter_url,
@@ -193,3 +200,137 @@ class TestIntegration:
193
200
  assert "table" in call_args[2]
194
201
  assert "integration-test" in call_args[1]
195
202
  assert "integration-test" in call_args[2]
203
+
204
+
205
+ class TestBrowserUtilities:
206
+ """Test browser utility functions."""
207
+
208
+ def test_get_pid_file_path(self):
209
+ """Test PID file path generation."""
210
+ from eval_protocol.utils.browser_utils import _get_pid_file_path
211
+ from eval_protocol.directory_utils import find_eval_protocol_dir
212
+ from pathlib import Path
213
+
214
+ pid_file = _get_pid_file_path()
215
+ expected = Path(find_eval_protocol_dir()) / "logs_server.pid"
216
+ assert pid_file == expected
217
+
218
+ def test_is_logs_server_running_no_pid_file(self, tmp_path, monkeypatch):
219
+ """Test server detection when PID file doesn't exist."""
220
+ from eval_protocol.utils.browser_utils import is_logs_server_running
221
+
222
+ # Mock the PID file path to a non-existent file
223
+ monkeypatch.setattr(
224
+ "eval_protocol.utils.browser_utils._get_pid_file_path", lambda: tmp_path / "nonexistent.pid"
225
+ )
226
+
227
+ is_running, port = is_logs_server_running()
228
+ assert not is_running
229
+ assert port is None
230
+
231
+ def test_is_logs_server_running_invalid_pid_file(self, tmp_path, monkeypatch):
232
+ """Test server detection with invalid PID file content."""
233
+ from eval_protocol.utils.browser_utils import is_logs_server_running
234
+
235
+ # Create invalid PID file
236
+ pid_file = tmp_path / "invalid.pid"
237
+ pid_file.write_text("invalid json")
238
+ monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
239
+
240
+ is_running, port = is_logs_server_running()
241
+ assert not is_running
242
+ assert port is None
243
+
244
+ def test_is_logs_server_running_missing_pid_key(self, tmp_path, monkeypatch):
245
+ """Test server detection with PID file missing required keys."""
246
+ from eval_protocol.utils.browser_utils import is_logs_server_running
247
+ import json
248
+
249
+ # Create PID file with missing pid key
250
+ pid_file = tmp_path / "missing_pid.pid"
251
+ pid_file.write_text(json.dumps({"port": 8000}))
252
+ monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
253
+
254
+ is_running, port = is_logs_server_running()
255
+ assert not is_running
256
+ assert port is None
257
+
258
+ @pytest.mark.skipif(not PSUTIL_AVAILABLE, reason="psutil not available")
259
+ def test_is_logs_server_running_nonexistent_process(self, tmp_path, monkeypatch):
260
+ """Test server detection with PID file pointing to non-existent process."""
261
+ from eval_protocol.utils.browser_utils import is_logs_server_running
262
+ import json
263
+
264
+ # Create PID file with non-existent PID
265
+ pid_file = tmp_path / "nonexistent_process.pid"
266
+ pid_file.write_text(json.dumps({"pid": 999999, "port": 8000}))
267
+ monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
268
+
269
+ is_running, port = is_logs_server_running()
270
+ assert not is_running
271
+ assert port is None
272
+
273
+ @pytest.mark.skipif(not PSUTIL_AVAILABLE, reason="psutil not available")
274
+ def test_is_logs_server_running_current_process(self, tmp_path, monkeypatch):
275
+ """Test server detection with PID file pointing to current process."""
276
+ from eval_protocol.utils.browser_utils import is_logs_server_running
277
+ import json
278
+ import os
279
+
280
+ # Create PID file with current process PID
281
+ pid_file = tmp_path / "current_process.pid"
282
+ pid_file.write_text(json.dumps({"pid": os.getpid(), "port": 8000}))
283
+ monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
284
+
285
+ is_running, port = is_logs_server_running()
286
+ assert is_running
287
+ assert port == 8000
288
+
289
+ def test_open_browser_tab(self, monkeypatch):
290
+ """Test browser tab opening."""
291
+ from eval_protocol.utils.browser_utils import open_browser_tab
292
+
293
+ opened_urls = []
294
+
295
+ def mock_open_new_tab(url):
296
+ opened_urls.append(url)
297
+
298
+ monkeypatch.setattr("webbrowser.open_new_tab", mock_open_new_tab)
299
+
300
+ # Test with delay
301
+ open_browser_tab("http://example.com", delay=0.01)
302
+
303
+ # Wait a bit for the thread to execute
304
+ import time
305
+
306
+ time.sleep(0.02)
307
+
308
+ assert len(opened_urls) == 1
309
+ assert opened_urls[0] == "http://example.com"
310
+
311
+
312
+ class TestLogsServerPidFile:
313
+ """Test logs server PID file functionality."""
314
+
315
+ def test_write_pid_file(self, tmp_path, monkeypatch):
316
+ """Test PID file writing."""
317
+ from eval_protocol.utils.browser_utils import write_pid_file
318
+ import json
319
+
320
+ # Mock the find_eval_protocol_dir function
321
+ monkeypatch.setattr("eval_protocol.directory_utils.find_eval_protocol_dir", lambda: str(tmp_path))
322
+
323
+ # Test writing PID file
324
+ write_pid_file(12345, 8000)
325
+
326
+ # Check that PID file was created
327
+ pid_file = tmp_path / "logs_server.pid"
328
+ assert pid_file.exists()
329
+
330
+ # Check content
331
+ with open(pid_file, "r") as f:
332
+ data = json.load(f)
333
+ assert "pid" in data
334
+ assert "port" in data
335
+ assert data["port"] == 8000
336
+ assert data["pid"] == 12345
@@ -1,10 +0,0 @@
1
- from .models import ProxyConfig
2
- from .app import create_app
3
- from .auth import AuthProvider, NoAuthProvider
4
-
5
- __all__ = [
6
- "ProxyConfig",
7
- "create_app",
8
- "AuthProvider",
9
- "NoAuthProvider",
10
- ]