eval-protocol 0.2.69.dev3__tar.gz → 0.2.70__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (441)
  1. {eval_protocol-0.2.69.dev3/eval_protocol.egg-info → eval_protocol-0.2.70}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/models.py +35 -3
  4. eval_protocol-0.2.70/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +177 -0
  5. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70/eval_protocol.egg-info}/PKG-INFO +1 -1
  6. eval_protocol-0.2.69.dev3/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -162
  7. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/LICENSE +0 -0
  8. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/README.md +0 -0
  9. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/development/__init__.py +0 -0
  10. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/development/normalize_sandbox_fusion.py +0 -0
  11. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/development/utils/__init__.py +0 -0
  12. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/development/utils/generate_api_key.py +0 -0
  13. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/development/utils/subprocess_manager.py +0 -0
  14. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/__init__.py +0 -0
  15. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/__main__.py +0 -0
  16. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/__init__.py +0 -0
  17. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/base.py +0 -0
  18. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/bigquery.py +0 -0
  19. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/braintrust.py +0 -0
  20. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/fireworks_tracing.py +0 -0
  21. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/huggingface.py +0 -0
  22. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/langchain.py +0 -0
  23. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/langfuse.py +0 -0
  24. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/langsmith.py +0 -0
  25. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/openai_responses.py +0 -0
  26. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/trl.py +0 -0
  27. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/utils.py +0 -0
  28. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/adapters/weave.py +0 -0
  29. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/__init__.py +0 -0
  30. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/models.py +0 -0
  31. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/orchestrator.py +0 -0
  32. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resource_abc.py +0 -0
  33. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resource_pool.py +0 -0
  34. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/__init__.py +0 -0
  35. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  36. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  37. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  38. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  39. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  40. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/docker_resource.py +0 -0
  41. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  42. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  43. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/resources/sql_resource.py +0 -0
  44. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/task_manager.py +0 -0
  45. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/agent/tool_registry.py +0 -0
  46. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/auth.py +0 -0
  47. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/__init__.py +0 -0
  48. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  49. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  50. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/test_aime25.py +0 -0
  51. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  52. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  53. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  54. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  55. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  56. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli.py +0 -0
  57. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/__init__.py +0 -0
  58. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  59. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/common.py +0 -0
  60. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/create_rft.py +0 -0
  61. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/deploy.py +0 -0
  62. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  63. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/logs.py +0 -0
  64. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/preview.py +0 -0
  65. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  66. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/cli_commands/upload.py +0 -0
  67. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/common_utils.py +0 -0
  68. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/config.py +0 -0
  69. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/data_loader/__init__.py +0 -0
  70. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  71. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  72. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  73. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/data_loader/models.py +0 -0
  74. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/dataset_logger/__init__.py +0 -0
  75. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  76. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  77. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  78. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  79. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/datasets/__init__.py +0 -0
  80. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/datasets/loader.py +0 -0
  81. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/directory_utils.py +0 -0
  82. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/evaluation.py +0 -0
  83. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/event_bus/__init__.py +0 -0
  84. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/event_bus/event_bus.py +0 -0
  85. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/event_bus/logger.py +0 -0
  86. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  87. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  88. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/execution/__init__.py +0 -0
  89. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/execution/pipeline.py +0 -0
  90. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/fireworks_rft.py +0 -0
  91. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/gcp_tools.py +0 -0
  92. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/generation/cache.py +0 -0
  93. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/generation/clients/base.py +0 -0
  94. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/generation/clients.py +0 -0
  95. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/generic_server.py +0 -0
  96. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/get_pep440_version.py +0 -0
  97. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/human_id/__init__.py +0 -0
  98. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/human_id/dictionary.py +0 -0
  99. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/integrations/__init__.py +0 -0
  100. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/integrations/deepeval.py +0 -0
  101. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/integrations/openeval.py +0 -0
  102. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/integrations/trl.py +0 -0
  103. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/__init__.py +0 -0
  104. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  105. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  106. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  107. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  108. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/init.py +0 -0
  109. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/rollout_context.py +0 -0
  110. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  111. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/log_utils/util.py +0 -0
  112. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/logging_utils.py +0 -0
  113. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/__init__.py +0 -0
  114. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/adapter.py +0 -0
  115. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/client/__init__.py +0 -0
  116. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/client/connection.py +0 -0
  117. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/clients.py +0 -0
  118. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/execution/__init__.py +0 -0
  119. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/execution/base_policy.py +0 -0
  120. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/execution/manager.py +0 -0
  121. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/execution/policy.py +0 -0
  122. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/grid_renderer.py +0 -0
  123. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  124. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/mcpgym.py +0 -0
  125. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/process_manager.py +0 -0
  126. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/session/__init__.py +0 -0
  127. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/session/manager.py +0 -0
  128. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/simple_process_manager.py +0 -0
  129. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp/simulation_server.py +0 -0
  130. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/__init__.py +0 -0
  131. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/config.py +0 -0
  132. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/main.py +0 -0
  133. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  134. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  135. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  136. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  137. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_env.py +0 -0
  138. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/__init__.py +0 -0
  139. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  140. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  141. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  142. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  143. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  144. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  145. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  146. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  147. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  148. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  149. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  150. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  151. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  152. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  153. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/packaging.py +0 -0
  154. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/platform_api.py +0 -0
  155. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/playback_policy.py +0 -0
  156. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/__init__.py +0 -0
  157. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  158. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/app.py +0 -0
  159. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  160. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  161. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  162. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/main.py +0 -0
  163. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/models.py +0 -0
  164. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  165. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/__init__.py +0 -0
  166. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  167. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  168. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  169. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  170. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  171. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  172. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  173. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  174. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  175. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/evaluation_test.py +0 -0
  176. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  177. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  178. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/exception_config.py +0 -0
  179. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/execution.py +0 -0
  180. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  181. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  182. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  183. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/parameterize.py +0 -0
  184. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/plugin.py +0 -0
  185. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  186. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/rollout_processor.py +0 -0
  187. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/store_experiment_link.py +0 -0
  188. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/store_results_url.py +0 -0
  189. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/tracing_utils.py +0 -0
  190. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/types.py +0 -0
  191. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/pytest/validate_signature.py +0 -0
  192. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/__init__.py +0 -0
  193. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  194. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  195. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  196. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  197. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  198. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  199. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  200. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/llm_judge.py +0 -0
  201. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  202. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/quickstart/utils.py +0 -0
  203. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/resources.py +0 -0
  204. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/reward_function.py +0 -0
  205. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/__init__.py +0 -0
  206. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/accuracy.py +0 -0
  207. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/accuracy_length.py +0 -0
  208. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  209. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  210. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/apps_testing_util.py +0 -0
  211. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/bfcl_reward.py +0 -0
  212. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/code_execution.py +0 -0
  213. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/code_execution_utils.py +0 -0
  214. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/cpp_code.py +0 -0
  215. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  216. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/format.py +0 -0
  217. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/function_calling.py +0 -0
  218. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/json_schema.py +0 -0
  219. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/language_consistency.py +0 -0
  220. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/lean_prover.py +0 -0
  221. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/length.py +0 -0
  222. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  223. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/math.py +0 -0
  224. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  225. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/reasoning_steps.py +0 -0
  226. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/repetition.py +0 -0
  227. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rewards/tag_count.py +0 -0
  228. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/rl_processing.py +0 -0
  229. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/server.py +0 -0
  230. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/stats/__init__.py +0 -0
  231. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/stats/confidence_intervals.py +0 -0
  232. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/typed_interface.py +0 -0
  233. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/types/__init__.py +0 -0
  234. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/types/errors.py +0 -0
  235. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/types/remote_rollout_processor.py +0 -0
  236. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/types/types.py +0 -0
  237. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/__init__.py +0 -0
  238. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/batch_evaluation.py +0 -0
  239. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/batch_transformation.py +0 -0
  240. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/browser_utils.py +0 -0
  241. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/check_server_status.py +0 -0
  242. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/dataset_helpers.py +0 -0
  243. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  244. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/logs_models.py +0 -0
  245. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/logs_server.py +0 -0
  246. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/module_loader.py +0 -0
  247. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/packaging_utils.py +0 -0
  248. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/show_results_url.py +0 -0
  249. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/static_policy.py +0 -0
  250. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/subprocess_utils.py +0 -0
  251. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol/utils/vite_server.py +0 -0
  252. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol.egg-info/SOURCES.txt +0 -0
  253. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol.egg-info/dependency_links.txt +0 -0
  254. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol.egg-info/entry_points.txt +0 -0
  255. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol.egg-info/requires.txt +0 -0
  256. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/eval_protocol.egg-info/top_level.txt +0 -0
  257. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/pyproject.toml +0 -0
  258. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/setup.cfg +0 -0
  259. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/setup.py +0 -0
  260. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_accuracy.py +0 -0
  261. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_accuracy_length.py +0 -0
  262. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_adapters_e2e.py +0 -0
  263. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_agent_orchestrator.py +0 -0
  264. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_agent_resources.py +0 -0
  265. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_auth.py +0 -0
  266. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_batch_evaluation.py +0 -0
  267. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_cli.py +0 -0
  268. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_cli_agent.py +0 -0
  269. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_cli_args.py +0 -0
  270. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_code_execution.py +0 -0
  271. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_config.py +0 -0
  272. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_control_plane_separation.py +0 -0
  273. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_cpp_code.py +0 -0
  274. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_data_driven_task_manager.py +0 -0
  275. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_deepcoder_reward.py +0 -0
  276. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_deepeval_integration.py +0 -0
  277. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_deploy_integration.py +0 -0
  278. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_directory_utils.py +0 -0
  279. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_e2b_integration.py +0 -0
  280. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_e2b_js_integration.py +0 -0
  281. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_edge_cases.py +0 -0
  282. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_ep_upload_e2e.py +0 -0
  283. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_eval_protocol_import.py +0 -0
  284. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_evaluation.py +0 -0
  285. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_evaluation_integration.py +0 -0
  286. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_evaluation_postprocess.py +0 -0
  287. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_evaluation_preview_integration.py +0 -0
  288. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_event_bus.py +0 -0
  289. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_event_bus_helper.py +0 -0
  290. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_examples_end_to_end.py +0 -0
  291. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_fireworks_api.py +0 -0
  292. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_format.py +0 -0
  293. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_fractional_code.py +0 -0
  294. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_function_calling.py +0 -0
  295. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_gcp_tools.py +0 -0
  296. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_generic_server.py +0 -0
  297. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_human_id.py +0 -0
  298. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_integration.py +0 -0
  299. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_json_schema.py +0 -0
  300. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_kwargs_validation.py +0 -0
  301. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_language_consistency.py +0 -0
  302. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_lean_prover.py +0 -0
  303. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_lean_prover_runner.py +0 -0
  304. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_length.py +0 -0
  305. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_list_comparison_math_reward.py +0 -0
  306. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_logs_server.py +0 -0
  307. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_logs_server_simple.py +0 -0
  308. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_math.py +0 -0
  309. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_minimal.py +0 -0
  310. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_models.py +0 -0
  311. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_models_rl.py +0 -0
  312. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_multiple_choice_math_reward.py +0 -0
  313. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_n_variant_batch_integration.py +0 -0
  314. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_n_variant_integration.py +0 -0
  315. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_openai_compatibility.py +0 -0
  316. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_openeval_integration.py +0 -0
  317. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_packaging.py +0 -0
  318. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_parallel_rollouts.py +0 -0
  319. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_platform_api.py +0 -0
  320. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_quickstart_utils.py +0 -0
  321. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_readiness.py +0 -0
  322. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_reasoning_steps.py +0 -0
  323. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_repetition.py +0 -0
  324. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_repetition_debug.py +0 -0
  325. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_retry_mechanism.py +0 -0
  326. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_reward_function.py +0 -0
  327. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_reward_protocol_import.py +0 -0
  328. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_rl_processing.py +0 -0
  329. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_rollout_control_plane_integration.py +0 -0
  330. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_server.py +0 -0
  331. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_show_results_url.py +0 -0
  332. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_status_migration_changes.py +0 -0
  333. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_status_migration_integration.py +0 -0
  334. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_status_model.py +0 -0
  335. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_tag_count.py +0 -0
  336. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_tau_bench_airline_smoke.py +0 -0
  337. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_typed_interface.py +0 -0
  338. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_typed_interface_rl.py +0 -0
  339. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_upload_entrypoint.py +0 -0
  340. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_url_handling.py +0 -0
  341. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/tests/test_vite_server.py +0 -0
  342. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/__init__.py +0 -0
  343. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/agent/__init__.py +0 -0
  344. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/agent/base.py +0 -0
  345. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/agent/llm_agent.py +0 -0
  346. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/api_service/__init__.py +0 -0
  347. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/api_service/api_config.py +0 -0
  348. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/api_service/data_model.py +0 -0
  349. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/api_service/simulation_service.py +0 -0
  350. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/cli.py +0 -0
  351. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/config.py +0 -0
  352. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/airline/policy.md +0 -0
  353. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/mock/policy.md +0 -0
  354. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  355. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/retail/policy.md +0 -0
  356. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  357. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  358. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  359. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  360. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  361. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  362. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  363. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data_model/__init__.py +0 -0
  364. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data_model/message.py +0 -0
  365. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data_model/simulation.py +0 -0
  366. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/data_model/tasks.py +0 -0
  367. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/__init__.py +0 -0
  368. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/airline/__init__.py +0 -0
  369. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/airline/data_model.py +0 -0
  370. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/airline/environment.py +0 -0
  371. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/airline/tools.py +0 -0
  372. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/airline/utils.py +0 -0
  373. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/mock/__init__.py +0 -0
  374. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/mock/data_model.py +0 -0
  375. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/mock/environment.py +0 -0
  376. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/mock/tools.py +0 -0
  377. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/mock/utils.py +0 -0
  378. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/retail/__init__.py +0 -0
  379. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/retail/data_model.py +0 -0
  380. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/retail/environment.py +0 -0
  381. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/retail/tools.py +0 -0
  382. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/retail/utils.py +0 -0
  383. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/__init__.py +0 -0
  384. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/data_model.py +0 -0
  385. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/environment.py +0 -0
  386. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  387. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  388. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  389. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  390. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  391. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  392. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  393. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  394. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/tools.py +0 -0
  395. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  396. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  397. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/domains/telecom/utils.py +0 -0
  398. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/__init__.py +0 -0
  399. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/db.py +0 -0
  400. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/environment.py +0 -0
  401. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/server.py +0 -0
  402. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/tool.py +0 -0
  403. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/toolkit.py +0 -0
  404. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  405. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/__init__.py +0 -0
  406. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/evaluator.py +0 -0
  407. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  408. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  409. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  410. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  411. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  412. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/metrics/__init__.py +0 -0
  413. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/metrics/agent_metrics.py +0 -0
  414. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  415. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/orchestrator/__init__.py +0 -0
  416. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  417. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  418. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/orchestrator/utils.py +0 -0
  419. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/registry.py +0 -0
  420. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/run.py +0 -0
  421. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/scripts/__init__.py +0 -0
  422. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/scripts/check_data.py +0 -0
  423. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  424. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/scripts/start_servers.py +0 -0
  425. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/scripts/view_simulations.py +0 -0
  426. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/user/__init__.py +0 -0
  427. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/user/base.py +0 -0
  428. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/user/user_simulator.py +0 -0
  429. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/utils/__init__.py +0 -0
  430. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/utils/display.py +0 -0
  431. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/utils/io_utils.py +0 -0
  432. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/utils/llm_utils.py +0 -0
  433. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/utils/pydantic_utils.py +0 -0
  434. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vendor/tau2/utils/utils.py +0 -0
  435. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/versioneer.py +0 -0
  436. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  437. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
  438. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
  439. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
  440. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  441. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.69.dev3
3
+ Version: 0.2.70
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-29T03:48:45-0700",
11
+ "date": "2025-10-29T04:00:08-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "f84133471cd09ac683c082262720f30b9dfaaa2d",
15
- "version": "0.2.69-dev3"
14
+ "full-revisionid": "c705cb8d88a8d5966f22c84172d885a4352debc0",
15
+ "version": "0.2.70"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -214,10 +214,10 @@ class Status(BaseModel):
214
214
  logger.info(f"Re-raising {exception_type} from status details")
215
215
  raise exception_to_raise
216
216
  else:
217
- logger.debug(f"Could not create instance of {exception_type}")
217
+ logger.info(f"Could not create instance of {exception_type}")
218
218
  continue
219
219
  else:
220
- logger.debug(f"Could not import exception type: {exception_type}")
220
+ logger.info(f"Could not import exception type: {exception_type}")
221
221
  continue
222
222
 
223
223
  return False
@@ -244,7 +244,9 @@ class Status(BaseModel):
244
244
  # NOTE: we are losing some diagnostic information here by not passing the model and llm_provider. We could try to capture full exception state in rollout_error_from_exception.
245
245
  lambda: exception_class(message, model="unknown", llm_provider="unknown"),
246
246
  lambda: exception_class(message=message, model="unknown", llm_provider="unknown"),
247
- # Pattern 4: No arguments (fallback)
247
+ # Pattern 5: OpenAI exceptions - create mock response object
248
+ lambda: cls._create_openai_exception(exception_class, message),
249
+ # Pattern 7: No arguments (fallback)
248
250
  lambda: exception_class(),
249
251
  ]
250
252
 
@@ -260,6 +262,36 @@ class Status(BaseModel):
260
262
  logger.debug(f"All constructor patterns failed for {exception_class.__name__}")
261
263
  return None
262
264
 
265
@classmethod
def _create_openai_exception(cls, exception_class: type, message: str) -> Optional[Exception]:
    """
    Build an OpenAI-style exception around a minimal stand-in response.

    The constructor is invoked as
    ``exception_class(message, response=<stand-in>, body=None)``. Instead of a
    real ``httpx.Response`` a small plain object is supplied that only carries
    ``status_code``, ``headers`` and ``request`` (which in turn exposes
    ``method`` and ``url``).

    Args:
        exception_class: Exception type to instantiate.
        message: Error message passed as the first positional argument.

    Returns:
        The constructed exception, or ``None`` when the constructor rejects
        this argument pattern.
    """

    # The stand-ins below are plain Python objects, so httpx is intentionally
    # not imported: an unused import inside the guarded section would only add
    # a way for this pattern to fail when httpx is unavailable.
    class MockRequest:
        def __init__(self):
            self.method = "POST"
            self.url = "https://api.openai.com/v1/chat/completions"

    class MockResponse:
        def __init__(self):
            # Only the exception class and message reach this helper, so the
            # original HTTP status is not available; 404 is a placeholder.
            self.status_code = 404
            self.headers = {"x-request-id": "mock-request-id"}
            self.request = MockRequest()

    try:
        return exception_class(message, response=MockResponse(), body=None)
    except Exception as e:
        # Best effort: the caller tries several constructor patterns, so a
        # mismatch here is reported at debug level and signalled with None.
        logging.getLogger(__name__).debug(f"Failed to create OpenAI exception with mock response: {e}")
        return None
294
+
263
295
  @classmethod
264
296
  def _import_exception_class(cls, exception_type: str) -> Optional[type]:
265
297
  """
@@ -0,0 +1,177 @@
1
+ """
2
+ Vercel serverless function for SVGBench remote evaluation.
3
+
4
+ This function handles the model call part of the evaluation pipeline.
5
+ The SVG evaluation logic remains in the test client.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import logging
11
+ import sys
12
+ import asyncio
13
+ from flask import Flask, request, jsonify
14
+ from openai import OpenAI
15
+ from dotenv import load_dotenv
16
+
17
+ from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter
18
+
19
+ load_dotenv()
20
+
21
# Configure logging so INFO and below go to stdout, WARNING+ to stderr.
# This avoids Vercel marking INFO logs as [error] (stderr).
root_logger = logging.getLogger()
# Remove whatever handlers were already attached to the root logger so the
# routing configured below is the only one in effect.
root_logger.handlers.clear()
root_logger.setLevel(logging.INFO)


class _InfoOnly(logging.Filter):
    """Logging filter that accepts only records at INFO level or below."""

    def filter(self, record: logging.LogRecord) -> bool:
        # Anything above INFO is rejected here; the stderr handler below
        # (level WARNING) is the one that emits those records.
        return record.levelno <= logging.INFO


# One formatter shared by both stream handlers.
formatter = logging.Formatter("%(levelname)s:%(name)s:%(message)s")

# stdout: no handler level is set, so the _InfoOnly filter is what keeps
# WARNING and above out of this stream.
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.addFilter(_InfoOnly())
stdout_handler.setFormatter(formatter)
root_logger.addHandler(stdout_handler)

# stderr: WARNING and above only.
stderr_handler = logging.StreamHandler(sys.stderr)
stderr_handler.setLevel(logging.WARNING)
stderr_handler.setFormatter(formatter)
root_logger.addHandler(stderr_handler)

# Attach Fireworks tracing handler to root logger (non-stream HTTP sink)
root_logger.addHandler(FireworksTracingHttpHandler())

# Create Flask app
app = Flask(__name__)
50
+
51
+
52
async def execute_rollout_background(req, api_key):
    """Run the chat completion for one rollout and log the outcome.

    Nothing is returned to the caller. Success and failure are reported via
    log records that carry a ``status`` extra.

    Args:
        req: Parsed ``InitRequest``. This function reads ``metadata.rollout_id``,
            ``completion_params``, ``messages``, ``tools`` and ``model_base_url``.
        api_key: Credential handed to the OpenAI client. It is never logged.
    """
    # Attach rollout_id filter to logger
    rollout_id = req.metadata.rollout_id
    logger = logging.getLogger(f"{__name__}.{rollout_id}")
    logger.addFilter(RolloutIdFilter(rollout_id))

    try:
        model = req.completion_params.get("model")
        # Uncomment if you need to strip fireworks_ai/ prefix
        # if model and isinstance(model, str) and model.startswith("fireworks_ai/"):
        #     model = model[len("fireworks_ai/"):]

        # Prepare completion arguments
        completion_kwargs = {
            "messages": req.messages,
            "model": model,
            "temperature": req.completion_params.get("temperature"),
            "max_tokens": req.completion_params.get("max_tokens"),
        }

        # Add tools if present
        if req.tools:
            completion_kwargs["tools"] = req.tools

        # SECURITY: the API key is deliberately kept out of this message.
        # Records from this logger propagate to the root handlers, i.e. stdout
        # and the Fireworks tracing HTTP handler, so a logged credential would
        # leave the process in plaintext.
        logger.info(
            f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, MODEL: {model}"
        )

        # Synchronous OpenAI client: the create() call below is not awaited and
        # blocks this coroutine until the completion comes back. No explicit
        # timeout is configured here.
        client = OpenAI(base_url=req.model_base_url, api_key=api_key)

        logger.info(f"Sending completion request to model {model}")

        import time

        logger.info(f"timing start: {time.time()}")
        completion = client.chat.completions.create(**completion_kwargs)
        logger.info(f"Completed response: {completion}")
        logger.info(f"timing end: {time.time()}")
        # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR
        logger.info(f"Rollout {rollout_id} completed", extra={"status": Status.rollout_finished()})

    except Exception as e:
        # Log error with structured status - THIS IS WHAT RemoteRolloutProcessor POLLS FOR
        logger.error(
            f"Rollout {rollout_id} failed: {e}", extra={"status": Status.rollout_error_from_exception(e)}
        )
102
+
103
+
104
# Strong references to in-flight background rollouts. The event loop keeps only
# weak references to tasks, so a task that nothing else holds on to may be
# garbage-collected before it finishes.
_background_tasks = set()


@app.route("/init", methods=["POST"])
async def init():
    """Validate an init request, schedule the rollout, and acknowledge it.

    Returns:
        A ``(json, status)`` pair:
        200 with ``status: accepted`` once the background rollout is scheduled,
        400 when ``messages`` is empty or the request cannot be processed,
        401 when no API key is available in the request or the environment.
    """
    try:
        # Parse as InitRequest
        req = InitRequest(**request.get_json())

        # Create logger for immediate validation logging
        logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
        logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))

        # Validate required fields
        if not req.messages:
            error_msg = "messages is required"
            logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
            return jsonify({"error": error_msg}), 400

        # Get API key (prefer request api_key, fallback to environment)
        env_api_key = os.environ.get("FIREWORKS_API_KEY")
        if req.api_key:
            logger.info("Using API key from request")
            api_key = req.api_key
        elif env_api_key:
            logger.info("Using API key from environment")
            api_key = env_api_key
        else:
            error_msg = "API key not provided in request or environment variable"
            logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
            return jsonify({"error": error_msg}), 401

        # Acknowledge immediately; the rollout itself reports through logs.
        response_data = {
            "status": "accepted",
            "rollout_id": req.metadata.rollout_id,
            "message": "Rollout processing started",
        }

        # Fire and forget, but keep a reference until the task is done so it
        # cannot be collected mid-flight.
        task = asyncio.create_task(execute_rollout_background(req, api_key))
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)

        return jsonify(response_data), 200

    except Exception as e:
        # For request parsing errors, return error immediately (don't retry)
        return jsonify({"error": f"Request parsing error: {str(e)}"}), 400
147
+
148
+
149
@app.route("/", methods=["GET"])
def health_check():
    """Health check endpoint.

    Returns a small JSON document with a fixed ``status``/``message`` and the
    endpoint this app exposes for evaluation requests.
    """
    return jsonify(
        {
            "status": "ok",
            "message": "SVGBench Vercel Serverless Function",
            # The POST handler in this app is registered at /init, not /.
            "endpoints": {"POST /init": "Process SVGBench evaluation requests"},
        }
    )
159
+
160
+
161
@app.route("/", methods=["OPTIONS"])
def options_handler():
    """Answer CORS preflight requests with an empty JSON body and CORS headers."""
    preflight = jsonify({})
    cors_headers = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "POST, GET, OPTIONS"),
        ("Access-Control-Allow-Headers", "Content-Type"),
    )
    for header_name, header_value in cors_headers:
        preflight.headers[header_name] = header_value
    return preflight
169
+
170
+
171
+ # Add CORS headers to all responses
172
@app.after_request
def add_cors_headers(response):
    """Set the CORS headers on an outgoing response and hand it back."""
    cors_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "POST, GET, OPTIONS",
        "Access-Control-Allow-Headers": "Content-Type",
    }
    for header_name, header_value in cors_headers.items():
        response.headers[header_name] = header_value
    return response
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.69.dev3
3
+ Version: 0.2.70
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -1,162 +0,0 @@
1
- """
2
- Vercel serverless function for SVGBench remote evaluation.
3
-
4
- This function handles the model call part of the evaluation pipeline.
5
- The SVG evaluation logic remains in the test client.
6
- """
7
-
8
- import json
9
- import os
10
- import logging
11
- import sys
12
- from http.server import BaseHTTPRequestHandler
13
- from openai import OpenAI
14
- from dotenv import load_dotenv
15
-
16
- from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter
17
-
18
- load_dotenv()
19
-
20
- # Configure logging so INFO and below go to stdout, WARNING+ to stderr.
21
- # This avoids Vercel marking INFO logs as [error] (stderr).
22
- root_logger = logging.getLogger()
23
- root_logger.handlers.clear()
24
- root_logger.setLevel(logging.INFO)
25
-
26
-
27
- class _InfoOnly(logging.Filter):
28
- def filter(self, record: logging.LogRecord) -> bool:
29
- return record.levelno <= logging.INFO
30
-
31
-
32
- formatter = logging.Formatter("%(levelname)s:%(name)s:%(message)s")
33
-
34
- stdout_handler = logging.StreamHandler(sys.stdout)
35
- stdout_handler.addFilter(_InfoOnly())
36
- stdout_handler.setFormatter(formatter)
37
- root_logger.addHandler(stdout_handler)
38
-
39
- stderr_handler = logging.StreamHandler(sys.stderr)
40
- stderr_handler.setLevel(logging.WARNING)
41
- stderr_handler.setFormatter(formatter)
42
- root_logger.addHandler(stderr_handler)
43
-
44
- # Attach Fireworks tracing handler to root logger (non-stream HTTP sink)
45
- root_logger.addHandler(FireworksTracingHttpHandler())
46
-
47
-
48
- class handler(BaseHTTPRequestHandler):
49
- def do_POST(self):
50
- try:
51
- # Read and parse request body
52
- content_length = int(self.headers.get("Content-Length", 0))
53
- request_body = self.rfile.read(content_length).decode("utf-8")
54
- request_data = json.loads(request_body)
55
-
56
- # Parse as InitRequest
57
- req = InitRequest(**request_data)
58
-
59
- # Attach rollout_id filter to logger
60
- logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
61
- logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))
62
-
63
- # Validate required fields
64
- if not req.messages:
65
- error_msg = "messages is required"
66
- logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
67
- self._send_error(400, error_msg)
68
- return
69
-
70
- model = req.completion_params.get("model")
71
- if model and isinstance(model, str) and model.startswith("fireworks_ai/"):
72
- model = model[len("fireworks_ai/") :]
73
-
74
- # Prepare completion arguments
75
- completion_kwargs = {
76
- "messages": req.messages,
77
- "model": model,
78
- "temperature": req.completion_params.get("temperature"),
79
- "max_tokens": req.completion_params.get("max_tokens"),
80
- }
81
-
82
- # Add tools if present
83
- if req.tools:
84
- completion_kwargs["tools"] = req.tools
85
-
86
- # Get API key (prefer request api_key, fallback to environment)
87
- api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY")
88
- if not api_key:
89
- error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable"
90
- logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
91
- self._send_error(500, error_msg)
92
- return
93
-
94
- # Create OpenAI client
95
- client = OpenAI(base_url=req.model_base_url, api_key=api_key)
96
-
97
- logger.info(f"Sending completion request to model {req.completion_params.get('model')}")
98
-
99
- # Make the model call
100
- completion = client.chat.completions.create(**completion_kwargs)
101
-
102
- logger.info(f"Completed response: {completion}")
103
-
104
- # Log completion status
105
- logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})
106
-
107
- # Return the completion response
108
- response_data = {
109
- "status": "completed",
110
- "rollout_id": req.metadata.rollout_id,
111
- "choices": [
112
- {
113
- "message": {
114
- "role": completion.choices[0].message.role,
115
- "content": completion.choices[0].message.content,
116
- }
117
- }
118
- ],
119
- }
120
-
121
- self._send_json_response(200, response_data)
122
-
123
- except Exception as e:
124
- # Log error if we have the request context
125
- if "req" in locals() and "logger" in locals():
126
- logger.error(f"❌ Error in rollout {req.metadata.rollout_id}: {e}")
127
- logger.error(str(e), extra={"status": Status.rollout_error(str(e))})
128
-
129
- self._send_error(500, str(e))
130
-
131
- def do_GET(self):
132
- """Health check endpoint"""
133
- self._send_json_response(
134
- 200,
135
- {
136
- "status": "ok",
137
- "message": "SVGBench Vercel Serverless Function",
138
- "endpoints": {"POST /": "Process SVGBench evaluation requests"},
139
- },
140
- )
141
-
142
- def do_OPTIONS(self):
143
- """Handle CORS preflight requests"""
144
- self.send_response(200)
145
- self.send_header("Access-Control-Allow-Origin", "*")
146
- self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
147
- self.send_header("Access-Control-Allow-Headers", "Content-Type")
148
- self.end_headers()
149
-
150
- def _send_json_response(self, status_code: int, data: dict):
151
- """Send a JSON response"""
152
- self.send_response(status_code)
153
- self.send_header("Content-Type", "application/json")
154
- self.send_header("Access-Control-Allow-Origin", "*")
155
- self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
156
- self.send_header("Access-Control-Allow-Headers", "Content-Type")
157
- self.end_headers()
158
- self.wfile.write(json.dumps(data).encode("utf-8"))
159
-
160
- def _send_error(self, status_code: int, message: str):
161
- """Send an error response"""
162
- self._send_json_response(status_code, {"error": message})