eval-protocol 0.2.69.dev3__tar.gz → 0.2.70.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. {eval_protocol-0.2.69.dev3/eval_protocol.egg-info → eval_protocol-0.2.70.dev1}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/_version.py +3 -3
  3. eval_protocol-0.2.70.dev1/eval_protocol/exceptions.py +176 -0
  4. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/models.py +162 -112
  5. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_single_turn_rollout_process.py +18 -2
  6. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/exception_config.py +14 -4
  7. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/remote_rollout_processor.py +7 -9
  8. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/tracing_utils.py +2 -2
  9. eval_protocol-0.2.70.dev1/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +177 -0
  10. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1/eval_protocol.egg-info}/PKG-INFO +1 -1
  11. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/SOURCES.txt +2 -0
  12. eval_protocol-0.2.70.dev1/tests/test_exceptions.py +350 -0
  13. eval_protocol-0.2.69.dev3/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -162
  14. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/LICENSE +0 -0
  15. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/README.md +0 -0
  16. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/__init__.py +0 -0
  17. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/normalize_sandbox_fusion.py +0 -0
  18. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/utils/__init__.py +0 -0
  19. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/utils/generate_api_key.py +0 -0
  20. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/development/utils/subprocess_manager.py +0 -0
  21. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/__init__.py +0 -0
  22. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/__main__.py +0 -0
  23. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/__init__.py +0 -0
  24. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/base.py +0 -0
  25. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/bigquery.py +0 -0
  26. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/braintrust.py +0 -0
  27. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/fireworks_tracing.py +0 -0
  28. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/huggingface.py +0 -0
  29. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/langchain.py +0 -0
  30. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/langfuse.py +0 -0
  31. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/langsmith.py +0 -0
  32. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/openai_responses.py +0 -0
  33. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/trl.py +0 -0
  34. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/utils.py +0 -0
  35. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/adapters/weave.py +0 -0
  36. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/__init__.py +0 -0
  37. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/models.py +0 -0
  38. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/orchestrator.py +0 -0
  39. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resource_abc.py +0 -0
  40. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resource_pool.py +0 -0
  41. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/__init__.py +0 -0
  42. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  43. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  44. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  45. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  46. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  47. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/docker_resource.py +0 -0
  48. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  49. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  50. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/resources/sql_resource.py +0 -0
  51. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/task_manager.py +0 -0
  52. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/agent/tool_registry.py +0 -0
  53. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/auth.py +0 -0
  54. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/__init__.py +0 -0
  55. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  56. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  57. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_aime25.py +0 -0
  58. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  59. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  60. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  61. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  62. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  63. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli.py +0 -0
  64. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/__init__.py +0 -0
  65. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  66. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/common.py +0 -0
  67. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/create_rft.py +0 -0
  68. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/deploy.py +0 -0
  69. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  70. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/logs.py +0 -0
  71. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/preview.py +0 -0
  72. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  73. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/cli_commands/upload.py +0 -0
  74. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/common_utils.py +0 -0
  75. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/config.py +0 -0
  76. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/__init__.py +0 -0
  77. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  78. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  79. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  80. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/data_loader/models.py +0 -0
  81. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/__init__.py +0 -0
  82. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  83. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  84. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  85. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  86. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/datasets/__init__.py +0 -0
  87. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/datasets/loader.py +0 -0
  88. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/directory_utils.py +0 -0
  89. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/evaluation.py +0 -0
  90. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/__init__.py +0 -0
  91. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/event_bus.py +0 -0
  92. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/logger.py +0 -0
  93. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  94. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  95. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/execution/__init__.py +0 -0
  96. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/execution/pipeline.py +0 -0
  97. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/fireworks_rft.py +0 -0
  98. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/gcp_tools.py +0 -0
  99. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generation/cache.py +0 -0
  100. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generation/clients/base.py +0 -0
  101. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generation/clients.py +0 -0
  102. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/generic_server.py +0 -0
  103. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/get_pep440_version.py +0 -0
  104. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/human_id/__init__.py +0 -0
  105. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/human_id/dictionary.py +0 -0
  106. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/__init__.py +0 -0
  107. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/deepeval.py +0 -0
  108. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/openeval.py +0 -0
  109. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/integrations/trl.py +0 -0
  110. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/__init__.py +0 -0
  111. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  112. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  113. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  114. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  115. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/init.py +0 -0
  116. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/rollout_context.py +0 -0
  117. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  118. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/log_utils/util.py +0 -0
  119. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/logging_utils.py +0 -0
  120. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/__init__.py +0 -0
  121. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/adapter.py +0 -0
  122. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/client/__init__.py +0 -0
  123. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/client/connection.py +0 -0
  124. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/clients.py +0 -0
  125. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/__init__.py +0 -0
  126. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/base_policy.py +0 -0
  127. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/manager.py +0 -0
  128. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/execution/policy.py +0 -0
  129. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/grid_renderer.py +0 -0
  130. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  131. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/mcpgym.py +0 -0
  132. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/process_manager.py +0 -0
  133. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/session/__init__.py +0 -0
  134. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/session/manager.py +0 -0
  135. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/simple_process_manager.py +0 -0
  136. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp/simulation_server.py +0 -0
  137. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/__init__.py +0 -0
  138. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/config.py +0 -0
  139. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/main.py +0 -0
  140. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  141. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  142. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  143. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  144. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_env.py +0 -0
  145. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/__init__.py +0 -0
  146. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  147. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  148. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  149. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  150. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  151. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  152. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  153. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  154. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  155. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  156. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  157. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  158. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  159. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  160. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/packaging.py +0 -0
  161. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/platform_api.py +0 -0
  162. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/playback_policy.py +0 -0
  163. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/__init__.py +0 -0
  164. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  165. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/app.py +0 -0
  166. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  167. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  168. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  169. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/main.py +0 -0
  170. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/models.py +0 -0
  171. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  172. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/__init__.py +0 -0
  173. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  174. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  175. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  176. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  177. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  178. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  179. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  180. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  181. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/evaluation_test.py +0 -0
  182. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  183. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  184. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/execution.py +0 -0
  185. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  186. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  187. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  188. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/parameterize.py +0 -0
  189. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/plugin.py +0 -0
  190. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/rollout_processor.py +0 -0
  191. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/store_experiment_link.py +0 -0
  192. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/store_results_url.py +0 -0
  193. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/types.py +0 -0
  194. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/pytest/validate_signature.py +0 -0
  195. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/__init__.py +0 -0
  196. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  197. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  198. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  199. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  200. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  201. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  202. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  203. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/llm_judge.py +0 -0
  204. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  205. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/quickstart/utils.py +0 -0
  206. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/resources.py +0 -0
  207. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/reward_function.py +0 -0
  208. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/__init__.py +0 -0
  209. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/accuracy.py +0 -0
  210. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/accuracy_length.py +0 -0
  211. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  212. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  213. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/apps_testing_util.py +0 -0
  214. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/bfcl_reward.py +0 -0
  215. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/code_execution.py +0 -0
  216. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/code_execution_utils.py +0 -0
  217. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/cpp_code.py +0 -0
  218. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  219. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/format.py +0 -0
  220. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/function_calling.py +0 -0
  221. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/json_schema.py +0 -0
  222. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/language_consistency.py +0 -0
  223. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/lean_prover.py +0 -0
  224. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/length.py +0 -0
  225. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  226. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/math.py +0 -0
  227. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  228. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/reasoning_steps.py +0 -0
  229. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/repetition.py +0 -0
  230. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rewards/tag_count.py +0 -0
  231. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/rl_processing.py +0 -0
  232. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/server.py +0 -0
  233. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/stats/__init__.py +0 -0
  234. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/stats/confidence_intervals.py +0 -0
  235. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/typed_interface.py +0 -0
  236. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/__init__.py +0 -0
  237. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/errors.py +0 -0
  238. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/remote_rollout_processor.py +0 -0
  239. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/types/types.py +0 -0
  240. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/__init__.py +0 -0
  241. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/batch_evaluation.py +0 -0
  242. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/batch_transformation.py +0 -0
  243. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/browser_utils.py +0 -0
  244. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/check_server_status.py +0 -0
  245. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/dataset_helpers.py +0 -0
  246. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  247. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/logs_models.py +0 -0
  248. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/logs_server.py +0 -0
  249. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/module_loader.py +0 -0
  250. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/packaging_utils.py +0 -0
  251. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/show_results_url.py +0 -0
  252. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/static_policy.py +0 -0
  253. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/subprocess_utils.py +0 -0
  254. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol/utils/vite_server.py +0 -0
  255. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/dependency_links.txt +0 -0
  256. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/entry_points.txt +0 -0
  257. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/requires.txt +0 -0
  258. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/eval_protocol.egg-info/top_level.txt +0 -0
  259. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/pyproject.toml +0 -0
  260. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/setup.cfg +0 -0
  261. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/setup.py +0 -0
  262. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_accuracy.py +0 -0
  263. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_accuracy_length.py +0 -0
  264. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_adapters_e2e.py +0 -0
  265. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_agent_orchestrator.py +0 -0
  266. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_agent_resources.py +0 -0
  267. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_auth.py +0 -0
  268. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_batch_evaluation.py +0 -0
  269. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cli.py +0 -0
  270. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cli_agent.py +0 -0
  271. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cli_args.py +0 -0
  272. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_code_execution.py +0 -0
  273. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_config.py +0 -0
  274. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_control_plane_separation.py +0 -0
  275. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_cpp_code.py +0 -0
  276. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_data_driven_task_manager.py +0 -0
  277. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_deepcoder_reward.py +0 -0
  278. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_deepeval_integration.py +0 -0
  279. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_deploy_integration.py +0 -0
  280. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_directory_utils.py +0 -0
  281. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_e2b_integration.py +0 -0
  282. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_e2b_js_integration.py +0 -0
  283. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_edge_cases.py +0 -0
  284. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_ep_upload_e2e.py +0 -0
  285. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_eval_protocol_import.py +0 -0
  286. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation.py +0 -0
  287. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation_integration.py +0 -0
  288. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation_postprocess.py +0 -0
  289. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_evaluation_preview_integration.py +0 -0
  290. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_event_bus.py +0 -0
  291. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_event_bus_helper.py +0 -0
  292. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_examples_end_to_end.py +0 -0
  293. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_fireworks_api.py +0 -0
  294. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_format.py +0 -0
  295. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_fractional_code.py +0 -0
  296. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_function_calling.py +0 -0
  297. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_gcp_tools.py +0 -0
  298. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_generic_server.py +0 -0
  299. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_human_id.py +0 -0
  300. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_integration.py +0 -0
  301. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_json_schema.py +0 -0
  302. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_kwargs_validation.py +0 -0
  303. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_language_consistency.py +0 -0
  304. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_lean_prover.py +0 -0
  305. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_lean_prover_runner.py +0 -0
  306. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_length.py +0 -0
  307. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_list_comparison_math_reward.py +0 -0
  308. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_logs_server.py +0 -0
  309. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_logs_server_simple.py +0 -0
  310. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_math.py +0 -0
  311. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_minimal.py +0 -0
  312. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_models.py +0 -0
  313. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_models_rl.py +0 -0
  314. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_multiple_choice_math_reward.py +0 -0
  315. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_n_variant_batch_integration.py +0 -0
  316. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_n_variant_integration.py +0 -0
  317. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_openai_compatibility.py +0 -0
  318. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_openeval_integration.py +0 -0
  319. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_packaging.py +0 -0
  320. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_parallel_rollouts.py +0 -0
  321. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_platform_api.py +0 -0
  322. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_quickstart_utils.py +0 -0
  323. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_readiness.py +0 -0
  324. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_reasoning_steps.py +0 -0
  325. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_repetition.py +0 -0
  326. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_repetition_debug.py +0 -0
  327. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_retry_mechanism.py +0 -0
  328. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_reward_function.py +0 -0
  329. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_reward_protocol_import.py +0 -0
  330. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_rl_processing.py +0 -0
  331. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_rollout_control_plane_integration.py +0 -0
  332. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_server.py +0 -0
  333. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_show_results_url.py +0 -0
  334. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_status_migration_changes.py +0 -0
  335. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_status_migration_integration.py +0 -0
  336. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_status_model.py +0 -0
  337. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_tag_count.py +0 -0
  338. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_tau_bench_airline_smoke.py +0 -0
  339. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_typed_interface.py +0 -0
  340. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_typed_interface_rl.py +0 -0
  341. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_upload_entrypoint.py +0 -0
  342. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_url_handling.py +0 -0
  343. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/tests/test_vite_server.py +0 -0
  344. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/__init__.py +0 -0
  345. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/agent/__init__.py +0 -0
  346. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/agent/base.py +0 -0
  347. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/agent/llm_agent.py +0 -0
  348. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/__init__.py +0 -0
  349. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/api_config.py +0 -0
  350. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/data_model.py +0 -0
  351. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/api_service/simulation_service.py +0 -0
  352. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/cli.py +0 -0
  353. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/config.py +0 -0
  354. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/airline/policy.md +0 -0
  355. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/mock/policy.md +0 -0
  356. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  357. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/retail/policy.md +0 -0
  358. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  359. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  360. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  361. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  362. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  363. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  364. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  365. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/__init__.py +0 -0
  366. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/message.py +0 -0
  367. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/simulation.py +0 -0
  368. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/data_model/tasks.py +0 -0
  369. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/__init__.py +0 -0
  370. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/__init__.py +0 -0
  371. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/data_model.py +0 -0
  372. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/environment.py +0 -0
  373. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/tools.py +0 -0
  374. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/airline/utils.py +0 -0
  375. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/__init__.py +0 -0
  376. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/data_model.py +0 -0
  377. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/environment.py +0 -0
  378. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/tools.py +0 -0
  379. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/mock/utils.py +0 -0
  380. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/__init__.py +0 -0
  381. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/data_model.py +0 -0
  382. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/environment.py +0 -0
  383. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/tools.py +0 -0
  384. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/retail/utils.py +0 -0
  385. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/__init__.py +0 -0
  386. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/data_model.py +0 -0
  387. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/environment.py +0 -0
  388. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  389. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  390. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  391. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  392. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  393. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  394. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  395. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  396. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/tools.py +0 -0
  397. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  398. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  399. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/domains/telecom/utils.py +0 -0
  400. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/__init__.py +0 -0
  401. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/db.py +0 -0
  402. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/environment.py +0 -0
  403. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/server.py +0 -0
  404. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/tool.py +0 -0
  405. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/toolkit.py +0 -0
  406. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  407. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/__init__.py +0 -0
  408. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator.py +0 -0
  409. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  410. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  411. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  412. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  413. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  414. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/metrics/__init__.py +0 -0
  415. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/metrics/agent_metrics.py +0 -0
  416. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  417. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/__init__.py +0 -0
  418. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  419. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  420. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/orchestrator/utils.py +0 -0
  421. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/registry.py +0 -0
  422. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/run.py +0 -0
  423. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/__init__.py +0 -0
  424. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/check_data.py +0 -0
  425. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  426. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/start_servers.py +0 -0
  427. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/scripts/view_simulations.py +0 -0
  428. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/user/__init__.py +0 -0
  429. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/user/base.py +0 -0
  430. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/user/user_simulator.py +0 -0
  431. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/__init__.py +0 -0
  432. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/display.py +0 -0
  433. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/io_utils.py +0 -0
  434. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/llm_utils.py +0 -0
  435. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/pydantic_utils.py +0 -0
  436. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vendor/tau2/utils/utils.py +0 -0
  437. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/versioneer.py +0 -0
  438. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  439. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
  440. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
  441. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
  442. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  443. {eval_protocol-0.2.69.dev3 → eval_protocol-0.2.70.dev1}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.69.dev3
3
+ Version: 0.2.70.dev1
4
4
  Summary: The official Python SDK for Eval Protocol (EP). EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-29T03:48:45-0700",
11
+ "date": "2025-10-29T17:18:36-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "f84133471cd09ac683c082262720f30b9dfaaa2d",
15
- "version": "0.2.69-dev3"
14
+ "full-revisionid": "0ebd0177dafc55bfa302a49b2d674c0487516eff",
15
+ "version": "0.2.70-dev1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -0,0 +1,176 @@
1
+ """
2
+ Custom exceptions for Eval Protocol that map to gRPC Status codes.
3
+
4
+ These exceptions provide a clean way to handle errors and map them to appropriate
5
+ Status objects following the AIP-193 standard.
6
+ """
7
+
8
+ from typing import Optional
9
+
10
+
11
+ class EvalProtocolError(Exception):
12
+ """
13
+ Base exception for all Eval Protocol specific errors.
14
+
15
+ Maps to Status.Code and can be converted to Status objects for structured logging.
16
+ """
17
+
18
+ pass
19
+
20
+
21
+ # Standard gRPC status code exceptions
22
+ class CancelledError(EvalProtocolError):
23
+ """Operation was cancelled (Status.Code.CANCELLED = 1)"""
24
+
25
+ status_code = 1
26
+
27
+
28
+ class UnknownError(EvalProtocolError):
29
+ """Unknown error occurred (Status.Code.UNKNOWN = 2)"""
30
+
31
+ status_code = 2
32
+
33
+
34
+ class InvalidArgumentError(EvalProtocolError):
35
+ """Invalid argument provided (Status.Code.INVALID_ARGUMENT = 3)"""
36
+
37
+ status_code = 3
38
+
39
+
40
+ class DeadlineExceededError(EvalProtocolError):
41
+ """Deadline exceeded (Status.Code.DEADLINE_EXCEEDED = 4)"""
42
+
43
+ status_code = 4
44
+
45
+
46
+ class NotFoundError(EvalProtocolError):
47
+ """Resource not found (Status.Code.NOT_FOUND = 5)"""
48
+
49
+ status_code = 5
50
+
51
+
52
+ class AlreadyExistsError(EvalProtocolError):
53
+ """Resource already exists (Status.Code.ALREADY_EXISTS = 6)"""
54
+
55
+ status_code = 6
56
+
57
+
58
+ class PermissionDeniedError(EvalProtocolError):
59
+ """Permission denied (Status.Code.PERMISSION_DENIED = 7)"""
60
+
61
+ status_code = 7
62
+
63
+
64
+ class ResourceExhaustedError(EvalProtocolError):
65
+ """Resource exhausted (Status.Code.RESOURCE_EXHAUSTED = 8)"""
66
+
67
+ status_code = 8
68
+
69
+
70
+ class FailedPreconditionError(EvalProtocolError):
71
+ """Failed precondition (Status.Code.FAILED_PRECONDITION = 9)"""
72
+
73
+ status_code = 9
74
+
75
+
76
+ class AbortedError(EvalProtocolError):
77
+ """Operation was aborted (Status.Code.ABORTED = 10)"""
78
+
79
+ status_code = 10
80
+
81
+
82
+ class OutOfRangeError(EvalProtocolError):
83
+ """Value out of range (Status.Code.OUT_OF_RANGE = 11)"""
84
+
85
+ status_code = 11
86
+
87
+
88
+ class UnimplementedError(EvalProtocolError):
89
+ """Operation is not implemented (Status.Code.UNIMPLEMENTED = 12)"""
90
+
91
+ status_code = 12
92
+
93
+
94
+ class InternalError(EvalProtocolError):
95
+ """Internal server error (Status.Code.INTERNAL = 13)"""
96
+
97
+ status_code = 13
98
+
99
+
100
+ class UnavailableError(EvalProtocolError):
101
+ """Service unavailable (Status.Code.UNAVAILABLE = 14)"""
102
+
103
+ status_code = 14
104
+
105
+
106
+ class DataLossError(EvalProtocolError):
107
+ """Unrecoverable data loss (Status.Code.DATA_LOSS = 15)"""
108
+
109
+ status_code = 15
110
+
111
+
112
+ class UnauthenticatedError(EvalProtocolError):
113
+ """Request lacks valid authentication (Status.Code.UNAUTHENTICATED = 16)"""
114
+
115
+ status_code = 16
116
+
117
+
118
+ # Custom EP exceptions
119
+ class RolloutFinishedError(EvalProtocolError):
120
+ """Rollout completed successfully (Status.Code.FINISHED = 100)"""
121
+
122
+ status_code = 100
123
+
124
+
125
+ class RolloutRunningError(EvalProtocolError):
126
+ """Rollout is still running (Status.Code.RUNNING = 101)"""
127
+
128
+ status_code = 101
129
+
130
+
131
+ class ScoreInvalidError(EvalProtocolError):
132
+ """Score is invalid (Status.Code.SCORE_INVALID = 102)"""
133
+
134
+ status_code = 102
135
+
136
+
137
+ # Convenience mapping from status codes to exception classes
138
+ # Only actual error conditions should raise exceptions
139
+ STATUS_CODE_TO_EXCEPTION = {
140
+ 0: None, # OK - success, no exception
141
+ 1: CancelledError,
142
+ 2: UnknownError,
143
+ 3: InvalidArgumentError,
144
+ 4: DeadlineExceededError,
145
+ 5: NotFoundError,
146
+ 6: AlreadyExistsError,
147
+ 7: PermissionDeniedError,
148
+ 8: ResourceExhaustedError,
149
+ 9: FailedPreconditionError,
150
+ 10: AbortedError,
151
+ 11: OutOfRangeError,
152
+ 12: UnimplementedError,
153
+ 13: InternalError,
154
+ 14: UnavailableError,
155
+ 15: DataLossError,
156
+ 16: UnauthenticatedError,
157
+ 100: None, # FINISHED - success, no exception
158
+ 101: None, # RUNNING - in progress, no exception
159
+ 102: None, # SCORE_INVALID - not treated as an error, no exception
160
+ }
161
+
162
+
163
+ def exception_for_status_code(code: int) -> Optional[EvalProtocolError]:
164
+ """
165
+ Create an exception instance for a given status code.
166
+
167
+ Args:
168
+ code: Status code from Status.Code enum
169
+
170
+ Returns:
171
+ Exception instance, or None if the code maps to no exception (OK, FINISHED, RUNNING, SCORE_INVALID) or is not in the mapping
172
+ """
173
+ exception_class = STATUS_CODE_TO_EXCEPTION.get(code)
174
+ if exception_class is None:
175
+ return None
176
+ return exception_class()
@@ -136,6 +136,13 @@ class Status(BaseModel):
136
136
  """Create a status indicating the evaluation finished."""
137
137
  return cls(code=cls.Code.FINISHED, message="Evaluation finished", details=[])
138
138
 
139
+ @staticmethod
140
+ def _build_details_with_extra_info(extra_info: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
141
+ """Helper to build details list from extra_info."""
142
+ if extra_info:
143
+ return [ErrorInfo.extra_info(extra_info).to_aip193_format()]
144
+ return []
145
+
139
146
  @classmethod
140
147
  def aborted(cls, message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
141
148
  """Create a status indicating the evaluation was aborted."""
@@ -160,148 +167,191 @@ class Status(BaseModel):
160
167
  """Create a status indicating the rollout finished."""
161
168
  return cls(code=cls.Code.FINISHED, message=message, details=details or [])
162
169
 
170
+ # Error methods organized by Status.Code enum values (1-16)
171
+
172
+ # CANCELLED = 1
163
173
  @classmethod
164
- def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
165
- """Create a status indicating the rollout failed with an error."""
166
- details = []
167
- if extra_info:
168
- details.append(ErrorInfo.extra_info(extra_info).to_aip193_format())
169
- return cls.error(error_message, details)
174
+ def rollout_cancelled_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
175
+ """Create a status indicating the rollout was cancelled."""
176
+ return cls.cancelled_error(error_message, cls._build_details_with_extra_info(extra_info))
177
+
178
+ @classmethod
179
+ def cancelled_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
180
+ """Create a status indicating the operation was cancelled."""
181
+ return cls(code=cls.Code.CANCELLED, message=error_message, details=details or [])
170
182
 
183
+ # UNKNOWN = 2
171
184
  @classmethod
172
- def rollout_error_from_exception(
173
- cls, exception: Exception, extra_info: Optional[Dict[str, Any]] = None
185
+ def rollout_unknown_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
186
+ """Create a status indicating the rollout failed with an unknown error."""
187
+ return cls.unknown_error(error_message, cls._build_details_with_extra_info(extra_info))
188
+
189
+ @classmethod
190
+ def unknown_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
191
+ """Create a status indicating an unknown error occurred."""
192
+ return cls(code=cls.Code.UNKNOWN, message=error_message, details=details or [])
193
+
194
+ # INVALID_ARGUMENT = 3
195
+ @classmethod
196
+ def rollout_invalid_argument_error(
197
+ cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
174
198
  ) -> "Status":
175
- """
176
- Create a status indicating the rollout failed with an exception.
177
- Simple approach that stores exception info directly in details.
178
- """
179
- details = []
199
+ """Create a status indicating the rollout failed with an invalid argument error."""
200
+ return cls.invalid_argument_error(error_message, cls._build_details_with_extra_info(extra_info))
180
201
 
181
- details.append(
182
- {
183
- "exception_type": f"{type(exception).__module__}.{type(exception).__name__}",
184
- "exception_message": str(exception),
185
- }
186
- )
202
+ @classmethod
203
+ def invalid_argument_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
204
+ """Create a status indicating an invalid argument error occurred."""
205
+ return cls(code=cls.Code.INVALID_ARGUMENT, message=error_message, details=details or [])
187
206
 
188
- if extra_info:
189
- details.append({"extra_info": extra_info})
207
+ # DEADLINE_EXCEEDED = 4
208
+ @classmethod
209
+ def rollout_deadline_exceeded_error(
210
+ cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
211
+ ) -> "Status":
212
+ """Create a status indicating the rollout failed with a deadline exceeded error."""
213
+ return cls.deadline_exceeded_error(error_message, cls._build_details_with_extra_info(extra_info))
190
214
 
191
- return cls(code=cls.Code.INTERNAL, message=str(exception), details=details)
215
+ @classmethod
216
+ def deadline_exceeded_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
217
+ """Create a status indicating a deadline exceeded error occurred."""
218
+ return cls(code=cls.Code.DEADLINE_EXCEEDED, message=error_message, details=details or [])
192
219
 
220
+ # NOT_FOUND = 5
193
221
  @classmethod
194
- def raise_from_status_details(cls, status_details: List[Dict[str, Any]]) -> bool:
195
- """
196
- Try to raise original exception from simple status details using dynamic imports.
197
- """
222
+ def rollout_not_found_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
223
+ """Create a status indicating the rollout failed with a not found error."""
224
+ return cls.not_found_error(error_message, cls._build_details_with_extra_info(extra_info))
198
225
 
199
- for detail in status_details:
200
- # Look for simple exception info
201
- if "exception_type" in detail and "exception_message" in detail:
202
- exception_type = detail["exception_type"]
203
- exception_message = detail["exception_message"]
204
-
205
- logger.info(f"Found exception info: {exception_type}")
206
-
207
- # Dynamically import and raise the exception
208
- exception_class = cls._import_exception_class(exception_type)
209
- if exception_class:
210
- logger.info(f"Found exception class: {exception_class}")
211
- # Try different constructor patterns
212
- exception_to_raise = cls._create_exception_instance(exception_class, exception_message)
213
- if exception_to_raise:
214
- logger.info(f"Re-raising {exception_type} from status details")
215
- raise exception_to_raise
216
- else:
217
- logger.debug(f"Could not create instance of {exception_type}")
218
- continue
219
- else:
220
- logger.debug(f"Could not import exception type: {exception_type}")
221
- continue
226
+ @classmethod
227
+ def not_found_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
228
+ """Create a status indicating a not found error occurred."""
229
+ return cls(code=cls.Code.NOT_FOUND, message=error_message, details=details or [])
222
230
 
223
- return False
231
+ # ALREADY_EXISTS = 6
232
+ @classmethod
233
+ def rollout_already_exists_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
234
+ """Create a status indicating the rollout failed with an already exists error."""
235
+ return cls.already_exists_error(error_message, cls._build_details_with_extra_info(extra_info))
224
236
 
225
237
  @classmethod
226
- def _create_exception_instance(cls, exception_class: type, message: str) -> Optional[Exception]:
227
- """
228
- Try to create an exception instance using different constructor patterns.
238
+ def already_exists_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
239
+ """Create a status indicating an already exists error occurred."""
240
+ return cls(code=cls.Code.ALREADY_EXISTS, message=error_message, details=details or [])
229
241
 
230
- Args:
231
- exception_class: The exception class to instantiate
232
- message: The error message
242
+ # PERMISSION_DENIED = 7
243
+ @classmethod
244
+ def rollout_permission_denied_error(
245
+ cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
246
+ ) -> "Status":
247
+ """Create a status indicating the rollout failed with a permission denied error."""
248
+ return cls.permission_denied_error(error_message, cls._build_details_with_extra_info(extra_info))
233
249
 
234
- Returns:
235
- Exception instance if successful, None otherwise
236
- """
237
- # Common constructor patterns to try
238
- patterns = [
239
- # Pattern 1: Just message
240
- lambda: exception_class(message),
241
- # Pattern 2: Message as named parameter
242
- lambda: exception_class(message=message),
243
- # Pattern 3: Message + common litellm parameters
244
- # NOTE: we are losing some diagnostic information here by not passing the model and llm_provider. We could try to capture full exception state in rollout_error_from_exception.
245
- lambda: exception_class(message, model="unknown", llm_provider="unknown"),
246
- lambda: exception_class(message=message, model="unknown", llm_provider="unknown"),
247
- # Pattern 4: No arguments (fallback)
248
- lambda: exception_class(),
249
- ]
250
-
251
- for i, pattern in enumerate(patterns):
252
- try:
253
- instance = pattern()
254
- logger.debug(f"Successfully created {exception_class.__name__} using pattern {i + 1}")
255
- return instance
256
- except (TypeError, ValueError) as e:
257
- logger.debug(f"Pattern {i + 1} failed for {exception_class.__name__}: {e}")
258
- continue
250
+ @classmethod
251
+ def permission_denied_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
252
+ """Create a status indicating a permission denied error occurred."""
253
+ return cls(code=cls.Code.PERMISSION_DENIED, message=error_message, details=details or [])
259
254
 
260
- logger.debug(f"All constructor patterns failed for {exception_class.__name__}")
261
- return None
255
+ # RESOURCE_EXHAUSTED = 8
256
+ @classmethod
257
+ def rollout_resource_exhausted_error(
258
+ cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
259
+ ) -> "Status":
260
+ """Create a status indicating the rollout failed with a resource exhausted error."""
261
+ return cls.resource_exhausted_error(error_message, cls._build_details_with_extra_info(extra_info))
262
262
 
263
263
  @classmethod
264
- def _import_exception_class(cls, exception_type: str) -> Optional[type]:
265
- """
266
- Dynamically import an exception class from a string.
264
+ def resource_exhausted_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
265
+ """Create a status indicating a resource exhausted error occurred."""
266
+ return cls(code=cls.Code.RESOURCE_EXHAUSTED, message=error_message, details=details or [])
267
267
 
268
- Args:
269
- exception_type: Exception type string like "litellm.exceptions.NotFoundError",
270
- "openai.BadRequestError", "requests.exceptions.ConnectionError", etc.
268
+ # FAILED_PRECONDITION = 9
269
+ @classmethod
270
+ def rollout_failed_precondition_error(
271
+ cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
272
+ ) -> "Status":
273
+ """Create a status indicating the rollout failed with a failed precondition error."""
274
+ return cls.failed_precondition_error(error_message, cls._build_details_with_extra_info(extra_info))
271
275
 
272
- Returns:
273
- The exception class if found, None otherwise
274
- """
275
- try:
276
- # Require fully qualified names (no automatic prefixing)
277
- if "." not in exception_type:
278
- logging.getLogger(__name__).debug(f"Exception type must be fully qualified: {exception_type}")
279
- return None
276
+ @classmethod
277
+ def failed_precondition_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
278
+ """Create a status indicating a failed precondition error occurred."""
279
+ return cls(code=cls.Code.FAILED_PRECONDITION, message=error_message, details=details or [])
280
280
 
281
- # Parse module and class name
282
- module_name, class_name = exception_type.rsplit(".", 1)
281
+ # ABORTED = 10
282
+ @classmethod
283
+ def rollout_aborted_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
284
+ """Create a status indicating the rollout was aborted."""
285
+ return cls.aborted(error_message, cls._build_details_with_extra_info(extra_info))
283
286
 
284
- # Import the module
285
- module = importlib.import_module(module_name)
287
+ # OUT_OF_RANGE = 11
288
+ @classmethod
289
+ def rollout_out_of_range_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
290
+ """Create a status indicating the rollout failed with an out of range error."""
291
+ return cls.out_of_range_error(error_message, cls._build_details_with_extra_info(extra_info))
286
292
 
287
- # Get the exception class
288
- exception_class = getattr(module, class_name, None)
293
+ @classmethod
294
+ def out_of_range_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
295
+ """Create a status indicating an out of range error occurred."""
296
+ return cls(code=cls.Code.OUT_OF_RANGE, message=error_message, details=details or [])
289
297
 
290
- # Verify it's actually an exception class
291
- if exception_class and issubclass(exception_class, BaseException):
292
- return exception_class
298
+ # UNIMPLEMENTED = 12
299
+ @classmethod
300
+ def rollout_unimplemented_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
301
+ """Create a status indicating the rollout failed with an unimplemented error."""
302
+ return cls.unimplemented_error(error_message, cls._build_details_with_extra_info(extra_info))
293
303
 
294
- return None
304
+ @classmethod
305
+ def unimplemented_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
306
+ """Create a status indicating an unimplemented error occurred."""
307
+ return cls(code=cls.Code.UNIMPLEMENTED, message=error_message, details=details or [])
295
308
 
296
- except (ImportError, AttributeError, ValueError) as e:
297
- logging.getLogger(__name__).debug(f"Could not import exception class {exception_type}: {e}")
298
- return None
309
+ # INTERNAL = 13
310
+ @classmethod
311
+ def rollout_internal_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
312
+ """Create a status indicating the rollout failed with an internal error."""
313
+ return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info))
299
314
 
300
315
  @classmethod
301
- def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
302
- """Create a status indicating the rollout failed with an error."""
316
+ def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
317
+ """Create a status indicating an internal error occurred."""
303
318
  return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
304
319
 
320
+ # UNAVAILABLE = 14
321
+ @classmethod
322
+ def rollout_unavailable_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
323
+ """Create a status indicating the rollout failed with an unavailable error."""
324
+ return cls.unavailable_error(error_message, cls._build_details_with_extra_info(extra_info))
325
+
326
+ @classmethod
327
+ def unavailable_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
328
+ """Create a status indicating an unavailable error occurred."""
329
+ return cls(code=cls.Code.UNAVAILABLE, message=error_message, details=details or [])
330
+
331
+ # DATA_LOSS = 15
332
+ @classmethod
333
+ def rollout_data_loss_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
334
+ """Create a status indicating the rollout failed with a data loss error."""
335
+ return cls.data_loss_error(error_message, cls._build_details_with_extra_info(extra_info))
336
+
337
+ @classmethod
338
+ def data_loss_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
339
+ """Create a status indicating a data loss error occurred."""
340
+ return cls(code=cls.Code.DATA_LOSS, message=error_message, details=details or [])
341
+
342
+ # UNAUTHENTICATED = 16
343
+ @classmethod
344
+ def rollout_unauthenticated_error(
345
+ cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None
346
+ ) -> "Status":
347
+ """Create a status indicating the rollout failed with an unauthenticated error."""
348
+ return cls.unauthenticated_error(error_message, cls._build_details_with_extra_info(extra_info))
349
+
350
+ @classmethod
351
+ def unauthenticated_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
352
+ """Create a status indicating an unauthenticated error occurred."""
353
+ return cls(code=cls.Code.UNAUTHENTICATED, message=error_message, details=details or [])
354
+
305
355
  @classmethod
306
356
  def score_invalid(
307
357
  cls, message: str = "Score is invalid", details: Optional[List[Dict[str, Any]]] = None
@@ -21,6 +21,16 @@ logger = logging.getLogger(__name__)
21
21
  class SingleTurnRolloutProcessor(RolloutProcessor):
22
22
  """Single turn rollout processor for direct LLM calls."""
23
23
 
24
+ def __init__(self, *, drop_trailing_assistant_messages: bool = True) -> None:
25
+ """
26
+ Args:
27
+ drop_trailing_assistant_messages: When True (default), strip any trailing
28
+ assistant messages from the input conversation before calling the model.
29
+ This helps when datasets include previous assistant turns and you want
30
+ the model to answer the latest user query.
31
+ """
32
+ self.drop_trailing_assistant_messages = drop_trailing_assistant_messages
33
+
24
34
  def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]:
25
35
  """Generate single turn rollout tasks and return them for external handling."""
26
36
  # Do not modify global LiteLLM cache. Disable caching per-request instead.
@@ -32,7 +42,13 @@ class SingleTurnRolloutProcessor(RolloutProcessor):
32
42
  if len(row.messages) == 0:
33
43
  raise ValueError("Messages is empty. Please provide a non-empty dataset")
34
44
 
35
- messages_payload = [message.model_dump() for message in row.messages]
45
+ # Optionally drop trailing assistant messages for single-turn prompts
46
+ messages_for_request: List[Message] = list(row.messages)
47
+ if self.drop_trailing_assistant_messages:
48
+ while messages_for_request and messages_for_request[-1].role == "assistant":
49
+ messages_for_request.pop()
50
+
51
+ messages_payload = [message.model_dump() for message in messages_for_request]
36
52
 
37
53
  request_params = {"messages": messages_payload, **config.completion_params}
38
54
  # Ensure caching is disabled only for this request (review feedback)
@@ -114,7 +130,7 @@ class SingleTurnRolloutProcessor(RolloutProcessor):
114
130
  except Exception:
115
131
  pass
116
132
 
117
- messages = list(row.messages) + [
133
+ messages = list(messages_for_request) + [
118
134
  Message(
119
135
  role="assistant",
120
136
  content=assistant_content,
@@ -11,7 +11,9 @@ import backoff
11
11
  import litellm
12
12
  import requests
13
13
  import httpx
14
- import openai
14
+
15
+ import eval_protocol.exceptions
16
+
15
17
 
16
18
  # Default exceptions that should be retried with backoff
17
19
  DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = {
@@ -29,14 +31,22 @@ DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = {
29
31
  httpx.TimeoutException,
30
32
  httpx.NetworkError,
31
33
  httpx.RemoteProtocolError,
34
+ # LiteLLM library exceptions
32
35
  litellm.exceptions.RateLimitError,
33
36
  litellm.exceptions.InternalServerError,
34
37
  litellm.exceptions.Timeout,
35
38
  litellm.exceptions.NotFoundError,
36
- litellm.exceptions.BadRequestError, # remove this once we have a long term solution
39
+ # litellm.exceptions.BadRequestError, # remove this once we have a long term solution
37
40
  litellm.exceptions.ServiceUnavailableError,
38
- openai.NotFoundError,
39
- openai.BadRequestError, # remove this once we have a long term solution
41
+ litellm.exceptions.APIError,
42
+ # Eval Protocol exceptions
43
+ eval_protocol.exceptions.UnknownError,
44
+ eval_protocol.exceptions.DeadlineExceededError,
45
+ eval_protocol.exceptions.NotFoundError,
46
+ eval_protocol.exceptions.PermissionDeniedError,
47
+ eval_protocol.exceptions.UnavailableError,
48
+ eval_protocol.exceptions.UnauthenticatedError,
49
+ eval_protocol.exceptions.ResourceExhaustedError,
40
50
  }
41
51
 
42
52
 
@@ -10,6 +10,7 @@ from eval_protocol.types.remote_rollout_processor import (
10
10
  DataLoaderConfig,
11
11
  )
12
12
  from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter
13
+ from eval_protocol.exceptions import exception_for_status_code
13
14
 
14
15
  from .rollout_processor import RolloutProcessor
15
16
  from .types import RolloutProcessorConfig
@@ -97,13 +98,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
97
98
  r.raise_for_status()
98
99
  except requests.exceptions.Timeout:
99
100
  raise TimeoutError(
100
- "The /init endpoint timed out after 30 seconds. "
101
- "CRITICAL: The /init endpoint must return immediately (within 30s) and NOT block on rollout execution. "
102
- "Your remote server should:\n"
103
- "1. Accept the /init request and return a 200 response immediately\n"
104
- "2. Process the actual rollout asynchronously in the background\n"
105
- "3. Use the /status endpoint to report progress\n"
106
- "For Python/Node.js: Start a separate process per rollout to avoid blocking the /init response."
101
+ f"The /init endpoint tried {url} with {init_payload.model_dump()} but timed out after 30 seconds."
107
102
  )
108
103
 
109
104
  await asyncio.to_thread(_post_init)
@@ -166,7 +161,10 @@ class RemoteRolloutProcessor(RolloutProcessor):
166
161
  f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}"
167
162
  )
168
163
 
169
- Status.raise_from_status_details(status_details)
164
+ # Create and raise exception if appropriate
165
+ exception = exception_for_status_code(status_code)
166
+ if exception is not None:
167
+ raise exception
170
168
 
171
169
  row.rollout_status = Status(
172
170
  code=Status.Code(status_code),
@@ -183,7 +181,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
183
181
  f"Loop completed without breaking for {row.execution_metadata.rollout_id}, which means we timed out"
184
182
  )
185
183
  # Loop completed without breaking, which means we timed out
186
- row.rollout_status = Status.rollout_error(
184
+ row.rollout_status = Status.rollout_deadline_exceeded_error(
187
185
  f"Rollout {row.execution_metadata.rollout_id} timed out after {timeout_seconds} seconds"
188
186
  )
189
187
 
@@ -151,14 +151,14 @@ def update_row_with_remote_trace(
151
151
  output_rows: List[EvaluationRow] = [r for result in results for r in result.rows]
152
152
 
153
153
  if len(output_rows) == 0: # Fallback to original row if no remote data found
154
- row.rollout_status = Status(code=Status.Code.NOT_FOUND, message="No remote data found for rollout")
154
+ row.rollout_status = Status.rollout_not_found_error("No remote data found for rollout")
155
155
  return None
156
156
  elif len(output_rows) == 1: # Return the remote row
157
157
  remote_row = output_rows[0]
158
158
 
159
159
  # if the remote_row has the same number of messages as the original row, something went wrong
160
160
  if len(remote_row.messages) == len(row.messages):
161
- row.rollout_status = Status.rollout_error(
161
+ row.rollout_status = Status.rollout_internal_error(
162
162
  "Rollout finished with the same number of messages as the original row"
163
163
  )
164
164
  return None