eval-protocol 0.2.71__tar.gz → 0.2.72__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (442)
  1. {eval_protocol-0.2.71/eval_protocol.egg-info → eval_protocol-0.2.72}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/exceptions.py +3 -2
  4. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/models.py +7 -5
  5. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/evaluation_test_utils.py +1 -1
  6. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/exception_config.py +0 -1
  7. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/remote_rollout_processor.py +6 -6
  8. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +64 -39
  9. {eval_protocol-0.2.71 → eval_protocol-0.2.72/eval_protocol.egg-info}/PKG-INFO +1 -1
  10. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_exceptions.py +21 -0
  11. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/LICENSE +0 -0
  12. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/README.md +0 -0
  13. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/__init__.py +0 -0
  14. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/normalize_sandbox_fusion.py +0 -0
  15. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/utils/__init__.py +0 -0
  16. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/utils/generate_api_key.py +0 -0
  17. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/development/utils/subprocess_manager.py +0 -0
  18. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/__init__.py +0 -0
  19. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/__main__.py +0 -0
  20. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/__init__.py +0 -0
  21. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/base.py +0 -0
  22. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/bigquery.py +0 -0
  23. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/braintrust.py +0 -0
  24. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/fireworks_tracing.py +0 -0
  25. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/huggingface.py +0 -0
  26. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/langchain.py +0 -0
  27. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/langfuse.py +0 -0
  28. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/langsmith.py +0 -0
  29. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/openai_responses.py +0 -0
  30. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/trl.py +0 -0
  31. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/utils.py +0 -0
  32. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/adapters/weave.py +0 -0
  33. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/__init__.py +0 -0
  34. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/models.py +0 -0
  35. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/orchestrator.py +0 -0
  36. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resource_abc.py +0 -0
  37. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resource_pool.py +0 -0
  38. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/__init__.py +0 -0
  39. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  40. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  41. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  42. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  43. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  44. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/docker_resource.py +0 -0
  45. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  46. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  47. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/resources/sql_resource.py +0 -0
  48. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/task_manager.py +0 -0
  49. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/agent/tool_registry.py +0 -0
  50. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/auth.py +0 -0
  51. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/__init__.py +0 -0
  52. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  53. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  54. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_aime25.py +0 -0
  55. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  56. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  57. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  58. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  59. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  60. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli.py +0 -0
  61. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/__init__.py +0 -0
  62. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  63. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/common.py +0 -0
  64. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/create_rft.py +0 -0
  65. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/deploy.py +0 -0
  66. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  67. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/logs.py +0 -0
  68. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/preview.py +0 -0
  69. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  70. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/cli_commands/upload.py +0 -0
  71. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/common_utils.py +0 -0
  72. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/config.py +0 -0
  73. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/__init__.py +0 -0
  74. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  75. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  76. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  77. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/data_loader/models.py +0 -0
  78. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/__init__.py +0 -0
  79. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  80. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  81. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  82. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  83. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/datasets/__init__.py +0 -0
  84. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/datasets/loader.py +0 -0
  85. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/directory_utils.py +0 -0
  86. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/evaluation.py +0 -0
  87. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/__init__.py +0 -0
  88. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/event_bus.py +0 -0
  89. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/logger.py +0 -0
  90. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  91. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  92. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/execution/__init__.py +0 -0
  93. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/execution/pipeline.py +0 -0
  94. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/fireworks_rft.py +0 -0
  95. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/gcp_tools.py +0 -0
  96. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generation/cache.py +0 -0
  97. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generation/clients/base.py +0 -0
  98. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generation/clients.py +0 -0
  99. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/generic_server.py +0 -0
  100. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/get_pep440_version.py +0 -0
  101. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/human_id/__init__.py +0 -0
  102. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/human_id/dictionary.py +0 -0
  103. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/__init__.py +0 -0
  104. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/deepeval.py +0 -0
  105. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/openeval.py +0 -0
  106. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/integrations/trl.py +0 -0
  107. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/__init__.py +0 -0
  108. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  109. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  110. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  111. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  112. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/init.py +0 -0
  113. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/rollout_context.py +0 -0
  114. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  115. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/log_utils/util.py +0 -0
  116. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/logging_utils.py +0 -0
  117. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/__init__.py +0 -0
  118. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/adapter.py +0 -0
  119. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/client/__init__.py +0 -0
  120. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/client/connection.py +0 -0
  121. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/clients.py +0 -0
  122. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/__init__.py +0 -0
  123. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/base_policy.py +0 -0
  124. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/manager.py +0 -0
  125. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/execution/policy.py +0 -0
  126. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/grid_renderer.py +0 -0
  127. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  128. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/mcpgym.py +0 -0
  129. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/process_manager.py +0 -0
  130. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/session/__init__.py +0 -0
  131. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/session/manager.py +0 -0
  132. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/simple_process_manager.py +0 -0
  133. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp/simulation_server.py +0 -0
  134. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/__init__.py +0 -0
  135. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/config.py +0 -0
  136. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/main.py +0 -0
  137. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  138. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  139. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  140. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  141. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_env.py +0 -0
  142. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/__init__.py +0 -0
  143. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  144. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  145. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  146. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  147. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  148. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  149. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  150. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  151. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  152. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  153. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  154. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  155. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  156. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  157. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/packaging.py +0 -0
  158. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/platform_api.py +0 -0
  159. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/playback_policy.py +0 -0
  160. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/__init__.py +0 -0
  161. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  162. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/app.py +0 -0
  163. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  164. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  165. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  166. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/main.py +0 -0
  167. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/models.py +0 -0
  168. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  169. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/__init__.py +0 -0
  170. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  171. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  172. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  173. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  174. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  175. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  176. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  177. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  178. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  179. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/evaluation_test.py +0 -0
  180. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  181. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/execution.py +0 -0
  182. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  183. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  184. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  185. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/parameterize.py +0 -0
  186. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/plugin.py +0 -0
  187. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/rollout_processor.py +0 -0
  188. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/store_experiment_link.py +0 -0
  189. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/store_results_url.py +0 -0
  190. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/tracing_utils.py +0 -0
  191. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/types.py +0 -0
  192. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/pytest/validate_signature.py +0 -0
  193. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/__init__.py +0 -0
  194. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  195. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  196. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  197. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  198. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  199. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  200. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  201. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/llm_judge.py +0 -0
  202. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  203. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/quickstart/utils.py +0 -0
  204. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/resources.py +0 -0
  205. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/reward_function.py +0 -0
  206. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/__init__.py +0 -0
  207. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/accuracy.py +0 -0
  208. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/accuracy_length.py +0 -0
  209. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  210. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  211. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/apps_testing_util.py +0 -0
  212. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/bfcl_reward.py +0 -0
  213. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/code_execution.py +0 -0
  214. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/code_execution_utils.py +0 -0
  215. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/cpp_code.py +0 -0
  216. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  217. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/format.py +0 -0
  218. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/function_calling.py +0 -0
  219. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/json_schema.py +0 -0
  220. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/language_consistency.py +0 -0
  221. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/lean_prover.py +0 -0
  222. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/length.py +0 -0
  223. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  224. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/math.py +0 -0
  225. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  226. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/reasoning_steps.py +0 -0
  227. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/repetition.py +0 -0
  228. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rewards/tag_count.py +0 -0
  229. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/rl_processing.py +0 -0
  230. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/server.py +0 -0
  231. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/stats/__init__.py +0 -0
  232. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/stats/confidence_intervals.py +0 -0
  233. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/typed_interface.py +0 -0
  234. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/__init__.py +0 -0
  235. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/errors.py +0 -0
  236. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/remote_rollout_processor.py +0 -0
  237. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/types/types.py +0 -0
  238. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/__init__.py +0 -0
  239. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/batch_evaluation.py +0 -0
  240. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/batch_transformation.py +0 -0
  241. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/browser_utils.py +0 -0
  242. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/check_server_status.py +0 -0
  243. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/dataset_helpers.py +0 -0
  244. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  245. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/logs_models.py +0 -0
  246. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/logs_server.py +0 -0
  247. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/module_loader.py +0 -0
  248. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/packaging_utils.py +0 -0
  249. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/show_results_url.py +0 -0
  250. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/static_policy.py +0 -0
  251. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/subprocess_utils.py +0 -0
  252. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol/utils/vite_server.py +0 -0
  253. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/SOURCES.txt +0 -0
  254. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/dependency_links.txt +0 -0
  255. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/entry_points.txt +0 -0
  256. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/requires.txt +0 -0
  257. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/eval_protocol.egg-info/top_level.txt +0 -0
  258. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/pyproject.toml +0 -0
  259. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/setup.cfg +0 -0
  260. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/setup.py +0 -0
  261. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_accuracy.py +0 -0
  262. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_accuracy_length.py +0 -0
  263. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_adapters_e2e.py +0 -0
  264. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_agent_orchestrator.py +0 -0
  265. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_agent_resources.py +0 -0
  266. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_auth.py +0 -0
  267. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_batch_evaluation.py +0 -0
  268. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cli.py +0 -0
  269. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cli_agent.py +0 -0
  270. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cli_args.py +0 -0
  271. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_code_execution.py +0 -0
  272. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_config.py +0 -0
  273. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_control_plane_separation.py +0 -0
  274. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_cpp_code.py +0 -0
  275. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_data_driven_task_manager.py +0 -0
  276. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_deepcoder_reward.py +0 -0
  277. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_deepeval_integration.py +0 -0
  278. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_deploy_integration.py +0 -0
  279. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_directory_utils.py +0 -0
  280. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_e2b_integration.py +0 -0
  281. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_e2b_js_integration.py +0 -0
  282. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_edge_cases.py +0 -0
  283. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_ep_upload_e2e.py +0 -0
  284. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_eval_protocol_import.py +0 -0
  285. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation.py +0 -0
  286. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation_integration.py +0 -0
  287. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation_postprocess.py +0 -0
  288. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_evaluation_preview_integration.py +0 -0
  289. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_event_bus.py +0 -0
  290. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_event_bus_helper.py +0 -0
  291. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_examples_end_to_end.py +0 -0
  292. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_fireworks_api.py +0 -0
  293. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_format.py +0 -0
  294. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_fractional_code.py +0 -0
  295. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_function_calling.py +0 -0
  296. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_gcp_tools.py +0 -0
  297. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_generic_server.py +0 -0
  298. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_human_id.py +0 -0
  299. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_integration.py +0 -0
  300. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_json_schema.py +0 -0
  301. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_kwargs_validation.py +0 -0
  302. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_language_consistency.py +0 -0
  303. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_lean_prover.py +0 -0
  304. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_lean_prover_runner.py +0 -0
  305. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_length.py +0 -0
  306. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_list_comparison_math_reward.py +0 -0
  307. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_logs_server.py +0 -0
  308. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_logs_server_simple.py +0 -0
  309. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_math.py +0 -0
  310. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_minimal.py +0 -0
  311. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_models.py +0 -0
  312. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_models_rl.py +0 -0
  313. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_multiple_choice_math_reward.py +0 -0
  314. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_n_variant_batch_integration.py +0 -0
  315. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_n_variant_integration.py +0 -0
  316. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_openai_compatibility.py +0 -0
  317. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_openeval_integration.py +0 -0
  318. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_packaging.py +0 -0
  319. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_parallel_rollouts.py +0 -0
  320. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_platform_api.py +0 -0
  321. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_quickstart_utils.py +0 -0
  322. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_readiness.py +0 -0
  323. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_reasoning_steps.py +0 -0
  324. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_repetition.py +0 -0
  325. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_repetition_debug.py +0 -0
  326. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_retry_mechanism.py +0 -0
  327. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_reward_function.py +0 -0
  328. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_reward_protocol_import.py +0 -0
  329. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_rl_processing.py +0 -0
  330. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_rollout_control_plane_integration.py +0 -0
  331. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_server.py +0 -0
  332. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_show_results_url.py +0 -0
  333. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_status_migration_changes.py +0 -0
  334. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_status_migration_integration.py +0 -0
  335. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_status_model.py +0 -0
  336. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_tag_count.py +0 -0
  337. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_tau_bench_airline_smoke.py +0 -0
  338. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_typed_interface.py +0 -0
  339. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_typed_interface_rl.py +0 -0
  340. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_upload_entrypoint.py +0 -0
  341. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_url_handling.py +0 -0
  342. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/tests/test_vite_server.py +0 -0
  343. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/__init__.py +0 -0
  344. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/agent/__init__.py +0 -0
  345. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/agent/base.py +0 -0
  346. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/agent/llm_agent.py +0 -0
  347. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/__init__.py +0 -0
  348. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/api_config.py +0 -0
  349. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/data_model.py +0 -0
  350. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/api_service/simulation_service.py +0 -0
  351. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/cli.py +0 -0
  352. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/config.py +0 -0
  353. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/airline/policy.md +0 -0
  354. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/mock/policy.md +0 -0
  355. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  356. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/retail/policy.md +0 -0
  357. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  358. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  359. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  360. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  361. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  362. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  363. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  364. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/__init__.py +0 -0
  365. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/message.py +0 -0
  366. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/simulation.py +0 -0
  367. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/data_model/tasks.py +0 -0
  368. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/__init__.py +0 -0
  369. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/__init__.py +0 -0
  370. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/data_model.py +0 -0
  371. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/environment.py +0 -0
  372. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/tools.py +0 -0
  373. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/airline/utils.py +0 -0
  374. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/__init__.py +0 -0
  375. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/data_model.py +0 -0
  376. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/environment.py +0 -0
  377. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/tools.py +0 -0
  378. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/mock/utils.py +0 -0
  379. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/__init__.py +0 -0
  380. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/data_model.py +0 -0
  381. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/environment.py +0 -0
  382. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/tools.py +0 -0
  383. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/retail/utils.py +0 -0
  384. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/__init__.py +0 -0
  385. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/data_model.py +0 -0
  386. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/environment.py +0 -0
  387. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  388. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  389. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  390. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  391. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  392. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  393. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  394. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  395. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/tools.py +0 -0
  396. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  397. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  398. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/domains/telecom/utils.py +0 -0
  399. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/__init__.py +0 -0
  400. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/db.py +0 -0
  401. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/environment.py +0 -0
  402. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/server.py +0 -0
  403. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/tool.py +0 -0
  404. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/toolkit.py +0 -0
  405. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  406. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/__init__.py +0 -0
  407. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator.py +0 -0
  408. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  409. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  410. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  411. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  412. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  413. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/metrics/__init__.py +0 -0
  414. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/metrics/agent_metrics.py +0 -0
  415. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  416. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/__init__.py +0 -0
  417. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  418. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  419. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/orchestrator/utils.py +0 -0
  420. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/registry.py +0 -0
  421. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/run.py +0 -0
  422. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/__init__.py +0 -0
  423. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/check_data.py +0 -0
  424. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  425. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/start_servers.py +0 -0
  426. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/scripts/view_simulations.py +0 -0
  427. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/user/__init__.py +0 -0
  428. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/user/base.py +0 -0
  429. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/user/user_simulator.py +0 -0
  430. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/__init__.py +0 -0
  431. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/display.py +0 -0
  432. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/io_utils.py +0 -0
  433. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/llm_utils.py +0 -0
  434. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/pydantic_utils.py +0 -0
  435. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vendor/tau2/utils/utils.py +0 -0
  436. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/versioneer.py +0 -0
  437. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  438. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
  439. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
  440. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
  441. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  442. {eval_protocol-0.2.71 → eval_protocol-0.2.72}/vite-app/dist/index.html +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.71
3
+ Version: 0.2.72
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-10-29T17:44:43-0700",
11
+ "date": "2025-10-30T03:43:20-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "4c855e76a696143f7d7b40b40d336f3f7a76ffcf",
15
- "version": "0.2.71"
14
+ "full-revisionid": "a71074ec111c9321e5cb2e8366dbb56504f2fc3a",
15
+ "version": "0.2.72"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -160,12 +160,13 @@ STATUS_CODE_TO_EXCEPTION = {
160
160
  }
161
161
 
162
162
 
163
- def exception_for_status_code(code: int) -> Optional[EvalProtocolError]:
163
+ def exception_for_status_code(code: int, message: str = "") -> Optional[EvalProtocolError]:
164
164
  """
165
165
  Create an exception instance for a given status code.
166
166
 
167
167
  Args:
168
168
  code: Status code from Status.Code enum
169
+ message: Optional error message to include in the exception
169
170
 
170
171
  Returns:
171
172
  Exception instance or None if code is OK (0)
@@ -173,4 +174,4 @@ def exception_for_status_code(code: int) -> Optional[EvalProtocolError]:
173
174
  exception_class = STATUS_CODE_TO_EXCEPTION.get(code)
174
175
  if exception_class is None:
175
176
  return None
176
- return exception_class()
177
+ return exception_class(message) if message else exception_class()
@@ -21,9 +21,6 @@ from eval_protocol.human_id import generate_id
21
21
  from eval_protocol.types import TerminationReason
22
22
 
23
23
 
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
24
  class ErrorInfo(BaseModel):
28
25
  """
29
26
  AIP-193 ErrorInfo model for structured error details.
@@ -312,6 +309,11 @@ class Status(BaseModel):
312
309
  """Create a status indicating the rollout failed with an internal error."""
313
310
  return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info))
314
311
 
312
+ @classmethod
313
+ def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
314
+ """Create a status indicating an internal error occurred."""
315
+ return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
316
+
315
317
  # For backwards compatibility
316
318
  @classmethod
317
319
  def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status":
@@ -319,8 +321,8 @@ class Status(BaseModel):
319
321
  return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info))
320
322
 
321
323
  @classmethod
322
- def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
323
- """Create a status indicating an internal error occurred."""
324
+ def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status":
325
+ """Create a status indicating an error occurred."""
324
326
  return cls(code=cls.Code.INTERNAL, message=error_message, details=details or [])
325
327
 
326
328
  # UNAVAILABLE = 14
@@ -398,7 +398,7 @@ async def rollout_processor_with_retry(
398
398
  else:
399
399
  # Non-retryable exception - fail immediately
400
400
  logging.error(f"❌ Rollout failed (non-retryable error encountered): {repr(e)}")
401
- row.rollout_status = Status.rollout_error(repr(e))
401
+ row.rollout_status = Status.rollout_error(str(e))
402
402
  return row
403
403
 
404
404
  async def execute_row_with_backoff_and_log(
@@ -36,7 +36,6 @@ DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = {
36
36
  litellm.exceptions.InternalServerError,
37
37
  litellm.exceptions.Timeout,
38
38
  litellm.exceptions.NotFoundError,
39
- # litellm.exceptions.BadRequestError, # remove this once we have a long term solution
40
39
  litellm.exceptions.ServiceUnavailableError,
41
40
  litellm.exceptions.APIError,
42
41
  # Eval Protocol exceptions
@@ -94,7 +94,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
94
94
  def _post_init() -> None:
95
95
  url = f"{remote_base_url}/init"
96
96
  try:
97
- r = requests.post(url, json=init_payload.model_dump(), timeout=30)
97
+ r = requests.post(url, json=init_payload.model_dump(), timeout=300)
98
98
  r.raise_for_status()
99
99
  except requests.exceptions.Timeout:
100
100
  raise TimeoutError(
@@ -133,9 +133,9 @@ class RemoteRolloutProcessor(RolloutProcessor):
133
133
  # For all other exceptions, raise them
134
134
  raise
135
135
 
136
- # Search Fireworks tracing logs for completion
137
- completed_logs = self._tracing_adapter.search_logs(
138
- tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
136
+ # Search Fireworks tracing logs for completion (run in thread to avoid blocking event loop)
137
+ completed_logs = await asyncio.to_thread(
138
+ self._tracing_adapter.search_logs, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
139
139
  )
140
140
  # Filter for logs that actually have status information
141
141
  status_logs = []
@@ -161,8 +161,8 @@ class RemoteRolloutProcessor(RolloutProcessor):
161
161
  f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}"
162
162
  )
163
163
 
164
- # Create and raise exception if appropriate
165
- exception = exception_for_status_code(status_code)
164
+ # Create and raise exception if appropriate, preserving original message
165
+ exception = exception_for_status_code(status_code, status_message)
166
166
  if exception is not None:
167
167
  raise exception
168
168
 
@@ -12,6 +12,7 @@ import sys
12
12
  import asyncio
13
13
  from flask import Flask, request, jsonify
14
14
  from openai import OpenAI
15
+ import openai
15
16
  from dotenv import load_dotenv
16
17
 
17
18
  from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter
@@ -49,56 +50,80 @@ root_logger.addHandler(FireworksTracingHttpHandler())
49
50
  app = Flask(__name__)
50
51
 
51
52
 
52
- async def execute_rollout_background(req, api_key):
53
+ async def execute_rollout_background(req: InitRequest, api_key: str):
53
54
  """Execute the OpenAI completion in background and log results"""
54
55
  # Attach rollout_id filter to logger
55
56
  logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
56
57
  logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))
57
58
 
58
- try:
59
- model = req.completion_params.get("model")
60
- # Uncomment if you need to strip fireworks_ai/ prefix
61
- # if model and isinstance(model, str) and model.startswith("fireworks_ai/"):
62
- # model = model[len("fireworks_ai/"):]
63
-
64
- # Prepare completion arguments
65
- completion_kwargs = {
66
- "messages": req.messages,
67
- # "messages": [{"role": "user", "content": "Hello, how are you?"}],
68
- "model": model,
69
- "temperature": req.completion_params.get("temperature"),
70
- "max_tokens": req.completion_params.get("max_tokens"),
71
- }
59
+ model = req.completion_params.get("model")
60
+ # Uncomment if you need to strip fireworks_ai/ prefix
61
+ # if model and isinstance(model, str) and model.startswith("fireworks_ai/"):
62
+ # model = model[len("fireworks_ai/"):]
63
+
64
+ # Prepare completion arguments
65
+ completion_kwargs = {
66
+ "messages": req.messages,
67
+ # "messages": [{"role": "user", "content": "Hello, how are you?"}],
68
+ "model": model,
69
+ "temperature": req.completion_params.get("temperature"),
70
+ "max_tokens": req.completion_params.get("max_tokens"),
71
+ }
72
+
73
+ # Add tools if present
74
+ if req.tools:
75
+ completion_kwargs["tools"] = req.tools
76
+
77
+ logger.info(
78
+ f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}"
79
+ )
72
80
 
73
- # Add tools if present
74
- if req.tools:
75
- completion_kwargs["tools"] = req.tools
81
+ # Create AsyncOpenAI client
82
+ # client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key)
83
+ client = OpenAI(base_url=req.model_base_url, api_key=api_key)
76
84
 
77
- logger.info(
78
- f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}"
79
- )
85
+ logger.info(f"Sending completion request to model {model}")
80
86
 
81
- # Create AsyncOpenAI client
82
- # client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key)
83
- client = OpenAI(base_url=req.model_base_url, api_key=api_key)
87
+ # Make the async model call with timeout
88
+ import time
84
89
 
85
- logger.info(f"Sending completion request to model {model}")
90
+ logger.info(f"timing start: {time.time()}")
86
91
 
87
- # Make the async model call with timeout
88
- import time
89
-
90
- logger.info(f"timing start: {time.time()}")
92
+ try:
91
93
  completion = client.chat.completions.create(**completion_kwargs)
92
- logger.info(f"Completed response: {completion}")
93
- logger.info(f"timing end: {time.time()}")
94
- # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR
95
- logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})
96
-
94
+ except (
95
+ openai.AuthenticationError,
96
+ openai.PermissionDeniedError,
97
+ ) as e:
98
+ # These errors should be logged and will be retried by RemoteRolloutProcessor
99
+ logger.error(
100
+ f"Rollout {req.metadata.rollout_id} failed: {e}",
101
+ extra={"status": Status.rollout_permission_denied_error(str(e))},
102
+ )
103
+ return
104
+ except openai.NotFoundError as e:
105
+ logger.error(
106
+ f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_not_found_error(str(e))}
107
+ )
108
+ return
109
+ except openai.RateLimitError as e:
110
+ logger.error(
111
+ f"Rollout {req.metadata.rollout_id} failed: {e}",
112
+ extra={"status": Status.rollout_resource_exhausted_error(str(e))},
113
+ )
114
+ return
97
115
  except Exception as e:
98
- # Log error with structured status - THIS IS WHAT RemoteRolloutProcessor POLLS FOR
116
+ # Non-OpenAI errors (shouldn't normally happen but catch anyway)
99
117
  logger.error(
100
- f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_error_from_exception(e)}
118
+ f"Rollout {req.metadata.rollout_id} failed with unexpected error: {e}",
119
+ extra={"status": Status.rollout_internal_error(str(e))},
101
120
  )
121
+ return
122
+
123
+ logger.info(f"Completed response: {completion}")
124
+ logger.info(f"timing end: {time.time()}")
125
+ # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR
126
+ logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})
102
127
 
103
128
 
104
129
  @app.route("/init", methods=["POST"])
@@ -114,7 +139,7 @@ async def init():
114
139
  # Validate required fields
115
140
  if not req.messages:
116
141
  error_msg = "messages is required"
117
- logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
142
+ logger.error(error_msg, extra={"status": Status.rollout_internal_error(error_msg)})
118
143
  return jsonify({"error": error_msg}), 400
119
144
 
120
145
  # Get API key (prefer request api_key, fallback to environment)
@@ -126,7 +151,7 @@ async def init():
126
151
  api_key = os.environ.get("FIREWORKS_API_KEY")
127
152
  else:
128
153
  error_msg = "API key not provided in request or environment variable"
129
- logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
154
+ logger.error(error_msg, extra={"status": Status.rollout_internal_error(error_msg)})
130
155
  return jsonify({"error": error_msg}), 401
131
156
 
132
157
  # 🔥 FIRE: Return immediately with acceptance (within 30s requirement)
@@ -137,7 +162,7 @@ async def init():
137
162
  }
138
163
 
139
164
  # Fire and forget: Execute rollout asynchronously
140
- asyncio.create_task(execute_rollout_background(req, api_key))
165
+ asyncio.create_task(execute_rollout_background(req, api_key or ""))
141
166
 
142
167
  return jsonify(response_data), 200
143
168
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.71
3
+ Version: 0.2.72
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -348,3 +348,24 @@ def test_integration_with_retry_logic():
348
348
  assert exception_class in DEFAULT_RETRYABLE_EXCEPTIONS, (
349
349
  f"{exception_class.__name__} should be in DEFAULT_RETRYABLE_EXCEPTIONS for retry support"
350
350
  )
351
+
352
+
353
+ def test_exception_message_preservation():
354
+ """Test that error messages are properly preserved in exceptions."""
355
+ test_cases = [
356
+ (13, "test error", InternalError),
357
+ (5, "Model xyz not found", NotFoundError),
358
+ (7, "Invalid API key", PermissionDeniedError),
359
+ ]
360
+
361
+ for status_code, message, expected_exception_class in test_cases:
362
+ # Test with message
363
+ exception = exception_for_status_code(status_code, message)
364
+ assert exception is not None
365
+ assert isinstance(exception, expected_exception_class)
366
+ assert str(exception) == message, f"Exception should preserve message '{message}'"
367
+
368
+ # Test without message (should still work)
369
+ exception_no_msg = exception_for_status_code(status_code)
370
+ assert exception_no_msg is not None
371
+ assert isinstance(exception_no_msg, expected_exception_class)
File without changes
File without changes