eval-protocol 0.2.94.dev3__tar.gz → 0.2.95__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (464) hide show
  1. {eval_protocol-0.2.94.dev3/eval_protocol.egg-info → eval_protocol-0.2.95}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_aime25.py +2 -5
  4. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_gpqa.py +2 -5
  5. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_livebench_data_analysis.py +2 -5
  6. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/execution/base_policy.py +7 -0
  7. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/execution/policy.py +8 -8
  8. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/models.py +1 -36
  9. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_agent_rollout_processor.py +3 -8
  10. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_single_turn_rollout_process.py +6 -10
  11. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/accuracy.py +3 -13
  12. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/json_schema.py +3 -11
  13. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/language_consistency.py +3 -13
  14. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/repetition.py +3 -13
  15. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/tag_count.py +3 -13
  16. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95/eval_protocol.egg-info}/PKG-INFO +1 -1
  17. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol.egg-info/SOURCES.txt +4 -3
  18. eval_protocol-0.2.95/tests/test_litellm_policy_provider_fields.py +95 -0
  19. eval_protocol-0.2.95/vite-app/dist/assets/index-BIhepl19.css +1 -0
  20. eval_protocol-0.2.95/vite-app/dist/assets/index-DaovgarD.js +137 -0
  21. eval_protocol-0.2.95/vite-app/dist/assets/index-DaovgarD.js.map +1 -0
  22. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vite-app/dist/index.html +2 -2
  23. eval_protocol-0.2.94.dev3/vite-app/dist/assets/index-CuQbfdPD.js +0 -46
  24. eval_protocol-0.2.94.dev3/vite-app/dist/assets/index-CuQbfdPD.js.map +0 -1
  25. eval_protocol-0.2.94.dev3/vite-app/dist/assets/index-iZp_HgyW.css +0 -1
  26. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/LICENSE +0 -0
  27. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/README.md +0 -0
  28. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/development/__init__.py +0 -0
  29. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/development/normalize_sandbox_fusion.py +0 -0
  30. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/development/utils/__init__.py +0 -0
  31. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/development/utils/generate_api_key.py +0 -0
  32. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/development/utils/subprocess_manager.py +0 -0
  33. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/__init__.py +0 -0
  34. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/__main__.py +0 -0
  35. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/__init__.py +0 -0
  36. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/base.py +0 -0
  37. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/bigquery.py +0 -0
  38. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/braintrust.py +0 -0
  39. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/fireworks_tracing.py +0 -0
  40. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/huggingface.py +0 -0
  41. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/langchain.py +0 -0
  42. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/langfuse.py +0 -0
  43. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/langsmith.py +0 -0
  44. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/openai_responses.py +0 -0
  45. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/trl.py +0 -0
  46. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/utils.py +0 -0
  47. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/adapters/weave.py +0 -0
  48. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/__init__.py +0 -0
  49. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/models.py +0 -0
  50. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/orchestrator.py +0 -0
  51. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resource_abc.py +0 -0
  52. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resource_pool.py +0 -0
  53. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/__init__.py +0 -0
  54. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  55. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  56. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  57. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  58. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  59. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/docker_resource.py +0 -0
  60. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  61. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  62. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/resources/sql_resource.py +0 -0
  63. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/task_manager.py +0 -0
  64. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/agent/tool_registry.py +0 -0
  65. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/auth.py +0 -0
  66. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/__init__.py +0 -0
  67. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  68. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  69. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  70. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
  71. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  72. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  73. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli.py +0 -0
  74. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/__init__.py +0 -0
  75. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  76. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/common.py +0 -0
  77. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/create_rft.py +0 -0
  78. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/deploy.py +0 -0
  79. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  80. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/local_test.py +0 -0
  81. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/logs.py +0 -0
  82. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/preview.py +0 -0
  83. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  84. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/upload.py +0 -0
  85. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/cli_commands/utils.py +0 -0
  86. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/common_utils.py +0 -0
  87. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/config.py +0 -0
  88. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/data_loader/__init__.py +0 -0
  89. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  90. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  91. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  92. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
  93. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/data_loader/models.py +0 -0
  94. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/dataset_logger/__init__.py +0 -0
  95. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  96. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  97. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  98. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  99. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/datasets/__init__.py +0 -0
  100. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/datasets/loader.py +0 -0
  101. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/directory_utils.py +0 -0
  102. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/evaluation.py +0 -0
  103. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/event_bus/__init__.py +0 -0
  104. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/event_bus/event_bus.py +0 -0
  105. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/event_bus/logger.py +0 -0
  106. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  107. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  108. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/exceptions.py +0 -0
  109. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/execution/__init__.py +0 -0
  110. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/execution/pipeline.py +0 -0
  111. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/fireworks_rft.py +0 -0
  112. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/gcp_tools.py +0 -0
  113. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/generation/cache.py +0 -0
  114. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/generation/clients/base.py +0 -0
  115. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/generation/clients.py +0 -0
  116. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/generic_server.py +0 -0
  117. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/get_pep440_version.py +0 -0
  118. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/human_id/__init__.py +0 -0
  119. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/human_id/dictionary.py +0 -0
  120. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/__init__.py +0 -0
  121. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/deepeval.py +0 -0
  122. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/openai_rft.py +0 -0
  123. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/openeval.py +0 -0
  124. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/tinker_cookbook.py +0 -0
  125. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
  126. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/integrations/trl.py +0 -0
  127. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/__init__.py +0 -0
  128. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  129. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  130. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  131. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  132. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/init.py +0 -0
  133. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/rollout_context.py +0 -0
  134. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  135. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/log_utils/util.py +0 -0
  136. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/logging_utils.py +0 -0
  137. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/__init__.py +0 -0
  138. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/adapter.py +0 -0
  139. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/client/__init__.py +0 -0
  140. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/client/connection.py +0 -0
  141. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/clients.py +0 -0
  142. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/execution/__init__.py +0 -0
  143. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/execution/manager.py +0 -0
  144. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
  145. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/grid_renderer.py +0 -0
  146. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  147. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/mcpgym.py +0 -0
  148. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/process_manager.py +0 -0
  149. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/session/__init__.py +0 -0
  150. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/session/manager.py +0 -0
  151. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/simple_process_manager.py +0 -0
  152. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp/simulation_server.py +0 -0
  153. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/__init__.py +0 -0
  154. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/config.py +0 -0
  155. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/main.py +0 -0
  156. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  157. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  158. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  159. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  160. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_env.py +0 -0
  161. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/__init__.py +0 -0
  162. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  163. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  164. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  165. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  166. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  167. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  168. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  169. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  170. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  171. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  172. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  173. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  174. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  175. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  176. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/packaging.py +0 -0
  177. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/platform_api.py +0 -0
  178. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/playback_policy.py +0 -0
  179. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/__init__.py +0 -0
  180. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  181. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/app.py +0 -0
  182. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  183. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  184. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  185. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/main.py +0 -0
  186. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/models.py +0 -0
  187. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  188. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/__init__.py +0 -0
  189. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  190. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  191. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  192. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  193. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  194. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  195. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  196. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/evaluation_test.py +0 -0
  197. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  198. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  199. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/exception_config.py +0 -0
  200. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/execution.py +0 -0
  201. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  202. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  203. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  204. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
  205. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
  206. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/parameterize.py +0 -0
  207. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/plugin.py +0 -0
  208. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
  209. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/rollout_processor.py +0 -0
  210. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
  211. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/store_experiment_link.py +0 -0
  212. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/store_results_url.py +0 -0
  213. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/tracing_utils.py +0 -0
  214. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/types.py +0 -0
  215. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/pytest/validate_signature.py +0 -0
  216. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/__init__.py +0 -0
  217. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  218. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  219. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  220. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  221. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  222. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  223. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  224. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/llm_judge.py +0 -0
  225. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  226. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
  227. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
  228. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  229. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/quickstart/utils.py +0 -0
  230. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/resources.py +0 -0
  231. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/reward_function.py +0 -0
  232. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/__init__.py +0 -0
  233. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/accuracy_length.py +0 -0
  234. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  235. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  236. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/apps_testing_util.py +0 -0
  237. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/bfcl_reward.py +0 -0
  238. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/code_execution.py +0 -0
  239. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/code_execution_utils.py +0 -0
  240. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/cpp_code.py +0 -0
  241. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  242. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/format.py +0 -0
  243. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/function_calling.py +0 -0
  244. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/lean_prover.py +0 -0
  245. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/length.py +0 -0
  246. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  247. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/math.py +0 -0
  248. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  249. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rewards/reasoning_steps.py +0 -0
  250. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/rl_processing.py +0 -0
  251. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/server.py +0 -0
  252. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/stats/__init__.py +0 -0
  253. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/stats/confidence_intervals.py +0 -0
  254. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/typed_interface.py +0 -0
  255. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/types/__init__.py +0 -0
  256. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/types/errors.py +0 -0
  257. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/types/remote_rollout_processor.py +0 -0
  258. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/types/types.py +0 -0
  259. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/__init__.py +0 -0
  260. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/batch_evaluation.py +0 -0
  261. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/batch_transformation.py +0 -0
  262. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/browser_utils.py +0 -0
  263. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/check_server_status.py +0 -0
  264. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/dataset_helpers.py +0 -0
  265. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  266. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/logs_models.py +0 -0
  267. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/logs_server.py +0 -0
  268. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/module_loader.py +0 -0
  269. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/packaging_utils.py +0 -0
  270. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/show_results_url.py +0 -0
  271. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/static_policy.py +0 -0
  272. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/subprocess_utils.py +0 -0
  273. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol/utils/vite_server.py +0 -0
  274. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol.egg-info/dependency_links.txt +0 -0
  275. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol.egg-info/entry_points.txt +0 -0
  276. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol.egg-info/requires.txt +0 -0
  277. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/eval_protocol.egg-info/top_level.txt +0 -0
  278. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/pyproject.toml +0 -0
  279. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/setup.cfg +0 -0
  280. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/setup.py +0 -0
  281. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_accuracy.py +0 -0
  282. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_accuracy_length.py +0 -0
  283. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_adapters_e2e.py +0 -0
  284. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_agent_orchestrator.py +0 -0
  285. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_agent_resources.py +0 -0
  286. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_auth.py +0 -0
  287. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_batch_evaluation.py +0 -0
  288. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_cli.py +0 -0
  289. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_cli_agent.py +0 -0
  290. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_cli_args.py +0 -0
  291. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_cli_create_rft.py +0 -0
  292. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_cli_local_test.py +0 -0
  293. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_code_execution.py +0 -0
  294. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_config.py +0 -0
  295. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_control_plane_separation.py +0 -0
  296. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_cpp_code.py +0 -0
  297. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_data_driven_task_manager.py +0 -0
  298. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_deepcoder_reward.py +0 -0
  299. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_deepeval_integration.py +0 -0
  300. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_deploy_integration.py +0 -0
  301. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_directory_utils.py +0 -0
  302. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_e2b_integration.py +0 -0
  303. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_e2b_js_integration.py +0 -0
  304. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_edge_cases.py +0 -0
  305. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_ep_upload_e2e.py +0 -0
  306. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_eval_protocol_import.py +0 -0
  307. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_evaluation.py +0 -0
  308. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_evaluation_integration.py +0 -0
  309. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_evaluation_postprocess.py +0 -0
  310. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_evaluation_preview_integration.py +0 -0
  311. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_event_bus.py +0 -0
  312. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_event_bus_helper.py +0 -0
  313. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_examples_end_to_end.py +0 -0
  314. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_exception_config.py +0 -0
  315. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_exceptions.py +0 -0
  316. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_fireworks_api.py +0 -0
  317. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_format.py +0 -0
  318. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_fractional_code.py +0 -0
  319. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_function_calling.py +0 -0
  320. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_gcp_tools.py +0 -0
  321. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_generic_server.py +0 -0
  322. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_human_id.py +0 -0
  323. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_integration.py +0 -0
  324. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_json_schema.py +0 -0
  325. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_kwargs_validation.py +0 -0
  326. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_language_consistency.py +0 -0
  327. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_lean_prover.py +0 -0
  328. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_lean_prover_runner.py +0 -0
  329. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_length.py +0 -0
  330. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_list_comparison_math_reward.py +0 -0
  331. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_logs_server.py +0 -0
  332. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_logs_server_simple.py +0 -0
  333. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_math.py +0 -0
  334. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_message_field_filtering.py +0 -0
  335. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_minimal.py +0 -0
  336. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_models.py +0 -0
  337. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_models_rl.py +0 -0
  338. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_multiple_choice_math_reward.py +0 -0
  339. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_n_variant_batch_integration.py +0 -0
  340. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_n_variant_integration.py +0 -0
  341. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_openai_compatibility.py +0 -0
  342. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_openai_rft_integration.py +0 -0
  343. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_openeval_integration.py +0 -0
  344. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_packaging.py +0 -0
  345. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_parallel_rollouts.py +0 -0
  346. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_platform_api.py +0 -0
  347. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_quickstart_utils.py +0 -0
  348. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_readiness.py +0 -0
  349. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_reasoning_steps.py +0 -0
  350. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_repetition.py +0 -0
  351. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_repetition_debug.py +0 -0
  352. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_retry_mechanism.py +0 -0
  353. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_reward_function.py +0 -0
  354. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_reward_protocol_import.py +0 -0
  355. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_rl_processing.py +0 -0
  356. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_rollout_control_plane_integration.py +0 -0
  357. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_server.py +0 -0
  358. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_show_results_url.py +0 -0
  359. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_status_migration_changes.py +0 -0
  360. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_status_migration_integration.py +0 -0
  361. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_status_model.py +0 -0
  362. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_tag_count.py +0 -0
  363. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_tau_bench_airline_smoke.py +0 -0
  364. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_typed_interface.py +0 -0
  365. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_typed_interface_rl.py +0 -0
  366. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_upload_entrypoint.py +0 -0
  367. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_url_handling.py +0 -0
  368. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/tests/test_vite_server.py +0 -0
  369. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/__init__.py +0 -0
  370. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/agent/__init__.py +0 -0
  371. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/agent/base.py +0 -0
  372. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/agent/llm_agent.py +0 -0
  373. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/api_service/__init__.py +0 -0
  374. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/api_service/api_config.py +0 -0
  375. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/api_service/data_model.py +0 -0
  376. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/api_service/simulation_service.py +0 -0
  377. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/cli.py +0 -0
  378. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/config.py +0 -0
  379. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/airline/policy.md +0 -0
  380. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/mock/policy.md +0 -0
  381. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  382. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/retail/policy.md +0 -0
  383. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  384. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  385. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  386. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  387. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  388. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  389. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  390. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data_model/__init__.py +0 -0
  391. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data_model/message.py +0 -0
  392. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data_model/simulation.py +0 -0
  393. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/data_model/tasks.py +0 -0
  394. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/__init__.py +0 -0
  395. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/airline/__init__.py +0 -0
  396. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/airline/data_model.py +0 -0
  397. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/airline/environment.py +0 -0
  398. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/airline/tools.py +0 -0
  399. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/airline/utils.py +0 -0
  400. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/mock/__init__.py +0 -0
  401. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/mock/data_model.py +0 -0
  402. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/mock/environment.py +0 -0
  403. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/mock/tools.py +0 -0
  404. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/mock/utils.py +0 -0
  405. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/retail/__init__.py +0 -0
  406. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/retail/data_model.py +0 -0
  407. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/retail/environment.py +0 -0
  408. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/retail/tools.py +0 -0
  409. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/retail/utils.py +0 -0
  410. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/__init__.py +0 -0
  411. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/data_model.py +0 -0
  412. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/environment.py +0 -0
  413. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  414. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  415. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  416. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  417. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  418. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  419. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  420. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  421. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/tools.py +0 -0
  422. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  423. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  424. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/domains/telecom/utils.py +0 -0
  425. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/__init__.py +0 -0
  426. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/db.py +0 -0
  427. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/environment.py +0 -0
  428. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/server.py +0 -0
  429. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/tool.py +0 -0
  430. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/toolkit.py +0 -0
  431. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  432. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/__init__.py +0 -0
  433. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/evaluator.py +0 -0
  434. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  435. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  436. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  437. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  438. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  439. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/metrics/__init__.py +0 -0
  440. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/metrics/agent_metrics.py +0 -0
  441. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  442. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/orchestrator/__init__.py +0 -0
  443. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  444. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  445. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/orchestrator/utils.py +0 -0
  446. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/registry.py +0 -0
  447. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/run.py +0 -0
  448. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/scripts/__init__.py +0 -0
  449. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/scripts/check_data.py +0 -0
  450. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  451. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/scripts/start_servers.py +0 -0
  452. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/scripts/view_simulations.py +0 -0
  453. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/user/__init__.py +0 -0
  454. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/user/base.py +0 -0
  455. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/user/user_simulator.py +0 -0
  456. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/utils/__init__.py +0 -0
  457. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/utils/display.py +0 -0
  458. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/utils/io_utils.py +0 -0
  459. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/utils/llm_utils.py +0 -0
  460. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/utils/pydantic_utils.py +0 -0
  461. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vendor/tau2/utils/utils.py +0 -0
  462. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/versioneer.py +0 -0
  463. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  464. {eval_protocol-0.2.94.dev3 → eval_protocol-0.2.95}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.94.dev3
3
+ Version: 0.2.95
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-12-02T21:46:38-0800",
11
+ "date": "2025-12-03T00:48:40-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "01bc8e998a3a0370fc0631d39b4fbd8b4b5c7941",
15
- "version": "0.2.94-dev.3"
14
+ "full-revisionid": "16ffbac0a2077b398cf59efb1421e4a276623bfb",
15
+ "version": "0.2.95"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -5,7 +5,6 @@ from eval_protocol.models import (
5
5
  EvaluationRow,
6
6
  Message,
7
7
  MetricResult,
8
- ChatCompletionContentPartParam,
9
8
  ChatCompletionContentPartTextParam,
10
9
  )
11
10
  from eval_protocol.pytest.default_single_turn_rollout_process import (
@@ -19,12 +18,10 @@ SYSTEM_PROMPT = (
19
18
 
20
19
 
21
20
  def _coerce_content_to_str(
22
- content: str | list[ChatCompletionContentPartParam] | None,
21
+ content: str | list[ChatCompletionContentPartTextParam] | None,
23
22
  ) -> str:
24
23
  if isinstance(content, list):
25
- return "".join(
26
- getattr(p, "text", str(p)) if isinstance(p, ChatCompletionContentPartTextParam) else "" for p in content
27
- )
24
+ return "".join([getattr(p, "text", str(p)) for p in content])
28
25
  return str(content or "")
29
26
 
30
27
 
@@ -10,7 +10,6 @@ from eval_protocol.models import (
10
10
  EvaluationRow,
11
11
  Message,
12
12
  MetricResult,
13
- ChatCompletionContentPartParam,
14
13
  ChatCompletionContentPartTextParam,
15
14
  )
16
15
  from eval_protocol.pytest.default_single_turn_rollout_process import (
@@ -55,12 +54,10 @@ def _load_gpqa_messages_from_csv() -> list[list[list[Message]]]:
55
54
 
56
55
 
57
56
  def _coerce_content_to_str(
58
- content: str | list[ChatCompletionContentPartParam] | None,
57
+ content: str | list[ChatCompletionContentPartTextParam] | None,
59
58
  ) -> str:
60
59
  if isinstance(content, list):
61
- return "".join(
62
- getattr(p, "text", str(p)) if isinstance(p, ChatCompletionContentPartTextParam) else "" for p in content
63
- )
60
+ return "".join([getattr(p, "text", str(p)) for p in content])
64
61
  return str(content or "")
65
62
 
66
63
 
@@ -8,7 +8,6 @@ from eval_protocol.models import (
8
8
  EvaluationRow,
9
9
  Message,
10
10
  MetricResult,
11
- ChatCompletionContentPartParam,
12
11
  ChatCompletionContentPartTextParam,
13
12
  )
14
13
  from eval_protocol.pytest.default_single_turn_rollout_process import (
@@ -38,11 +37,9 @@ def _extract_last_boxed_segment(text: str) -> Optional[str]:
38
37
  return matches[-1]
39
38
 
40
39
 
41
- def _coerce_content_to_str(content: str | list[ChatCompletionContentPartParam] | None) -> str:
40
+ def _coerce_content_to_str(content: str | list[ChatCompletionContentPartTextParam] | None) -> str:
42
41
  if isinstance(content, list):
43
- return "".join(
44
- getattr(p, "text", str(p)) if isinstance(p, ChatCompletionContentPartTextParam) else "" for p in content
45
- )
42
+ return "".join([getattr(p, "text", str(p)) for p in content])
46
43
  return str(content or "")
47
44
 
48
45
 
@@ -199,6 +199,13 @@ class LLMBasePolicy(PlaybackPolicyBase, ABC):
199
199
  if message.get("tool_calls"):
200
200
  assistant_message_for_history["tool_calls"] = message["tool_calls"]
201
201
 
202
+ # Preserve specific fields from provider_specific_fields if present
203
+ if message.get("provider_specific_fields"):
204
+ if message["provider_specific_fields"].get("reasoning_details"):
205
+ assistant_message_for_history["reasoning_details"] = message["provider_specific_fields"][
206
+ "reasoning_details"
207
+ ]
208
+
202
209
  # Add to actual conversation history
203
210
  conversation_history.append(assistant_message_for_history)
204
211
 
@@ -146,7 +146,7 @@ class LiteLLMPolicy(LLMBasePolicy):
146
146
  Clean messages with only OpenAI API compatible fields
147
147
  """
148
148
  # Standard OpenAI message fields
149
- allowed_fields = {"role", "content", "tool_calls", "tool_call_id", "name"}
149
+ allowed_fields = {"role", "content", "tool_calls", "tool_call_id", "name", "reasoning_details"}
150
150
 
151
151
  clean_messages = []
152
152
  for msg in messages:
@@ -217,12 +217,15 @@ class LiteLLMPolicy(LLMBasePolicy):
217
217
  logger.debug(f"🔄 API call for model: {self.model_id}")
218
218
 
219
219
  # LiteLLM already returns OpenAI-compatible format
220
+ message_obj = getattr(response.choices[0], "message", object())
221
+
220
222
  return {
221
223
  "choices": [
222
224
  {
223
225
  "message": {
224
- "role": getattr(getattr(response.choices[0], "message", object()), "role", "assistant"),
225
- "content": getattr(getattr(response.choices[0], "message", object()), "content", None),
226
+ "role": getattr(message_obj, "role", "assistant"),
227
+ "content": getattr(message_obj, "content", None),
228
+ "provider_specific_fields": getattr(message_obj, "provider_specific_fields", None),
226
229
  "tool_calls": (
227
230
  [
228
231
  {
@@ -233,12 +236,9 @@ class LiteLLMPolicy(LLMBasePolicy):
233
236
  "arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
234
237
  },
235
238
  }
236
- for tc in (
237
- getattr(getattr(response.choices[0], "message", object()), "tool_calls", [])
238
- or []
239
- )
239
+ for tc in (getattr(message_obj, "tool_calls", []) or [])
240
240
  ]
241
- if getattr(getattr(response.choices[0], "message", object()), "tool_calls", None)
241
+ if getattr(message_obj, "tool_calls", None)
242
242
  else []
243
243
  ),
244
244
  },
@@ -466,46 +466,11 @@ class ChatCompletionContentPartTextParam(BaseModel):
466
466
  return iter(["text", "type"])
467
467
 
468
468
 
469
- class ChatCompletionContentPartImageParam(BaseModel):
470
- type: Literal["image_url"] = Field("image_url", description="The type of the content part.")
471
- image_url: Dict[str, Any] = Field(
472
- ..., description="Image descriptor (e.g., {'url': 'data:image/png;base64,...', 'detail': 'high'})."
473
- )
474
-
475
- def __getitem__(self, key: str) -> Any:
476
- if key == "image_url":
477
- return self.image_url
478
- if key == "type":
479
- return self.type
480
- raise KeyError(key)
481
-
482
- def get(self, key: str, default: Any = None) -> Any:
483
- try:
484
- return self[key]
485
- except KeyError:
486
- return default
487
-
488
- def keys(self):
489
- return (k for k in ("image_url", "type"))
490
-
491
- def values(self):
492
- return (self.image_url, self.type)
493
-
494
- def items(self):
495
- return [("image_url", self.image_url), ("type", self.type)]
496
-
497
- def __iter__(self):
498
- return iter(["image_url", "type"])
499
-
500
-
501
- ChatCompletionContentPartParam = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam]
502
-
503
-
504
469
  class Message(BaseModel):
505
470
  """Chat message model with trajectory evaluation support."""
506
471
 
507
472
  role: str # assistant, user, system, tool
508
- content: Optional[Union[str, List[ChatCompletionContentPartParam]]] = Field(
473
+ content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]] = Field(
509
474
  default="", description="The content of the message."
510
475
  )
511
476
  reasoning_content: Optional[str] = Field(
@@ -13,12 +13,7 @@ from openai.types.chat.chat_completion_message_param import ChatCompletionMessag
13
13
  from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
14
14
  from eval_protocol.mcp.execution.policy import LiteLLMPolicy
15
15
  from eval_protocol.mcp.mcp_multi_client import MCPMultiClient
16
- from eval_protocol.models import (
17
- EvaluationRow,
18
- Message,
19
- ChatCompletionContentPartParam,
20
- ChatCompletionContentPartTextParam,
21
- )
16
+ from eval_protocol.models import EvaluationRow, Message, ChatCompletionContentPartTextParam
22
17
  from openai.types import CompletionUsage
23
18
  from eval_protocol.pytest.rollout_processor import RolloutProcessor
24
19
  from eval_protocol.pytest.types import Dataset, RolloutProcessorConfig
@@ -103,7 +98,7 @@ class Agent:
103
98
  self.messages.append(message)
104
99
  self.logger.log(self.evaluation_row)
105
100
 
106
- async def call_agent(self) -> Optional[Union[str, List[ChatCompletionContentPartParam]]]:
101
+ async def call_agent(self) -> Optional[Union[str, List[ChatCompletionContentPartTextParam]]]:
107
102
  """
108
103
  Call the assistant with the user query.
109
104
  """
@@ -227,7 +222,7 @@ class Agent:
227
222
 
228
223
  def _format_tool_message_content(
229
224
  self, content: List[TextContent]
230
- ) -> Union[str, List[ChatCompletionContentPartParam]]:
225
+ ) -> Union[str, List[ChatCompletionContentPartTextParam]]:
231
226
  """Format tool result content for inclusion in a tool message.
232
227
 
233
228
  - If a single text item, return plain string per OpenAI semantics.
@@ -166,17 +166,13 @@ class SingleTurnRolloutProcessor(RolloutProcessor):
166
166
  row.execution_metadata.tool_call_count = (
167
167
  len(converted_tool_calls) if converted_tool_calls is not None else 0
168
168
  )
169
- usage = getattr(response, "usage", None)
170
- if usage:
171
- row.execution_metadata.usage = (
172
- CompletionUsage( # Note: LiteLLM sets usage dynamically via setattr(), not as a typed field
173
- prompt_tokens=getattr(usage, "prompt_tokens", 0),
174
- completion_tokens=getattr(usage, "completion_tokens", 0),
175
- total_tokens=getattr(usage, "total_tokens", 0),
176
- )
169
+ row.execution_metadata.usage = (
170
+ CompletionUsage( # Note: LiteLLM sets usage dynamically via setattr(), not as a typed field
171
+ prompt_tokens=response.usage.prompt_tokens, # pyright: ignore[reportAttributeAccessIssue]
172
+ completion_tokens=response.usage.completion_tokens, # pyright: ignore[reportAttributeAccessIssue]
173
+ total_tokens=response.usage.total_tokens, # pyright: ignore[reportAttributeAccessIssue]
177
174
  )
178
- else:
179
- row.execution_metadata.usage = None
175
+ )
180
176
 
181
177
  row.messages = messages
182
178
 
@@ -10,16 +10,10 @@ like normalization and LaTeX parsing.
10
10
  import re
11
11
  from typing import Any, Callable, Dict, List, Optional, Union, cast
12
12
 
13
- from ..models import (
14
- EvaluateResult,
15
- Message,
16
- MetricResult,
17
- ChatCompletionContentPartParam,
18
- ChatCompletionContentPartTextParam,
19
- )
13
+ from ..models import EvaluateResult, Message, MetricResult, ChatCompletionContentPartTextParam
20
14
 
21
15
 
22
- def _to_text(content: Optional[Union[str, List[ChatCompletionContentPartParam]]]) -> str:
16
+ def _to_text(content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]]) -> str:
23
17
  """Coerce Message.content into a plain string for regex and comparisons."""
24
18
  if content is None:
25
19
  return ""
@@ -27,11 +21,7 @@ def _to_text(content: Optional[Union[str, List[ChatCompletionContentPartParam]]]
27
21
  return content
28
22
  # List[ChatCompletionContentPartTextParam]
29
23
  try:
30
- texts: List[str] = []
31
- for part in content:
32
- if isinstance(part, ChatCompletionContentPartTextParam):
33
- texts.append(part.text)
34
- return "\n".join(texts)
24
+ return "\n".join(part.text for part in content)
35
25
  except Exception:
36
26
  return ""
37
27
 
@@ -2,13 +2,7 @@ import json
2
2
  import re
3
3
  from typing import Any, Dict, List, Optional, Union
4
4
 
5
- from ..models import (
6
- EvaluateResult,
7
- Message,
8
- MetricResult,
9
- ChatCompletionContentPartParam,
10
- ChatCompletionContentPartTextParam,
11
- )
5
+ from ..models import EvaluateResult, Message, MetricResult, ChatCompletionContentPartTextParam
12
6
  from ..typed_interface import reward_function
13
7
  from .function_calling import (
14
8
  calculate_jaccard_similarity,
@@ -65,10 +59,8 @@ def json_schema_reward(
65
59
  content_text = last_message.content
66
60
  else:
67
61
  try:
68
- parts: List[ChatCompletionContentPartParam] = last_message.content # type: ignore[assignment]
69
- content_text = "\n".join(
70
- getattr(p, "text", "") for p in parts if isinstance(p, ChatCompletionContentPartTextParam)
71
- )
62
+ parts: List[ChatCompletionContentPartTextParam] = last_message.content # type: ignore[assignment]
63
+ content_text = "\n".join(getattr(p, "text", "") for p in parts)
72
64
  except Exception:
73
65
  content_text = ""
74
66
  else:
@@ -9,13 +9,7 @@ are in the expected language.
9
9
  import re
10
10
  from typing import Any, Dict, List, Optional, Set, Tuple, Union
11
11
 
12
- from ..models import (
13
- EvaluateResult,
14
- Message,
15
- MetricResult,
16
- ChatCompletionContentPartParam,
17
- ChatCompletionContentPartTextParam,
18
- )
12
+ from ..models import EvaluateResult, Message, MetricResult, ChatCompletionContentPartTextParam
19
13
  from ..typed_interface import reward_function
20
14
 
21
15
  # Dictionary mapping language codes to common words/patterns in that language
@@ -579,17 +573,13 @@ def language_consistency_reward(
579
573
  },
580
574
  )
581
575
 
582
- def _to_text(content: Union[str, List[ChatCompletionContentPartParam], None]) -> str:
576
+ def _to_text(content: Union[str, List[ChatCompletionContentPartTextParam], None]) -> str:
583
577
  if content is None:
584
578
  return ""
585
579
  if isinstance(content, str):
586
580
  return content
587
581
  try:
588
- texts: List[str] = []
589
- for part in content:
590
- if isinstance(part, ChatCompletionContentPartTextParam):
591
- texts.append(part.text)
592
- return "\n".join(texts)
582
+ return "\n".join(part.text for part in content)
593
583
  except Exception:
594
584
  return ""
595
585
 
@@ -8,26 +8,16 @@ encouraging more diverse and information-rich outputs.
8
8
  import re
9
9
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
10
10
 
11
- from ..models import (
12
- EvaluateResult,
13
- Message,
14
- MetricResult,
15
- ChatCompletionContentPartParam,
16
- ChatCompletionContentPartTextParam,
17
- )
11
+ from ..models import EvaluateResult, Message, MetricResult, ChatCompletionContentPartTextParam
18
12
 
19
13
 
20
- def _to_text(content: Optional[Union[str, List[ChatCompletionContentPartParam]]]) -> str:
14
+ def _to_text(content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]]) -> str:
21
15
  if content is None:
22
16
  return ""
23
17
  if isinstance(content, str):
24
18
  return content
25
19
  try:
26
- texts: List[str] = []
27
- for part in content:
28
- if isinstance(part, ChatCompletionContentPartTextParam):
29
- texts.append(part.text)
30
- return "\n".join(texts)
20
+ return "\n".join(part.text for part in content)
31
21
  except Exception:
32
22
  return ""
33
23
 
@@ -8,26 +8,16 @@ specified XML/HTML-like tags in correct quantities.
8
8
  import re
9
9
  from typing import Any, Dict, List, Set, Union
10
10
 
11
- from ..models import (
12
- EvaluateResult,
13
- Message,
14
- MetricResult,
15
- ChatCompletionContentPartParam,
16
- ChatCompletionContentPartTextParam,
17
- )
11
+ from ..models import EvaluateResult, Message, MetricResult, ChatCompletionContentPartTextParam
18
12
 
19
13
 
20
- def _to_text(content: Union[str, List[ChatCompletionContentPartParam], None]) -> str:
14
+ def _to_text(content: Union[str, List[ChatCompletionContentPartTextParam], None]) -> str:
21
15
  if content is None:
22
16
  return ""
23
17
  if isinstance(content, str):
24
18
  return content
25
19
  try:
26
- texts: List[str] = []
27
- for part in content:
28
- if isinstance(part, ChatCompletionContentPartTextParam):
29
- texts.append(part.text)
30
- return "\n".join(texts)
20
+ return "\n".join(part.text for part in content)
31
21
  except Exception:
32
22
  return ""
33
23
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.94.dev3
3
+ Version: 0.2.95
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -42,9 +42,9 @@ eval_protocol.egg-info/requires.txt
42
42
  eval_protocol.egg-info/top_level.txt
43
43
  eval_protocol/../vite-app/dist/index.html
44
44
  eval_protocol/../vite-app/dist/assets/favicon-BkAAWQga.png
45
- eval_protocol/../vite-app/dist/assets/index-CuQbfdPD.js
46
- eval_protocol/../vite-app/dist/assets/index-CuQbfdPD.js.map
47
- eval_protocol/../vite-app/dist/assets/index-iZp_HgyW.css
45
+ eval_protocol/../vite-app/dist/assets/index-BIhepl19.css
46
+ eval_protocol/../vite-app/dist/assets/index-DaovgarD.js
47
+ eval_protocol/../vite-app/dist/assets/index-DaovgarD.js.map
48
48
  eval_protocol/../vite-app/dist/assets/logo-light-BprIBJQW.png
49
49
  eval_protocol/adapters/__init__.py
50
50
  eval_protocol/adapters/base.py
@@ -326,6 +326,7 @@ tests/test_lean_prover.py
326
326
  tests/test_lean_prover_runner.py
327
327
  tests/test_length.py
328
328
  tests/test_list_comparison_math_reward.py
329
+ tests/test_litellm_policy_provider_fields.py
329
330
  tests/test_logs_server.py
330
331
  tests/test_logs_server_simple.py
331
332
  tests/test_math.py
@@ -0,0 +1,95 @@
1
+ import types
2
+
3
+ import pytest
4
+
5
+ import eval_protocol.mcp.execution.policy as policy_mod
6
+ from eval_protocol.mcp.execution.policy import LiteLLMPolicy
7
+
8
+
9
@pytest.mark.asyncio
async def test_litellm_policy_surfaces_provider_specific_reasoning_details(monkeypatch):
    """
    Ensure that provider_specific_fields from the LiteLLM message object are
    preserved on the returned message dict from LiteLLMPolicy._make_llm_call.
    """

    # Define a fake ModelResponse base class and patch the module's ModelResponse.
    class FakeModelResponseBase: ...

    # Patch through monkeypatch (not plain assignment) so the real class is
    # restored at teardown and the fake cannot leak into other tests.
    # raising=False keeps the original "assign regardless" semantics.
    monkeypatch.setattr(policy_mod, "ModelResponse", FakeModelResponseBase, raising=False)

    async def fake_acompletion(*args, **kwargs):
        # This mimics the LiteLLM Message object shape we rely on in policy._make_llm_call
        message_obj = types.SimpleNamespace(
            role="assistant",
            content="",
            tool_calls=[
                types.SimpleNamespace(
                    id="tool_get_reservation_details_123",
                    type="function",
                    function=types.SimpleNamespace(
                        name="get_reservation_details",
                        arguments='{"reservation_id":"EHGLP3"}',
                    ),
                )
            ],
            provider_specific_fields={
                "reasoning_details": [{"id": "tool_get_reservation_details_123", "type": "reasoning.encrypted"}],
                "custom_field": "keep_me",
            },
        )

        class FakeModelResponse(FakeModelResponseBase):
            def __init__(self) -> None:
                self.choices = [
                    types.SimpleNamespace(
                        finish_reason="tool_calls",
                        index=0,
                        message=message_obj,
                    )
                ]
                self.usage = types.SimpleNamespace(
                    prompt_tokens=10,
                    completion_tokens=5,
                    total_tokens=15,
                )

        return FakeModelResponse()

    # Patch acompletion so we don't hit the network
    monkeypatch.setattr(policy_mod, "acompletion", fake_acompletion)

    # Use a concrete policy instance; base_url/model_id values don't matter for this unit test
    policy = LiteLLMPolicy(model_id="openrouter/google/gemini-3-pro-preview", use_caching=False)

    messages = [
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "tool_get_reservation_details_123",
                    "type": "function",
                    "function": {"name": "get_reservation_details", "arguments": '{"reservation_id":"EHGLP3"}'},
                }
            ],
        }
    ]

    # No tools are needed for this test - we only care about the returned message shape
    result = await policy._make_llm_call(messages, tools=[])

    assert "choices" in result
    assert len(result["choices"]) == 1
    msg = result["choices"][0]["message"]

    # Core fields should be present
    assert msg["role"] == "assistant"
    assert isinstance(msg.get("tool_calls"), list)

    # provider_specific_fields should be preserved on the message
    ps = msg.get("provider_specific_fields")
    assert isinstance(ps, dict)
    assert ps["reasoning_details"] == [{"id": "tool_get_reservation_details_123", "type": "reasoning.encrypted"}]
    # Non-core provider_specific_fields should also be preserved
    assert ps.get("custom_field") == "keep_me"
@@ -0,0 +1 @@
1
+ /*! tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-shadow:0 0 #0000;--tw-shadow-color:initial;--tw-shadow-alpha:100%;--tw-inset-shadow:0 0 #0000;--tw-inset-shadow-color:initial;--tw-inset-shadow-alpha:100%;--tw-ring-color:initial;--tw-ring-shadow:0 0 #0000;--tw-inset-ring-color:initial;--tw-inset-ring-shadow:0 0 #0000;--tw-ring-inset:initial;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-offset-shadow:0 0 #0000;--tw-outline-style:solid;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-50:oklch(97.1% .013 17.38);--color-red-200:oklch(88.5% .062 18.334);--color-red-500:oklch(63.7% .237 25.331);--color-red-600:oklch(57.7% .245 27.325);--color-red-700:oklch(50.5% .213 27.518);--color-red-800:oklch(44.4% .177 26.899);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-500:oklch(79.5% .184 86.047);--color-yellow-600:oklch(68.1% .162 75.834);--color-yellow-700:oklch(55.4% .135 
66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% .018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-600:oklch(62.7% .194 149.214);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-100:oklch(93.2% .032 255.585);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-600:oklch(54.6% .245 262.881);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-800:oklch(42.4% .199 265.638);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 247.839);--color-gray-100:oklch(96.7% .003 264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-2xl:42rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--font-weight-bold:700;--radius-md:.375rem;--radius-lg:.5rem;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 
solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup 
option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.collapse{visibility:collapse}.invisible{visibility:hidden}.visible{visibility:visible}.absolute{position:absolute}.fixed{position:fixed}.relative{position:relative}.static{position:static}.sticky{position:sticky}.top-0{top:calc(var(--spacing)*0)}.top-1{top:calc(var(--spacing)*1)}.right-0{right:calc(var(--spacing)*0)}.right-1{right:calc(var(--spacing)*1)}.left-0{left:calc(var(--spacing)*0)}.z-10{z-index:10}.z-50{z-index:50}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media 
(min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media (min-width:80rem){.container{max-width:80rem}}@media (min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-3{margin-bottom:calc(var(--spacing)*3)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-1{margin-left:calc(var(--spacing)*1)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.contents{display:contents}.flex{display:flex}.flow-root{display:flow-root}.grid{display:grid}.hidden{display:none}.inline{display:inline}.inline-block{display:inline-block}.inline-flex{display:inline-flex}.inline-grid{display:inline-grid}.inline-table{display:inline-table}.list-item{display:list-item}.table{display:table}.table-caption{display:table-caption}.table-cell{display:table-cell}.table-column{display:table-column}.table-column-group{display:table-column-group}.table-footer-group{display:table-footer-group}.table-header-group{display:table-header-group}.table-row{display:table-row}.table-row-group{display:table-row-group}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-5{height:calc(var(--spacing)*5)}.h-6{height:calc(var(--spacing)*6)}.h-8{height:calc(var(--spacing)*8)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.h-96{height:calc(var(--spacing)*96)}.max-h-48{max-height:calc(var(--spacing)*48)}.max-h-60{max-height:calc(var(--spacing)*60)}.max-h
-\[800px\]{max-height:800px}.max-h-\[calc\(100vh-80px\)\]{max-height:calc(100vh - 80px)}.min-h-4{min-height:calc(var(--spacing)*4)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-6{width:calc(var(--spacing)*6)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-2xl{max-width:var(--container-2xl)}.max-w-7xl{max-width:var(--container-7xl)}.max-w-\[200px\]{max-width:200px}.max-w-\[1200px\]{max-width:1200px}.max-w-md{max-width:var(--container-md)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-32{min-width:calc(var(--spacing)*32)}.min-w-36{min-width:calc(var(--spacing)*36)}.min-w-40{min-width:calc(var(--spacing)*40)}.min-w-48{min-width:calc(var(--spacing)*48)}.min-w-64{min-width:calc(var(--spacing)*64)}.min-w-max{min-width:max-content}.flex-1{flex:1}.flex-shrink-0{flex-shrink:0}.shrink{flex-shrink:1}.grow{flex-grow:1}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.transform\!{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)!important}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-help{cursor:help}.cursor-not-allowed{cursor:not-allowed}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.flex-row{flex-direction:row}.items-center{align-items:center}.items-start{align-items:flex-start}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1{gap:calc(var(--spacing)*1)}.gap-1\.5{gap:calc(var(--spacing)*1.5)}.
gap-2{gap:calc(var(--spacing)*2)}.gap-3{gap:calc(var(--spacing)*3)}.gap-4{gap:calc(var(--spacing)*4)}:where(.space-y-2>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*2)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-4>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*4)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*4)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-6>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*6)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*6)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - 
var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-auto{overflow:auto}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-x-hidden{overflow-x:hidden}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.rounded-lg{border-radius:var(--radius-lg)}.rounded-t-md{border-top-left-radius:var(--radius-md);border-top-right-radius:var(--radius-md)}.border{border-style:var(--tw-border-style);border-width:1px}.border-2{border-style:var(--tw-border-style);border-width:2px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-t-0{border-top-style:var(--tw-border-style);border-top-width:0}.border-t-2{border-top-style:var(--tw-border-style);border-top-width:2px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-b-2{border-bottom-style:var(--tw-border-style);border-bottom-width:2px}.border-l-2{border-left-style:var(--tw-border-style);border-left-width:2px}.border-l-4{border-left-style:var(--tw-border-style);border-left-width:4px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-100{border-color:var(--color-gray-100)}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-gray-900{border-color:var(--color-gray-900)}.border-green-200{border-color:var(--color-green-200)}.border-red-200{border-color:var(--color-red-200)}.border-transparent{border-color:#0000}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-gray-600{border-top-color:var(--color-gray-600)}.border-t-transparent{border-top-color:#0000}.border-l-blue-500{border-left-color:var(--color-blue-500)}.border-l-gray-300{border-left-color:var(--color-gray-300)}.border-l-green-500{border-left-color:var(--color-green-500)}.bg-blue-50{background-
color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-200{background-color:var(--color-gray-200)}.bg-gray-300{background-color:var(--color-gray-300)}.bg-gray-500{background-color:var(--color-gray-500)}.bg-gray-800{background-color:var(--color-gray-800)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-50{background-color:var(--color-red-50)}.bg-red-500{background-color:var(--color-red-500)}.bg-transparent{background-color:#0000}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.bg-yellow-500{background-color:var(--color-yellow-500)}.p-0{padding:calc(var(--spacing)*0)}.p-0\.5{padding:calc(var(--spacing)*.5)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{padding:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.py-6{padding-block:calc(var(--spacing)*6)}.py-8{padding-block:calc(var(--spacing)*8)}.pt-1{padding-top:calc(var(--spacing)*1)}.pt-2{padding-top:calc(var(--spacing)*2)}.pr-8{padding-right:calc(var(--spacing)*8)}.pb-2{padding-bottom:calc(var(--spacing)*2)}.pl-3{padding-left:calc(var(--spacing)*3)}.text-center{text-align:center}.text-left{text-align:left}.text-right{text-align:right}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(
--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.text-\[10px\]{font-size:10px}.font-bold{--tw-font-weight:var(--font-weight-bold);font-weight:var(--font-weight-bold)}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.text-nowrap{text-wrap:nowrap}.break-words{overflow-wrap:break-word}.break-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-600{color:var(--color-blue-600)}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-600{color:var(--color-green-600)}.text-green-700{color:var(--color-green-700)}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-600{color:var(--color-red-600)}.text-red-700{color:var(--color-red-700)}.text-white{color:var(--color-white)}.text-yellow-600{color:var(--color-yellow-600)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.line-through{text-decoration-line:line-through}.overline{text-decoration-line:overline}.underline{text-decoration-line:underline}.opacity-50{opacity:.5}.opacity-60{opacity:.6}.shadow{--tw-shadow:0 1px 3px 0 var(--tw-shadow-color,#0000001a),0 1px 2px -1px 
var(--tw-shadow-color,#0000001a);box-shadow:var(--tw-inset-shadow),var(--tw-inset-ring-shadow),var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow)}.outline{outline-style:var(--tw-outline-style);outline-width:1px}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter\!{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)!important}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-opacity{transition-property:opacity;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-functio
n));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webkit-user-select:none;user-select:none}.last\:border-b-0:last-child{border-bottom-style:var(--tw-border-style);border-bottom-width:0}@media (hover:hover){.hover\:border-gray-400:hover{border-color:var(--color-gray-400)}.hover\:bg-blue-100:hover{background-color:var(--color-blue-100)}.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-100:hover{background-color:var(--color-gray-100)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:bg-green-100:hover{background-color:var(--color-green-100)}.hover\:bg-green-200:hover{background-color:var(--color-green-200)}.hover\:bg-yellow-100:hover{background-color:var(--color-yellow-100)}.hover\:bg-yellow-200:hover{background-color:var(--color-yellow-200)}.hover\:text-blue-800:hover{color:var(--color-blue-800)}.hover\:text-gray-600:hover{color:var(--color-gray-600)}.hover\:text-gray-900:hover{color:var(--color-gray-900)}.hover\:text-red-800:hover{color:var(--color-red-800)}.hover\:no-underline:hover{text-decoration-line:none}.hover\:opacity-100:hover{opacity:1}}.focus\:border-gray-500:focus{border-color:var(--color-gray-500)}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property 
--tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-shadow-color{syntax:"*";inherits:false}@property --tw-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-inset-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-inset-shadow-color{syntax:"*";inherits:false}@property --tw-inset-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-ring-color{syntax:"*";inherits:false}@property --tw-ring-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-inset-ring-color{syntax:"*";inherits:false}@property --tw-inset-ring-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-ring-inset{syntax:"*";inherits:false}@property --tw-ring-offset-width{syntax:"<length>";inherits:false;initial-value:0}@property --tw-ring-offset-color{syntax:"*";inherits:false;initial-value:#fff}@property --tw-ring-offset-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-outline-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property 
--tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}}