eval-protocol 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. {eval_protocol-0.2.5/eval_protocol.egg-info → eval_protocol-0.2.6}/PKG-INFO +3 -4
  2. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/README.md +1 -1
  3. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/_version.py +3 -3
  4. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/manager.py +14 -22
  5. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/policy.py +11 -0
  6. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_env.py +4 -3
  7. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +12 -5
  8. {eval_protocol-0.2.5 → eval_protocol-0.2.6/eval_protocol.egg-info}/PKG-INFO +3 -4
  9. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/SOURCES.txt +6 -0
  10. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/requires.txt +1 -2
  11. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/pyproject.toml +5 -3
  12. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_rollout_control_plane_integration.py +1 -0
  13. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_nl_assertions.py +27 -10
  14. eval_protocol-0.2.6/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  15. eval_protocol-0.2.6/vite-app/dist/assets/index-BySN1scz.css +1 -0
  16. eval_protocol-0.2.6/vite-app/dist/assets/index-CRkZ6JGL.js +88 -0
  17. eval_protocol-0.2.6/vite-app/dist/assets/index-CRkZ6JGL.js.map +1 -0
  18. eval_protocol-0.2.6/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  19. eval_protocol-0.2.6/vite-app/dist/index.html +14 -0
  20. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/LICENSE +0 -0
  21. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/__init__.py +0 -0
  22. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/normalize_sandbox_fusion.py +0 -0
  23. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/utils/__init__.py +0 -0
  24. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/utils/generate_api_key.py +0 -0
  25. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/utils/subprocess_manager.py +0 -0
  26. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/__init__.py +0 -0
  27. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/__main__.py +0 -0
  28. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/__init__.py +0 -0
  29. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/braintrust.py +0 -0
  30. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/huggingface.py +0 -0
  31. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/langfuse.py +0 -0
  32. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/trl.py +0 -0
  33. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/__init__.py +0 -0
  34. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/models.py +0 -0
  35. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/orchestrator.py +0 -0
  36. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resource_abc.py +0 -0
  37. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resource_pool.py +0 -0
  38. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/__init__.py +0 -0
  39. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  40. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  41. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  42. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  43. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  44. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/docker_resource.py +0 -0
  45. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  46. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
  47. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
  48. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  49. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/sql_resource.py +0 -0
  50. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/task_manager.py +0 -0
  51. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/tool_registry.py +0 -0
  52. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/auth.py +0 -0
  53. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli.py +0 -0
  54. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/__init__.py +0 -0
  55. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  56. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/common.py +0 -0
  57. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/deploy.py +0 -0
  58. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  59. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/logs.py +0 -0
  60. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/preview.py +0 -0
  61. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  62. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/common_utils.py +0 -0
  63. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/config.py +0 -0
  64. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/dataset_logger/__init__.py +0 -0
  65. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  66. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  67. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/datasets/__init__.py +0 -0
  68. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/datasets/loader.py +0 -0
  69. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/evaluation.py +0 -0
  70. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/execution/__init__.py +0 -0
  71. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/execution/pipeline.py +0 -0
  72. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/gcp_tools.py +0 -0
  73. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generation/cache.py +0 -0
  74. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generation/clients/base.py +0 -0
  75. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generation/clients.py +0 -0
  76. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generic_server.py +0 -0
  77. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/human_id/__init__.py +0 -0
  78. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/human_id/dictionary.py +0 -0
  79. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/__init__.py +0 -0
  80. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/braintrust.py +0 -0
  81. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/deepeval.py +0 -0
  82. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/openeval.py +0 -0
  83. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/trl.py +0 -0
  84. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/__init__.py +0 -0
  85. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/adapter.py +0 -0
  86. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/client/__init__.py +0 -0
  87. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/client/connection.py +0 -0
  88. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/clients.py +0 -0
  89. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/__init__.py +0 -0
  90. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/base_policy.py +0 -0
  91. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/grid_renderer.py +0 -0
  92. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  93. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/mcpgym.py +0 -0
  94. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/process_manager.py +0 -0
  95. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/session/__init__.py +0 -0
  96. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/session/manager.py +0 -0
  97. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/simple_process_manager.py +0 -0
  98. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/simulation_server.py +0 -0
  99. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/__init__.py +0 -0
  100. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/config.py +0 -0
  101. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
  102. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/main.py +0 -0
  103. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  104. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  105. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  106. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
  107. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  108. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/session.py +0 -0
  109. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/models.py +0 -0
  110. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/packaging.py +0 -0
  111. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/platform_api.py +0 -0
  112. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/playback_policy.py +0 -0
  113. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/__init__.py +0 -0
  114. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  115. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  116. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
  117. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  118. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/evaluation_test.py +0 -0
  119. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/types.py +0 -0
  120. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/utils.py +0 -0
  121. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/resources.py +0 -0
  122. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/reward_function.py +0 -0
  123. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/__init__.py +0 -0
  124. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/accuracy.py +0 -0
  125. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/accuracy_length.py +0 -0
  126. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  127. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  128. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/apps_testing_util.py +0 -0
  129. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/bfcl_reward.py +0 -0
  130. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/code_execution.py +0 -0
  131. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/code_execution_utils.py +0 -0
  132. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/cpp_code.py +0 -0
  133. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  134. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/format.py +0 -0
  135. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/function_calling.py +0 -0
  136. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/json_schema.py +0 -0
  137. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/language_consistency.py +0 -0
  138. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/lean_prover.py +0 -0
  139. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/length.py +0 -0
  140. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  141. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/math.py +0 -0
  142. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  143. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/reasoning_steps.py +0 -0
  144. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/repetition.py +0 -0
  145. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/tag_count.py +0 -0
  146. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rl_processing.py +0 -0
  147. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/server.py +0 -0
  148. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/typed_interface.py +0 -0
  149. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/types/__init__.py +0 -0
  150. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/types/types.py +0 -0
  151. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/__init__.py +0 -0
  152. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/batch_evaluation.py +0 -0
  153. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/batch_transformation.py +0 -0
  154. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/dataset_helpers.py +0 -0
  155. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/logs_server.py +0 -0
  156. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/module_loader.py +0 -0
  157. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/packaging_utils.py +0 -0
  158. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/static_policy.py +0 -0
  159. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/vite_server.py +0 -0
  160. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/dependency_links.txt +0 -0
  161. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/entry_points.txt +0 -0
  162. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/top_level.txt +0 -0
  163. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/setup.cfg +0 -0
  164. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/setup.py +0 -0
  165. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_accuracy.py +0 -0
  166. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_accuracy_length.py +0 -0
  167. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_adapters_e2e.py +0 -0
  168. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_agent_orchestrator.py +0 -0
  169. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_agent_resources.py +0 -0
  170. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_auth.py +0 -0
  171. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_batch_evaluation.py +0 -0
  172. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_braintrust_adapter.py +0 -0
  173. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_braintrust_example.py +0 -0
  174. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cli.py +0 -0
  175. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cli_agent.py +0 -0
  176. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cli_args.py +0 -0
  177. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_code_execution.py +0 -0
  178. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_config.py +0 -0
  179. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_control_plane_separation.py +0 -0
  180. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cpp_code.py +0 -0
  181. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_data_driven_task_manager.py +0 -0
  182. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_deepcoder_reward.py +0 -0
  183. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_deepeval_integration.py +0 -0
  184. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_deploy_integration.py +0 -0
  185. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_e2b_integration.py +0 -0
  186. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_e2b_js_integration.py +0 -0
  187. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_edge_cases.py +0 -0
  188. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_eval_protocol_import.py +0 -0
  189. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_evaluation.py +0 -0
  190. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_evaluation_integration.py +0 -0
  191. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_evaluation_preview_integration.py +0 -0
  192. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_examples_end_to_end.py +0 -0
  193. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_fireworks_api.py +0 -0
  194. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_format.py +0 -0
  195. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_fractional_code.py +0 -0
  196. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_frozen_lake_http_server.py +0 -0
  197. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_frozen_lake_seed_evaluation.py +0 -0
  198. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_function_calling.py +0 -0
  199. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_gcp_tools.py +0 -0
  200. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_generic_server.py +0 -0
  201. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_integration.py +0 -0
  202. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_json_schema.py +0 -0
  203. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_kwargs_validation.py +0 -0
  204. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_language_consistency.py +0 -0
  205. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_lean_prover.py +0 -0
  206. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_lean_prover_runner.py +0 -0
  207. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_length.py +0 -0
  208. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_list_comparison_math_reward.py +0 -0
  209. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_math.py +0 -0
  210. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_minimal.py +0 -0
  211. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_models.py +0 -0
  212. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_models_rl.py +0 -0
  213. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_multiple_choice_math_reward.py +0 -0
  214. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_n_variant_batch_integration.py +0 -0
  215. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_n_variant_integration.py +0 -0
  216. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_openai_compatibility.py +0 -0
  217. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_openeval_integration.py +0 -0
  218. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_packaging.py +0 -0
  219. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_parallel_rollouts.py +0 -0
  220. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_platform_api.py +0 -0
  221. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_readiness.py +0 -0
  222. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_reasoning_steps.py +0 -0
  223. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_repetition.py +0 -0
  224. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_repetition_debug.py +0 -0
  225. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_reward_function.py +0 -0
  226. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_reward_protocol_import.py +0 -0
  227. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_rl_processing.py +0 -0
  228. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_server.py +0 -0
  229. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_tag_count.py +0 -0
  230. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_typed_interface.py +0 -0
  231. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_typed_interface_rl.py +0 -0
  232. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_url_handling.py +0 -0
  233. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/__init__.py +0 -0
  234. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/agent/__init__.py +0 -0
  235. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/agent/base.py +0 -0
  236. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/agent/llm_agent.py +0 -0
  237. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/__init__.py +0 -0
  238. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/api_config.py +0 -0
  239. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/data_model.py +0 -0
  240. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/simulation_service.py +0 -0
  241. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/cli.py +0 -0
  242. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/config.py +0 -0
  243. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/__init__.py +0 -0
  244. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/message.py +0 -0
  245. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/simulation.py +0 -0
  246. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/tasks.py +0 -0
  247. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/__init__.py +0 -0
  248. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/__init__.py +0 -0
  249. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/data_model.py +0 -0
  250. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/environment.py +0 -0
  251. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/tools.py +0 -0
  252. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/utils.py +0 -0
  253. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/__init__.py +0 -0
  254. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/data_model.py +0 -0
  255. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/environment.py +0 -0
  256. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/tools.py +0 -0
  257. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/utils.py +0 -0
  258. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/__init__.py +0 -0
  259. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/data_model.py +0 -0
  260. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/environment.py +0 -0
  261. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/tools.py +0 -0
  262. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/utils.py +0 -0
  263. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/__init__.py +0 -0
  264. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/data_model.py +0 -0
  265. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/environment.py +0 -0
  266. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  267. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  268. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  269. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  270. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  271. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  272. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  273. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  274. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tools.py +0 -0
  275. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  276. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  277. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/utils.py +0 -0
  278. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/__init__.py +0 -0
  279. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/db.py +0 -0
  280. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/environment.py +0 -0
  281. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/server.py +0 -0
  282. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/tool.py +0 -0
  283. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/toolkit.py +0 -0
  284. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  285. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/__init__.py +0 -0
  286. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator.py +0 -0
  287. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  288. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  289. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  290. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  291. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/metrics/__init__.py +0 -0
  292. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/metrics/agent_metrics.py +0 -0
  293. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  294. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/__init__.py +0 -0
  295. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  296. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  297. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/utils.py +0 -0
  298. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/registry.py +0 -0
  299. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/run.py +0 -0
  300. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/__init__.py +0 -0
  301. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/check_data.py +0 -0
  302. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  303. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/start_servers.py +0 -0
  304. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/view_simulations.py +0 -0
  305. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/user/__init__.py +0 -0
  306. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/user/base.py +0 -0
  307. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/user/user_simulator.py +0 -0
  308. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/__init__.py +0 -0
  309. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/display.py +0 -0
  310. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/io_utils.py +0 -0
  311. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/llm_utils.py +0 -0
  312. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/pydantic_utils.py +0 -0
  313. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/utils.py +0 -0
  314. {eval_protocol-0.2.5 → eval_protocol-0.2.6}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -40,7 +40,6 @@ Requires-Dist: deepdiff>=6.0.0
40
40
  Requires-Dist: pandas>=1.5.0
41
41
  Requires-Dist: watchdog>=2.1.0
42
42
  Requires-Dist: websockets>=15.0.1
43
- Requires-Dist: fireworks-ai>=0.19.12
44
43
  Requires-Dist: fastapi>=0.116.1
45
44
  Provides-Extra: dev
46
45
  Requires-Dist: build; extra == "dev"
@@ -79,7 +78,7 @@ Requires-Dist: accelerate>=0.28.0; extra == "trl"
79
78
  Provides-Extra: openevals
80
79
  Requires-Dist: openevals>=0.1.0; extra == "openevals"
81
80
  Provides-Extra: fireworks
82
- Requires-Dist: fireworks-ai>=0.19.10; extra == "fireworks"
81
+ Requires-Dist: fireworks-ai>=0.19.12; extra == "fireworks"
83
82
  Provides-Extra: box2d
84
83
  Requires-Dist: swig; extra == "box2d"
85
84
  Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
@@ -110,7 +109,7 @@ markdown generation tasks to customer service agents with tool calling
110
109
  capabilities.
111
110
 
112
111
  <p align="center">
113
- <img src="./assets/ui.png" alt="UI" />
112
+ <img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
114
113
  <br>
115
114
  <sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
116
115
  </p>
@@ -13,7 +13,7 @@ markdown generation tasks to customer service agents with tool calling
13
13
  capabilities.
14
14
 
15
15
  <p align="center">
16
- <img src="./assets/ui.png" alt="UI" />
16
+ <img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
17
17
  <br>
18
18
  <sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
19
19
  </p>
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-08-06T01:34:18-0700",
11
+ "date": "2025-08-06T23:10:26-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "1a37ee141ebe4084654889ace2aba9c1529acf1c",
15
- "version": "0.2.5"
14
+ "full-revisionid": "fffd75c146b297cbce37f768ca9850e2ee05e4b5",
15
+ "version": "0.2.6"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -42,6 +42,7 @@ class ExecutionManager:
42
42
  steps: int = 512,
43
43
  openai_format_log_file: Optional[str] = None,
44
44
  max_concurrent_rollouts: int = 8,
45
+ evaluation_rows: Optional[List[EvaluationRow]] = None,
45
46
  ) -> List[EvaluationRow]:
46
47
  """
47
48
  Execute general rollouts using tool calling interface with automatic record/playback.
@@ -135,9 +136,11 @@ class ExecutionManager:
135
136
  # Add note about control plane separation
136
137
  logger.info(f"🎛️ Trajectories include control plane separation")
137
138
 
138
- # Convert trajectories to unified EvaluationRow format
139
- evaluation_rows = []
140
- for trajectory in trajectories:
139
+ # Convert trajectories to unified EvaluationRow format. If no evaluation_rows are provided, create empty ones for backwards compatibility.
140
+ if evaluation_rows is None:
141
+ evaluation_rows = [EvaluationRow(messages=[], input_metadata=InputMetadata()) for _ in trajectories]
142
+
143
+ for idx, trajectory in enumerate(trajectories):
141
144
  # Handle multimodal content by extracting text from complex content structures
142
145
  messages = []
143
146
  for msg in trajectory.conversation_history:
@@ -155,26 +158,15 @@ class ExecutionManager:
155
158
 
156
159
  messages.append(Message.model_validate(msg_dict))
157
160
 
158
- input_metadata = InputMetadata(
159
- row_id=trajectory.session.dataset_row.id if trajectory.session.dataset_row else None,
160
- dataset_info=asdict(trajectory.session.dataset_row) if trajectory.session.dataset_row else {},
161
- completion_params=CompletionParams(
162
- model=policy.model_id,
163
- temperature=getattr(policy, "temperature", None),
164
- max_tokens=getattr(policy, "max_tokens", None),
165
- max_tool_calls=getattr(policy, "max_tools_per_turn", None),
166
- ),
167
- session_data={
168
- "timestamp": time.time(),
169
- },
170
- )
171
- evaluation_row = EvaluationRow(
172
- messages=messages,
173
- tools=shared_tool_schema,
174
- input_metadata=input_metadata,
175
- usage=trajectory.usage,
161
+ evaluation_rows[idx].messages = messages
162
+ evaluation_rows[idx].tools = shared_tool_schema
163
+ evaluation_rows[idx].usage = trajectory.usage
164
+ evaluation_rows[idx].input_metadata.completion_params = CompletionParams(
165
+ model=policy.model_id,
166
+ temperature=getattr(policy, "temperature", None),
167
+ max_tokens=getattr(policy, "max_tokens", None),
168
+ max_tool_calls=getattr(policy, "max_tools_per_turn", None),
176
169
  )
177
- evaluation_rows.append(evaluation_row)
178
170
 
179
171
  return evaluation_rows
180
172
 
@@ -64,6 +64,9 @@ class LiteLLMPolicy(LLMBasePolicy):
64
64
  self.num_retries = num_retries
65
65
  self.retry_strategy = retry_strategy
66
66
 
67
+ # Store additional API parameters from kwargs
68
+ self.additional_params = kwargs
69
+
67
70
  # Only initialize LiteLLM in live mode (not in playback mode)
68
71
  if not self._is_playback:
69
72
  self._setup_litellm_caching(use_caching, cache_type, redis_url)
@@ -166,6 +169,14 @@ class LiteLLMPolicy(LLMBasePolicy):
166
169
  "base_url": self.base_url,
167
170
  }
168
171
 
172
+ # Add additional parameters from kwargs (like reasoning_effort)
173
+ if self.additional_params:
174
+ request_params.update(self.additional_params)
175
+
176
+ # Tell LiteLLM to allow reasoning_effort if it's present
177
+ if "reasoning_effort" in self.additional_params:
178
+ request_params["allowed_openai_params"] = ["reasoning_effort"]
179
+
169
180
  # Add tools if provided
170
181
  if tools:
171
182
  request_params["tools"] = tools
@@ -40,6 +40,8 @@ MCP Integration:
40
40
  - Resources provide static/configuration data, tools provide dynamic actions
41
41
  """
42
42
 
43
+ import asyncio
44
+
43
45
  # For legacy compatibility - import the facade functions
44
46
  import logging
45
47
  import random
@@ -47,11 +49,10 @@ from typing import Any, Callable, Dict, List, Optional, Union
47
49
 
48
50
  # Import all functionality from the new modular components
49
51
  from .mcp.execution.manager import ExecutionManager
50
- from .mcp.execution.policy import AnthropicPolicy, FireworksPolicy, LLMBasePolicy, OpenAIPolicy, LiteLLMPolicy
52
+ from .mcp.execution.policy import AnthropicPolicy, FireworksPolicy, LiteLLMPolicy, LLMBasePolicy, OpenAIPolicy
51
53
  from .mcp.session.manager import GeneralMCPVectorEnv
52
54
  from .models import EvaluationRow
53
55
  from .types import DatasetRow, MCPSession, MCPToolCall
54
- import asyncio
55
56
 
56
57
  logger = logging.getLogger(__name__)
57
58
 
@@ -288,7 +289,7 @@ async def rollout(
288
289
  execution_manager = ExecutionManager()
289
290
 
290
291
  return await execution_manager.execute_rollouts(
291
- envs, policy, steps, openai_format_log_file, max_concurrent_rollouts
292
+ envs, policy, steps, openai_format_log_file, max_concurrent_rollouts, evaluation_rows
292
293
  )
293
294
 
294
295
 
@@ -1,8 +1,10 @@
1
1
  import asyncio
2
+ import atexit
2
3
  import os
4
+ import signal
5
+ import socket
3
6
  import subprocess
4
7
  import time
5
- import socket
6
8
  from pathlib import Path
7
9
  from typing import List, Optional
8
10
 
@@ -10,9 +12,6 @@ import eval_protocol as ep
10
12
  from eval_protocol.models import EvaluationRow, Message
11
13
  from eval_protocol.pytest.types import RolloutProcessorConfig
12
14
 
13
- import atexit
14
- import signal
15
-
16
15
 
17
16
  class MCPServerManager:
18
17
  """Manages MCP server lifecycle for testing."""
@@ -188,13 +187,16 @@ async def default_mcp_gym_rollout_processor(
188
187
  """
189
188
  Rollout processor for tau bench environments.
190
189
 
190
+
191
191
  This processor starts an MCP server, creates tau bench environments, and runs rollouts
192
192
  using the eval_protocol framework, following the pattern from test_tau2_e2e.py.
193
193
 
194
+
194
195
  Args:
195
196
  rows: List of EvaluationRow objects containing messages and dataset info in input_metadata
196
197
  config: RolloutProcessorConfig with model and other parameters
197
198
 
199
+
198
200
  Returns:
199
201
  List of EvaluationRow objects with completed conversations
200
202
  """
@@ -207,6 +209,7 @@ async def default_mcp_gym_rollout_processor(
207
209
  model_id=config.model,
208
210
  temperature=config.input_params.get("temperature", 0.0),
209
211
  max_tokens=config.input_params.get("max_tokens", 4096),
212
+ reasoning_effort=config.input_params.get("reasoning_effort", None),
210
213
  )
211
214
 
212
215
  # Create MCP environments directly from evaluation_rows
@@ -218,7 +221,11 @@ async def default_mcp_gym_rollout_processor(
218
221
 
219
222
  # Run rollout with environments and policy
220
223
  evaluation_rows = await ep.rollout(
221
- envs, policy=policy, steps=config.steps, max_concurrent_rollouts=config.max_concurrent_rollouts
224
+ envs,
225
+ policy=policy,
226
+ evaluation_rows=rows,
227
+ steps=config.steps,
228
+ max_concurrent_rollouts=config.max_concurrent_rollouts,
222
229
  )
223
230
 
224
231
  return evaluation_rows
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -40,7 +40,6 @@ Requires-Dist: deepdiff>=6.0.0
40
40
  Requires-Dist: pandas>=1.5.0
41
41
  Requires-Dist: watchdog>=2.1.0
42
42
  Requires-Dist: websockets>=15.0.1
43
- Requires-Dist: fireworks-ai>=0.19.12
44
43
  Requires-Dist: fastapi>=0.116.1
45
44
  Provides-Extra: dev
46
45
  Requires-Dist: build; extra == "dev"
@@ -79,7 +78,7 @@ Requires-Dist: accelerate>=0.28.0; extra == "trl"
79
78
  Provides-Extra: openevals
80
79
  Requires-Dist: openevals>=0.1.0; extra == "openevals"
81
80
  Provides-Extra: fireworks
82
- Requires-Dist: fireworks-ai>=0.19.10; extra == "fireworks"
81
+ Requires-Dist: fireworks-ai>=0.19.12; extra == "fireworks"
83
82
  Provides-Extra: box2d
84
83
  Requires-Dist: swig; extra == "box2d"
85
84
  Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
@@ -110,7 +109,7 @@ markdown generation tasks to customer service agents with tool calling
110
109
  capabilities.
111
110
 
112
111
  <p align="center">
113
- <img src="./assets/ui.png" alt="UI" />
112
+ <img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
114
113
  <br>
115
114
  <sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
116
115
  </p>
@@ -35,6 +35,12 @@ eval_protocol.egg-info/dependency_links.txt
35
35
  eval_protocol.egg-info/entry_points.txt
36
36
  eval_protocol.egg-info/requires.txt
37
37
  eval_protocol.egg-info/top_level.txt
38
+ eval_protocol/../vite-app/dist/index.html
39
+ eval_protocol/../vite-app/dist/assets/favicon-BkAAWQga.png
40
+ eval_protocol/../vite-app/dist/assets/index-BySN1scz.css
41
+ eval_protocol/../vite-app/dist/assets/index-CRkZ6JGL.js
42
+ eval_protocol/../vite-app/dist/assets/index-CRkZ6JGL.js.map
43
+ eval_protocol/../vite-app/dist/assets/logo-light-BprIBJQW.png
38
44
  eval_protocol/adapters/__init__.py
39
45
  eval_protocol/adapters/braintrust.py
40
46
  eval_protocol/adapters/huggingface.py
@@ -28,7 +28,6 @@ deepdiff>=6.0.0
28
28
  pandas>=1.5.0
29
29
  watchdog>=2.1.0
30
30
  websockets>=15.0.1
31
- fireworks-ai>=0.19.12
32
31
  fastapi>=0.116.1
33
32
 
34
33
  [adapters]
@@ -71,7 +70,7 @@ pip>=25.1.1
71
70
  haikus==0.3.8
72
71
 
73
72
  [fireworks]
74
- fireworks-ai>=0.19.10
73
+ fireworks-ai>=0.19.12
75
74
 
76
75
  [huggingface]
77
76
  datasets>=2.0.0
@@ -48,7 +48,6 @@ dependencies = [
48
48
  "pandas>=1.5.0",
49
49
  "watchdog>=2.1.0",
50
50
  "websockets>=15.0.1",
51
- "fireworks-ai>=0.19.12",
52
51
  "fastapi>=0.116.1",
53
52
  ]
54
53
 
@@ -96,7 +95,7 @@ openevals = [
96
95
  "openevals>=0.1.0",
97
96
  ]
98
97
  fireworks = [
99
- "fireworks-ai>=0.19.10",
98
+ "fireworks-ai>=0.19.12",
100
99
  ]
101
100
  box2d = [
102
101
  "swig",
@@ -112,7 +111,7 @@ huggingface = [
112
111
  ]
113
112
  adapters = [
114
113
  "langfuse>=2.0.0",
115
- "datasets>=2.0.0",
114
+ "datasets>=2.0.0",
116
115
  "transformers>=4.0.0",
117
116
  ]
118
117
 
@@ -123,6 +122,9 @@ eval-protocol = "eval_protocol.cli:main"
123
122
  [tool.setuptools.packages.find]
124
123
  include = ["eval_protocol*", "development*", "vendor*"]
125
124
 
125
+ [tool.setuptools.package-data]
126
+ "eval_protocol" = ["../vite-app/dist/**/*"]
127
+
126
128
  [tool.versioneer]
127
129
  VCS = "git"
128
130
  style = "pep440"
@@ -519,6 +519,7 @@ class TestRolloutControlPlaneIntegration:
519
519
  5,
520
520
  None,
521
521
  8,
522
+ None,
522
523
  )
523
524
 
524
525
  assert result == ["ok"]
@@ -1,4 +1,7 @@
1
1
  import json
2
+ from typing import List
3
+
4
+ from pydantic import BaseModel
2
5
 
3
6
  from vendor.tau2.config import DEFAULT_LLM_NL_ASSERTIONS, DEFAULT_LLM_NL_ASSERTIONS_ARGS
4
7
  from vendor.tau2.data_model.message import Message, SystemMessage, UserMessage
@@ -7,6 +10,20 @@ from vendor.tau2.data_model.tasks import RewardType, Task
7
10
  from vendor.tau2.utils.llm_utils import generate
8
11
 
9
12
 
13
+ class NLAssertionResult(BaseModel):
14
+ """Individual NL assertion evaluation result."""
15
+
16
+ expectedOutcome: str
17
+ reasoning: str
18
+ metExpectation: bool
19
+
20
+
21
+ class NLAssertionsResponse(BaseModel):
22
+ """Complete NL assertions evaluation response."""
23
+
24
+ results: List[NLAssertionResult]
25
+
26
+
10
27
  class NLAssertionsEvaluator:
11
28
  """
12
29
  Judge that evaluates whether a trajectory adheres to all the natural-language assertions.
@@ -37,9 +54,7 @@ class NLAssertionsEvaluator:
37
54
  reward_breakdown={RewardType.NL_ASSERTION: 1.0},
38
55
  )
39
56
 
40
- nl_assertions_checks = cls.evaluate_nl_assertions(
41
- full_trajectory, nl_assertions
42
- )
57
+ nl_assertions_checks = cls.evaluate_nl_assertions(full_trajectory, nl_assertions)
43
58
 
44
59
  # Calculate reward: 1 if all expectations are met, 0 otherwise
45
60
  all_expectations_met = all(result.met for result in nl_assertions_checks)
@@ -70,9 +85,7 @@ class NLAssertionsEvaluator:
70
85
  - metExpectation: Boolean indicating if the assertion was met
71
86
  - reasoning: Explanation for the evaluation
72
87
  """
73
- trajectory_str = "\n".join(
74
- [f"{message.role}: {message.content}" for message in trajectory]
75
- )
88
+ trajectory_str = "\n".join([f"{message.role}: {message.content}" for message in trajectory])
76
89
  # System prompt similar to the TypeScript implementation
77
90
  system_prompt = """
78
91
  TASK
@@ -86,7 +99,7 @@ class NLAssertionsEvaluator:
86
99
  - `reasoning`: a short explanation for your classification
87
100
  - `metExpectation`: `true` if the agent satisfies the expected outcomes, `false` otherwise
88
101
  - `expectedOutcome`: repeat the expectation from the input that you are grading
89
-
102
+
90
103
  Example response structure:
91
104
  {
92
105
  "results": [
@@ -102,7 +115,7 @@ class NLAssertionsEvaluator:
102
115
  user_prompt = f"""
103
116
  conversation:
104
117
  {trajectory_str}
105
-
118
+
106
119
  expectedOutcomes:
107
120
  {nl_assertions}
108
121
  """
@@ -115,8 +128,12 @@ class NLAssertionsEvaluator:
115
128
  assistant_message = generate(
116
129
  model=DEFAULT_LLM_NL_ASSERTIONS,
117
130
  messages=messages,
118
- **DEFAULT_LLM_NL_ASSERTIONS_ARGS,
119
- )
131
+ temperature=0.0,
132
+ response_format={
133
+ "type": "json_schema",
134
+ "json_schema": {"name": "NLAssertionsResponse", "schema": NLAssertionsResponse.model_json_schema()},
135
+ },
136
+ ) # Adding constrained generation to ensure the response is a valid JSON object
120
137
  result_data = json.loads(assistant_message.content)
121
138
  return [
122
139
  NLAssertionCheck(
@@ -0,0 +1 @@
1
+ /*! tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-500:oklch(63.7% .237 25.331);--color-red-700:oklch(50.5% .213 27.518);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-700:oklch(55.4% .135 66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% .018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 247.839);--color-gray-100:oklch(96.7% .003 
264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier 
New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 
50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.visible{visibility:visible}.absolute{position:absolute}.relative{position:relative}.static{position:static}.top-0{top:calc(var(--spacing)*0)}.right-0{right:calc(var(--spacing)*0)}.left-0{left:calc(var(--spacing)*0)}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media (min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media (min-width:80rem){.container{max-width:80rem}}@media 
(min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.contents{display:contents}.flex{display:flex}.hidden{display:none}.inline-flex{display:inline-flex}.table{display:table}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-6{height:calc(var(--spacing)*6)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-7xl{max-width:var(--container-7xl)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-max{min-width:max-content}.flex-shrink-0{flex-shrink:0}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.items-center{align-items:center}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1\.5{gap:calc(var(--spacing)*1.5)}.gap-2{gap:calc(var(--spacing)*2)}.gap-3{gap:calc(var(--spacing)*3)}:where(.space-y-1>:not(:last-child)){--tw-sp
ace-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*1)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*1)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.border{border-style:var(--tw-border-style);border-width:1px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-green-200{border-color:var(--color-green-200)}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-transparent{border-top-color:#0000}.bg-blue-50{background-color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-300{background-color:var(--color-gray-3
00)}.bg-gray-500{background-color:var(--color-gray-500)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-500{background-color:var(--color-red-500)}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.p-0{padding:calc(var(--spacing)*0)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{padding:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.pt-1{padding-top:calc(var(--spacing)*1)}.text-center{text-align:center}.text-left{text-align:left}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.break-words{overflow-wrap:break-word}.break-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-700{color:var(--color-green-700)
}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-700{color:var(--color-red-700)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.underline{text-decoration-line:underline}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webkit-user-select:none;user-sele
ct:none}@media (hover:hover){.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:no-underline:hover{text-decoration-line:none}}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property --tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}}