eval-protocol 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300):
  1. {eval_protocol-0.2.2/eval_protocol.egg-info → eval_protocol-0.2.3}/PKG-INFO +1 -1
  2. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/default_agent_rollout_processor.py +1 -0
  4. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +37 -7
  5. {eval_protocol-0.2.2 → eval_protocol-0.2.3/eval_protocol.egg-info}/PKG-INFO +1 -1
  6. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/LICENSE +0 -0
  7. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/README.md +0 -0
  8. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/development/__init__.py +0 -0
  9. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/development/normalize_sandbox_fusion.py +0 -0
  10. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/development/utils/__init__.py +0 -0
  11. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/development/utils/generate_api_key.py +0 -0
  12. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/development/utils/subprocess_manager.py +0 -0
  13. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/__init__.py +0 -0
  14. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/__main__.py +0 -0
  15. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/adapters/__init__.py +0 -0
  16. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/adapters/braintrust.py +0 -0
  17. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/adapters/huggingface.py +0 -0
  18. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/adapters/langfuse.py +0 -0
  19. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/adapters/trl.py +0 -0
  20. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/__init__.py +0 -0
  21. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/models.py +0 -0
  22. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/orchestrator.py +0 -0
  23. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resource_abc.py +0 -0
  24. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resource_pool.py +0 -0
  25. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/__init__.py +0 -0
  26. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  27. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  28. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  29. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  30. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  31. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/docker_resource.py +0 -0
  32. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  33. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
  34. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
  35. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  36. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/resources/sql_resource.py +0 -0
  37. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/task_manager.py +0 -0
  38. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/agent/tool_registry.py +0 -0
  39. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/auth.py +0 -0
  40. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli.py +0 -0
  41. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/__init__.py +0 -0
  42. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  43. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/common.py +0 -0
  44. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/deploy.py +0 -0
  45. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  46. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/preview.py +0 -0
  47. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  48. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/common_utils.py +0 -0
  49. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/config.py +0 -0
  50. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/datasets/__init__.py +0 -0
  51. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/datasets/loader.py +0 -0
  52. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/evaluation.py +0 -0
  53. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/execution/__init__.py +0 -0
  54. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/execution/pipeline.py +0 -0
  55. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/gcp_tools.py +0 -0
  56. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/generation/cache.py +0 -0
  57. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/generation/clients/base.py +0 -0
  58. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/generation/clients.py +0 -0
  59. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/generic_server.py +0 -0
  60. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/integrations/__init__.py +0 -0
  61. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/integrations/braintrust.py +0 -0
  62. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/integrations/deepeval.py +0 -0
  63. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/integrations/openeval.py +0 -0
  64. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/integrations/trl.py +0 -0
  65. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/__init__.py +0 -0
  66. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/adapter.py +0 -0
  67. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/client/__init__.py +0 -0
  68. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/client/connection.py +0 -0
  69. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/clients.py +0 -0
  70. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/execution/__init__.py +0 -0
  71. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/execution/base_policy.py +0 -0
  72. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/execution/manager.py +0 -0
  73. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/execution/policy.py +0 -0
  74. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/grid_renderer.py +0 -0
  75. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  76. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/mcpgym.py +0 -0
  77. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/process_manager.py +0 -0
  78. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/session/__init__.py +0 -0
  79. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/session/manager.py +0 -0
  80. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/simple_process_manager.py +0 -0
  81. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp/simulation_server.py +0 -0
  82. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/__init__.py +0 -0
  83. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/config.py +0 -0
  84. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
  85. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/main.py +0 -0
  86. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  87. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  88. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  89. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
  90. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  91. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_agent/session.py +0 -0
  92. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/mcp_env.py +0 -0
  93. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/models.py +0 -0
  94. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/packaging.py +0 -0
  95. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/platform_api.py +0 -0
  96. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/playback_policy.py +0 -0
  97. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/__init__.py +0 -0
  98. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  99. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
  100. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  101. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/evaluation_test.py +0 -0
  102. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/types.py +0 -0
  103. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/pytest/utils.py +0 -0
  104. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/resources.py +0 -0
  105. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/reward_function.py +0 -0
  106. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/__init__.py +0 -0
  107. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/accuracy.py +0 -0
  108. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/accuracy_length.py +0 -0
  109. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  110. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  111. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/apps_testing_util.py +0 -0
  112. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/bfcl_reward.py +0 -0
  113. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/code_execution.py +0 -0
  114. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/code_execution_utils.py +0 -0
  115. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/cpp_code.py +0 -0
  116. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  117. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/format.py +0 -0
  118. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/function_calling.py +0 -0
  119. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/json_schema.py +0 -0
  120. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/language_consistency.py +0 -0
  121. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/lean_prover.py +0 -0
  122. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/length.py +0 -0
  123. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  124. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/math.py +0 -0
  125. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  126. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/reasoning_steps.py +0 -0
  127. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/repetition.py +0 -0
  128. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rewards/tag_count.py +0 -0
  129. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/rl_processing.py +0 -0
  130. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/server.py +0 -0
  131. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/typed_interface.py +0 -0
  132. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/types/__init__.py +0 -0
  133. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/types/types.py +0 -0
  134. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/__init__.py +0 -0
  135. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/batch_evaluation.py +0 -0
  136. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/batch_transformation.py +0 -0
  137. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/dataset_helpers.py +0 -0
  138. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/module_loader.py +0 -0
  139. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/packaging_utils.py +0 -0
  140. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol/utils/static_policy.py +0 -0
  141. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol.egg-info/SOURCES.txt +0 -0
  142. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol.egg-info/dependency_links.txt +0 -0
  143. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol.egg-info/entry_points.txt +0 -0
  144. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol.egg-info/requires.txt +0 -0
  145. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/eval_protocol.egg-info/top_level.txt +0 -0
  146. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/pyproject.toml +0 -0
  147. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/setup.cfg +0 -0
  148. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/setup.py +0 -0
  149. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_accuracy.py +0 -0
  150. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_accuracy_length.py +0 -0
  151. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_adapters_e2e.py +0 -0
  152. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_agent_orchestrator.py +0 -0
  153. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_agent_resources.py +0 -0
  154. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_auth.py +0 -0
  155. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_batch_evaluation.py +0 -0
  156. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_braintrust_adapter.py +0 -0
  157. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_braintrust_example.py +0 -0
  158. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_cli.py +0 -0
  159. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_cli_agent.py +0 -0
  160. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_cli_args.py +0 -0
  161. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_code_execution.py +0 -0
  162. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_config.py +0 -0
  163. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_control_plane_separation.py +0 -0
  164. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_cpp_code.py +0 -0
  165. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_data_driven_task_manager.py +0 -0
  166. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_deepcoder_reward.py +0 -0
  167. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_deepeval_integration.py +0 -0
  168. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_deploy_integration.py +0 -0
  169. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_e2b_integration.py +0 -0
  170. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_e2b_js_integration.py +0 -0
  171. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_edge_cases.py +0 -0
  172. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_eval_protocol_import.py +0 -0
  173. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_evaluation.py +0 -0
  174. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_evaluation_integration.py +0 -0
  175. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_evaluation_preview_integration.py +0 -0
  176. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_examples_end_to_end.py +0 -0
  177. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_fireworks_api.py +0 -0
  178. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_format.py +0 -0
  179. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_fractional_code.py +0 -0
  180. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_frozen_lake_http_server.py +0 -0
  181. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_frozen_lake_seed_evaluation.py +0 -0
  182. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_function_calling.py +0 -0
  183. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_gcp_tools.py +0 -0
  184. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_generic_server.py +0 -0
  185. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_integration.py +0 -0
  186. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_json_schema.py +0 -0
  187. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_kwargs_validation.py +0 -0
  188. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_language_consistency.py +0 -0
  189. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_lean_prover.py +0 -0
  190. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_lean_prover_runner.py +0 -0
  191. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_length.py +0 -0
  192. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_list_comparison_math_reward.py +0 -0
  193. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_math.py +0 -0
  194. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_minimal.py +0 -0
  195. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_models.py +0 -0
  196. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_models_rl.py +0 -0
  197. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_multiple_choice_math_reward.py +0 -0
  198. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_n_variant_batch_integration.py +0 -0
  199. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_n_variant_integration.py +0 -0
  200. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_openai_compatibility.py +0 -0
  201. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_openeval_integration.py +0 -0
  202. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_packaging.py +0 -0
  203. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_parallel_rollouts.py +0 -0
  204. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_platform_api.py +0 -0
  205. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_readiness.py +0 -0
  206. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_reasoning_steps.py +0 -0
  207. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_repetition.py +0 -0
  208. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_repetition_debug.py +0 -0
  209. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_reward_function.py +0 -0
  210. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_reward_protocol_import.py +0 -0
  211. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_rl_processing.py +0 -0
  212. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_rollout_control_plane_integration.py +0 -0
  213. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_server.py +0 -0
  214. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_tag_count.py +0 -0
  215. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_typed_interface.py +0 -0
  216. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_typed_interface_rl.py +0 -0
  217. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/tests/test_url_handling.py +0 -0
  218. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/__init__.py +0 -0
  219. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/agent/__init__.py +0 -0
  220. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/agent/base.py +0 -0
  221. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/agent/llm_agent.py +0 -0
  222. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/api_service/__init__.py +0 -0
  223. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/api_service/api_config.py +0 -0
  224. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/api_service/data_model.py +0 -0
  225. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/api_service/simulation_service.py +0 -0
  226. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/cli.py +0 -0
  227. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/config.py +0 -0
  228. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/data_model/__init__.py +0 -0
  229. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/data_model/message.py +0 -0
  230. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/data_model/simulation.py +0 -0
  231. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/data_model/tasks.py +0 -0
  232. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/__init__.py +0 -0
  233. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/airline/__init__.py +0 -0
  234. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/airline/data_model.py +0 -0
  235. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/airline/environment.py +0 -0
  236. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/airline/tools.py +0 -0
  237. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/airline/utils.py +0 -0
  238. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/mock/__init__.py +0 -0
  239. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/mock/data_model.py +0 -0
  240. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/mock/environment.py +0 -0
  241. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/mock/tools.py +0 -0
  242. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/mock/utils.py +0 -0
  243. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/retail/__init__.py +0 -0
  244. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/retail/data_model.py +0 -0
  245. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/retail/environment.py +0 -0
  246. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/retail/tools.py +0 -0
  247. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/retail/utils.py +0 -0
  248. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/__init__.py +0 -0
  249. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/data_model.py +0 -0
  250. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/environment.py +0 -0
  251. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  252. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  253. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  254. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  255. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  256. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  257. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  258. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  259. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/tools.py +0 -0
  260. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  261. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  262. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/domains/telecom/utils.py +0 -0
  263. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/__init__.py +0 -0
  264. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/db.py +0 -0
  265. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/environment.py +0 -0
  266. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/server.py +0 -0
  267. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/tool.py +0 -0
  268. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/toolkit.py +0 -0
  269. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  270. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/__init__.py +0 -0
  271. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/evaluator.py +0 -0
  272. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  273. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  274. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  275. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  276. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  277. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/metrics/__init__.py +0 -0
  278. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/metrics/agent_metrics.py +0 -0
  279. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  280. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/orchestrator/__init__.py +0 -0
  281. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  282. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  283. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/orchestrator/utils.py +0 -0
  284. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/registry.py +0 -0
  285. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/run.py +0 -0
  286. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/scripts/__init__.py +0 -0
  287. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/scripts/check_data.py +0 -0
  288. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  289. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/scripts/start_servers.py +0 -0
  290. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/scripts/view_simulations.py +0 -0
  291. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/user/__init__.py +0 -0
  292. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/user/base.py +0 -0
  293. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/user/user_simulator.py +0 -0
  294. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/utils/__init__.py +0 -0
  295. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/utils/display.py +0 -0
  296. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/utils/io_utils.py +0 -0
  297. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/utils/llm_utils.py +0 -0
  298. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/utils/pydantic_utils.py +0 -0
  299. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/vendor/tau2/utils/utils.py +0 -0
  300. {eval_protocol-0.2.2 → eval_protocol-0.2.3}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: Apache-2.0
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-08-04T16:23:29-0700",
11
+ "date": "2025-08-04T20:35:33-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "a74f14cb888ac8a6e3e6877822122f6252365d54",
15
- "version": "0.2.2"
14
+ "full-revisionid": "52b46a7d3f8455d848d8d5138ec4ca4d6343d3d2",
15
+ "version": "0.2.3"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -75,6 +75,7 @@ class Agent:
75
75
  self, messages: list[Message], tools: Optional[list[ChatCompletionToolParam]]
76
76
  ) -> ChatCompletionMessage:
77
77
  messages = [message.model_dump() if hasattr(message, "model_dump") else message for message in messages]
78
+ tools = [{"function": tool["function"].model_dump(), "type": "function"} for tool in tools] if tools else []
78
79
  response = await self._policy._make_llm_call(messages=messages, tools=tools)
79
80
  return response["choices"][0]["message"]
80
81
 
@@ -2,6 +2,7 @@ import asyncio
2
2
  import os
3
3
  import subprocess
4
4
  import time
5
+ import socket
5
6
  from pathlib import Path
6
7
  from typing import List, Optional
7
8
 
@@ -69,11 +70,8 @@ class MCPServerManager:
69
70
  self._log_file = log_file
70
71
  self._log_file_path = log_file_path
71
72
 
72
- # Wait for server to start
73
- time.sleep(3)
74
-
75
- # Check if process is still running
76
- if self.process.poll() is not None:
73
+ # Wait for server to be ready with proper health check
74
+ if not self._wait_for_server_ready(timeout=15):
77
75
  try:
78
76
  with open(self._log_file_path, "r") as f:
79
77
  log_content = f.read()
@@ -82,13 +80,45 @@ class MCPServerManager:
82
80
  print("=" * 50)
83
81
  print(log_content)
84
82
  print("=" * 50)
85
- raise RuntimeError(f"Server failed to start. Check log above for details.")
83
+ raise RuntimeError(f"Server failed to start or become ready. Check log above for details.")
86
84
  except Exception as e:
87
85
  stdout, stderr = self.process.communicate()
88
- raise RuntimeError(f"Server failed to start. stderr: {stderr}, log error: {e}")
86
+ raise RuntimeError(f"Server failed to start or become ready. stderr: {stderr}, log error: {e}")
89
87
 
90
88
  print(f"✅ Server started successfully on port {self.port}")
91
89
 
90
+ def _wait_for_server_ready(self, timeout: int = 15) -> bool:
91
+ """
92
+ Wait for server to be ready by polling socket connection.
93
+ """
94
+ start_time = time.time()
95
+ health_check_failures = 0
96
+
97
+ while time.time() - start_time < timeout:
98
+ # Check if process is still running
99
+ if self.process.poll() is not None:
100
+ print(f"Server process exited early")
101
+ return False
102
+
103
+ try:
104
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
105
+ s.settimeout(1)
106
+ result = s.connect_ex(("localhost", self.port))
107
+ if result == 0:
108
+ time.sleep(0.5)
109
+ return True
110
+ except Exception as e:
111
+ health_check_failures += 1
112
+ # Print first few failures for debugging
113
+ if health_check_failures <= 3:
114
+ print(f"Health check failed: {e}")
115
+
116
+ # Wait before next check
117
+ time.sleep(0.1)
118
+
119
+ print(f"Server failed to become ready within {timeout} seconds")
120
+ return False
121
+
92
122
  def stop(self) -> None:
93
123
  """Stop the MCP server."""
94
124
  if self.process:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: Apache-2.0
File without changes
File without changes
File without changes
File without changes