eval-protocol 0.2.5.dev1__tar.gz → 0.2.6.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321):
  1. {eval_protocol-0.2.5.dev1/eval_protocol.egg-info → eval_protocol-0.2.6.dev1}/PKG-INFO +3 -3
  2. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/README.md +1 -1
  3. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/__init__.py +4 -3
  4. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/_version.py +3 -3
  5. eval_protocol-0.2.6.dev1/eval_protocol/common_utils.py +30 -0
  6. eval_protocol-0.2.6.dev1/eval_protocol/dataset_logger/directory_utils.py +55 -0
  7. eval_protocol-0.2.6.dev1/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +98 -0
  8. eval_protocol-0.2.6.dev1/eval_protocol/get_pep440_version.py +133 -0
  9. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/human_id/__init__.py +3 -2
  10. eval_protocol-0.2.6.dev1/eval_protocol/logging_utils.py +175 -0
  11. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/client/connection.py +11 -39
  12. eval_protocol-0.2.6.dev1/eval_protocol/mcp/execution/manager.py +547 -0
  13. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/execution/policy.py +11 -0
  14. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/mcpgym.py +6 -1
  15. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/session/manager.py +3 -0
  16. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_env.py +32 -4
  17. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/models.py +32 -2
  18. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +12 -5
  19. eval_protocol-0.2.6.dev1/eval_protocol/pytest/default_single_turn_rollout_process.py +64 -0
  20. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/evaluation_test.py +6 -3
  21. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/utils.py +0 -2
  22. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/types/types.py +1 -0
  23. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/logs_server.py +5 -1
  24. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1/eval_protocol.egg-info}/PKG-INFO +3 -3
  25. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol.egg-info/SOURCES.txt +9 -0
  26. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol.egg-info/entry_points.txt +1 -0
  27. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/pyproject.toml +6 -2
  28. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_rollout_control_plane_integration.py +2 -1
  29. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/evaluator_nl_assertions.py +27 -10
  30. eval_protocol-0.2.6.dev1/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  31. eval_protocol-0.2.6.dev1/vite-app/dist/assets/index-BMc_e8JT.js +88 -0
  32. eval_protocol-0.2.6.dev1/vite-app/dist/assets/index-BMc_e8JT.js.map +1 -0
  33. eval_protocol-0.2.6.dev1/vite-app/dist/assets/index-Dp7ms4NJ.css +1 -0
  34. eval_protocol-0.2.6.dev1/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
  35. eval_protocol-0.2.6.dev1/vite-app/dist/index.html +14 -0
  36. eval_protocol-0.2.5.dev1/eval_protocol/common_utils.py +0 -36
  37. eval_protocol-0.2.5.dev1/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -114
  38. eval_protocol-0.2.5.dev1/eval_protocol/mcp/execution/manager.py +0 -526
  39. eval_protocol-0.2.5.dev1/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -50
  40. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/LICENSE +0 -0
  41. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/development/__init__.py +0 -0
  42. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/development/normalize_sandbox_fusion.py +0 -0
  43. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/development/utils/__init__.py +0 -0
  44. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/development/utils/generate_api_key.py +0 -0
  45. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/development/utils/subprocess_manager.py +0 -0
  46. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/__main__.py +0 -0
  47. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/adapters/__init__.py +0 -0
  48. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/adapters/braintrust.py +0 -0
  49. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/adapters/huggingface.py +0 -0
  50. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/adapters/langfuse.py +0 -0
  51. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/adapters/trl.py +0 -0
  52. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/__init__.py +0 -0
  53. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/models.py +0 -0
  54. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/orchestrator.py +0 -0
  55. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resource_abc.py +0 -0
  56. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resource_pool.py +0 -0
  57. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/__init__.py +0 -0
  58. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  59. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  60. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  61. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  62. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  63. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/docker_resource.py +0 -0
  64. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  65. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
  66. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
  67. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  68. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/resources/sql_resource.py +0 -0
  69. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/task_manager.py +0 -0
  70. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/agent/tool_registry.py +0 -0
  71. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/auth.py +0 -0
  72. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli.py +0 -0
  73. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/__init__.py +0 -0
  74. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  75. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/common.py +0 -0
  76. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/deploy.py +0 -0
  77. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
  78. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/logs.py +0 -0
  79. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/preview.py +0 -0
  80. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  81. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/config.py +0 -0
  82. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/dataset_logger/__init__.py +0 -0
  83. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  84. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/datasets/__init__.py +0 -0
  85. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/datasets/loader.py +0 -0
  86. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/evaluation.py +0 -0
  87. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/execution/__init__.py +0 -0
  88. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/execution/pipeline.py +0 -0
  89. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/gcp_tools.py +0 -0
  90. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/generation/cache.py +0 -0
  91. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/generation/clients/base.py +0 -0
  92. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/generation/clients.py +0 -0
  93. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/generic_server.py +0 -0
  94. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/human_id/dictionary.py +0 -0
  95. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/integrations/__init__.py +0 -0
  96. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/integrations/braintrust.py +0 -0
  97. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/integrations/deepeval.py +0 -0
  98. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/integrations/openeval.py +0 -0
  99. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/integrations/trl.py +0 -0
  100. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/__init__.py +0 -0
  101. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/adapter.py +0 -0
  102. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/client/__init__.py +0 -0
  103. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/clients.py +0 -0
  104. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/execution/__init__.py +0 -0
  105. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/execution/base_policy.py +0 -0
  106. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/grid_renderer.py +0 -0
  107. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  108. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/process_manager.py +0 -0
  109. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/session/__init__.py +0 -0
  110. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/simple_process_manager.py +0 -0
  111. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp/simulation_server.py +0 -0
  112. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/__init__.py +0 -0
  113. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/config.py +0 -0
  114. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
  115. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/main.py +0 -0
  116. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  117. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  118. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  119. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
  120. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  121. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/mcp_agent/session.py +0 -0
  122. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/packaging.py +0 -0
  123. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/platform_api.py +0 -0
  124. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/playback_policy.py +0 -0
  125. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/__init__.py +0 -0
  126. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  127. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  128. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
  129. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/pytest/types.py +0 -0
  130. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/resources.py +0 -0
  131. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/reward_function.py +0 -0
  132. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/__init__.py +0 -0
  133. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/accuracy.py +0 -0
  134. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/accuracy_length.py +0 -0
  135. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  136. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  137. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/apps_testing_util.py +0 -0
  138. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/bfcl_reward.py +0 -0
  139. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/code_execution.py +0 -0
  140. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/code_execution_utils.py +0 -0
  141. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/cpp_code.py +0 -0
  142. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  143. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/format.py +0 -0
  144. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/function_calling.py +0 -0
  145. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/json_schema.py +0 -0
  146. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/language_consistency.py +0 -0
  147. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/lean_prover.py +0 -0
  148. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/length.py +0 -0
  149. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  150. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/math.py +0 -0
  151. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  152. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/reasoning_steps.py +0 -0
  153. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/repetition.py +0 -0
  154. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rewards/tag_count.py +0 -0
  155. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/rl_processing.py +0 -0
  156. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/server.py +0 -0
  157. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/typed_interface.py +0 -0
  158. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/types/__init__.py +0 -0
  159. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/__init__.py +0 -0
  160. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/batch_evaluation.py +0 -0
  161. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/batch_transformation.py +0 -0
  162. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/dataset_helpers.py +0 -0
  163. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/module_loader.py +0 -0
  164. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/packaging_utils.py +0 -0
  165. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/static_policy.py +0 -0
  166. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol/utils/vite_server.py +0 -0
  167. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol.egg-info/dependency_links.txt +0 -0
  168. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol.egg-info/requires.txt +1 -1
  169. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/eval_protocol.egg-info/top_level.txt +0 -0
  170. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/setup.cfg +0 -0
  171. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/setup.py +0 -0
  172. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_accuracy.py +0 -0
  173. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_accuracy_length.py +0 -0
  174. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_adapters_e2e.py +0 -0
  175. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_agent_orchestrator.py +0 -0
  176. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_agent_resources.py +0 -0
  177. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_auth.py +0 -0
  178. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_batch_evaluation.py +0 -0
  179. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_braintrust_adapter.py +0 -0
  180. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_braintrust_example.py +0 -0
  181. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_cli.py +0 -0
  182. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_cli_agent.py +0 -0
  183. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_cli_args.py +0 -0
  184. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_code_execution.py +0 -0
  185. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_config.py +0 -0
  186. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_control_plane_separation.py +0 -0
  187. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_cpp_code.py +0 -0
  188. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_data_driven_task_manager.py +0 -0
  189. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_deepcoder_reward.py +0 -0
  190. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_deepeval_integration.py +0 -0
  191. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_deploy_integration.py +0 -0
  192. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_e2b_integration.py +0 -0
  193. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_e2b_js_integration.py +0 -0
  194. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_edge_cases.py +0 -0
  195. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_eval_protocol_import.py +0 -0
  196. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_evaluation.py +0 -0
  197. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_evaluation_integration.py +0 -0
  198. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_evaluation_preview_integration.py +0 -0
  199. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_examples_end_to_end.py +0 -0
  200. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_fireworks_api.py +0 -0
  201. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_format.py +0 -0
  202. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_fractional_code.py +0 -0
  203. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_frozen_lake_http_server.py +0 -0
  204. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_frozen_lake_seed_evaluation.py +0 -0
  205. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_function_calling.py +0 -0
  206. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_gcp_tools.py +0 -0
  207. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_generic_server.py +0 -0
  208. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_integration.py +0 -0
  209. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_json_schema.py +0 -0
  210. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_kwargs_validation.py +0 -0
  211. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_language_consistency.py +0 -0
  212. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_lean_prover.py +0 -0
  213. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_lean_prover_runner.py +0 -0
  214. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_length.py +0 -0
  215. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_list_comparison_math_reward.py +0 -0
  216. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_math.py +0 -0
  217. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_minimal.py +0 -0
  218. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_models.py +0 -0
  219. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_models_rl.py +0 -0
  220. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_multiple_choice_math_reward.py +0 -0
  221. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_n_variant_batch_integration.py +0 -0
  222. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_n_variant_integration.py +0 -0
  223. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_openai_compatibility.py +0 -0
  224. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_openeval_integration.py +0 -0
  225. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_packaging.py +0 -0
  226. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_parallel_rollouts.py +0 -0
  227. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_platform_api.py +0 -0
  228. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_readiness.py +0 -0
  229. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_reasoning_steps.py +0 -0
  230. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_repetition.py +0 -0
  231. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_repetition_debug.py +0 -0
  232. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_reward_function.py +0 -0
  233. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_reward_protocol_import.py +0 -0
  234. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_rl_processing.py +0 -0
  235. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_server.py +0 -0
  236. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_tag_count.py +0 -0
  237. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_typed_interface.py +0 -0
  238. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_typed_interface_rl.py +0 -0
  239. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/tests/test_url_handling.py +0 -0
  240. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/__init__.py +0 -0
  241. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/agent/__init__.py +0 -0
  242. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/agent/base.py +0 -0
  243. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/agent/llm_agent.py +0 -0
  244. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/api_service/__init__.py +0 -0
  245. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/api_service/api_config.py +0 -0
  246. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/api_service/data_model.py +0 -0
  247. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/api_service/simulation_service.py +0 -0
  248. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/cli.py +0 -0
  249. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/config.py +0 -0
  250. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/data_model/__init__.py +0 -0
  251. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/data_model/message.py +0 -0
  252. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/data_model/simulation.py +0 -0
  253. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/data_model/tasks.py +0 -0
  254. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/__init__.py +0 -0
  255. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/airline/__init__.py +0 -0
  256. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/airline/data_model.py +0 -0
  257. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/airline/environment.py +0 -0
  258. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/airline/tools.py +0 -0
  259. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/airline/utils.py +0 -0
  260. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/mock/__init__.py +0 -0
  261. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/mock/data_model.py +0 -0
  262. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/mock/environment.py +0 -0
  263. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/mock/tools.py +0 -0
  264. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/mock/utils.py +0 -0
  265. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/retail/__init__.py +0 -0
  266. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/retail/data_model.py +0 -0
  267. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/retail/environment.py +0 -0
  268. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/retail/tools.py +0 -0
  269. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/retail/utils.py +0 -0
  270. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/__init__.py +0 -0
  271. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/data_model.py +0 -0
  272. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/environment.py +0 -0
  273. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  274. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  275. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  276. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  277. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  278. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  279. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  280. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  281. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/tools.py +0 -0
  282. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  283. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  284. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/domains/telecom/utils.py +0 -0
  285. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/__init__.py +0 -0
  286. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/db.py +0 -0
  287. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/environment.py +0 -0
  288. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/server.py +0 -0
  289. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/tool.py +0 -0
  290. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/toolkit.py +0 -0
  291. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  292. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/__init__.py +0 -0
  293. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/evaluator.py +0 -0
  294. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  295. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  296. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  297. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  298. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/metrics/__init__.py +0 -0
  299. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/metrics/agent_metrics.py +0 -0
  300. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  301. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/orchestrator/__init__.py +0 -0
  302. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  303. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  304. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/orchestrator/utils.py +0 -0
  305. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/registry.py +0 -0
  306. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/run.py +0 -0
  307. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/scripts/__init__.py +0 -0
  308. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/scripts/check_data.py +0 -0
  309. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  310. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/scripts/start_servers.py +0 -0
  311. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/scripts/view_simulations.py +0 -0
  312. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/user/__init__.py +0 -0
  313. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/user/base.py +0 -0
  314. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/user/user_simulator.py +0 -0
  315. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/utils/__init__.py +0 -0
  316. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/utils/display.py +0 -0
  317. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/utils/io_utils.py +0 -0
  318. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/utils/llm_utils.py +0 -0
  319. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/utils/pydantic_utils.py +0 -0
  320. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/vendor/tau2/utils/utils.py +0 -0
  321. {eval_protocol-0.2.5.dev1 → eval_protocol-0.2.6.dev1}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.5.dev1
3
+ Version: 0.2.6.dev1
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -41,10 +41,10 @@ Requires-Dist: pandas>=1.5.0
41
41
  Requires-Dist: watchdog>=2.1.0
42
42
  Requires-Dist: websockets>=15.0.1
43
43
  Requires-Dist: fastapi>=0.116.1
44
+ Requires-Dist: pytest>=6.0.0
44
45
  Provides-Extra: dev
45
46
  Requires-Dist: build; extra == "dev"
46
47
  Requires-Dist: twine; extra == "dev"
47
- Requires-Dist: pytest>=6.0.0; extra == "dev"
48
48
  Requires-Dist: pytest-asyncio; extra == "dev"
49
49
  Requires-Dist: pytest-httpserver; extra == "dev"
50
50
  Requires-Dist: werkzeug>=2.0.0; extra == "dev"
@@ -109,7 +109,7 @@ markdown generation tasks to customer service agents with tool calling
109
109
  capabilities.
110
110
 
111
111
  <p align="center">
112
- <img src="./assets/ui.png" alt="UI" />
112
+ <img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
113
113
  <br>
114
114
  <sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
115
115
  </p>
@@ -13,7 +13,7 @@ markdown generation tasks to customer service agents with tool calling
13
13
  capabilities.
14
14
 
15
15
  <p align="center">
16
- <img src="./assets/ui.png" alt="UI" />
16
+ <img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
17
17
  <br>
18
18
  <sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
19
19
  </p>
@@ -10,15 +10,16 @@ tool-augmented models using self-contained task bundles.
10
10
 
11
11
  import warnings
12
12
 
13
- from .adapters.braintrust import reward_fn_to_scorer, scorer_to_reward_fn
13
+ from eval_protocol.adapters.braintrust import reward_fn_to_scorer, scorer_to_reward_fn
14
+
14
15
  from .auth import get_fireworks_account_id, get_fireworks_api_key
15
16
  from .common_utils import load_jsonl
16
17
  from .config import RewardKitConfig, get_config, load_config
17
18
  from .mcp_env import (
18
19
  AnthropicPolicy,
19
- OpenAIPolicy,
20
- LiteLLMPolicy,
21
20
  FireworksPolicy,
21
+ LiteLLMPolicy,
22
+ OpenAIPolicy,
22
23
  make,
23
24
  rollout,
24
25
  test_mcp,
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-08-06T17:51:29-0700",
11
+ "date": "2025-08-08T10:51:54-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "a807140937b9002c71ee42a6afef594ea6377c2d",
15
- "version": "0.2.5-dev1"
14
+ "full-revisionid": "986452fd04442f9a0d1ba902753a83480e413d43",
15
+ "version": "0.2.6-dev1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -0,0 +1,30 @@
1
+ import json
2
+ import re
3
+ from typing import Any, Dict, List
4
+
5
+
6
def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
    """
    Read a JSONL file and return the parsed JSON value of every non-blank line.

    Blank and whitespace-only lines are skipped rather than treated as errors.

    Args:
        file_path: Path to the JSONL file (read as UTF-8).

    Returns:
        A list with one parsed JSON object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        ValueError: If a line is not valid JSON and mentions ``row_id``; the
            message carries the 1-based line number, the offending line and the
            extracted row id (``None`` when it could not be extracted).
        json.JSONDecodeError: If a line is not valid JSON and has no ``row_id``.
    """
    data: List[Dict[str, Any]] = []
    with open(file_path, "r", encoding="utf-8") as f:
        # Count from 1 so reported numbers match what an editor shows.
        for line_number, line in enumerate(f, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            try:
                data.append(json.loads(stripped))
            except json.JSONDecodeError as e:
                print(f"Error parsing JSON line for file {file_path} at line {line_number}")
                if "row_id" in line:
                    # Search the whole line: slicing from the position of
                    # `row_id` would drop the opening quote of the key and
                    # the pattern could never match.
                    match = re.search(r'"row_id"\s*:\s*("(?:[^"\\]|\\.)*"|[^,}\s]+)', line)
                    row_id = match.group(1) if match else None
                    raise ValueError(f"{e.msg} at line {line_number}: {stripped} (row_id={row_id})") from e
                raise
    return data
@@ -0,0 +1,55 @@
1
+ import os
2
+ from typing import Optional
3
+
4
+ # Shared constants for directory discovery
5
+ EVAL_PROTOCOL_DIR = ".eval_protocol"
6
+ PYTHON_FILES = ["pyproject.toml", "requirements.txt"]
7
+ DATASETS_DIR = "datasets"
8
+
9
+
10
def find_eval_protocol_dir() -> str:
    """
    Locate (and create if needed) the .eval_protocol directory.

    Resolution order, starting from the directory containing this module and
    walking up through its parents (the filesystem root itself is not examined):

    1. The first ancestor that already contains a .eval_protocol entry.
    2. Otherwise, the first ancestor that contains one of PYTHON_FILES; the
       .eval_protocol directory is placed next to that file.
    3. Otherwise, .eval_protocol under the current working directory.

    Returns:
        Path to the .eval_protocol directory, which exists when this returns.
    """
    start_dir = os.path.dirname(os.path.abspath(__file__))

    def _ancestors(start: str):
        """Yield `start` and each parent directory, stopping before the root."""
        current = start
        while True:
            parent = os.path.dirname(current)
            # dirname() of a root returns the root itself on every platform
            # ("/" on POSIX, "C:\\" on Windows), so this terminates everywhere,
            # unlike comparing against the literal "/".
            if parent == current:
                return
            yield current
            current = parent

    log_dir = None

    # Pass 1: an existing .eval_protocol directory somewhere up the tree.
    for candidate in _ancestors(start_dir):
        if os.path.exists(os.path.join(candidate, EVAL_PROTOCOL_DIR)):
            log_dir = os.path.join(candidate, EVAL_PROTOCOL_DIR)
            break

    # Pass 2: a project marker file (pyproject.toml / requirements.txt).
    if log_dir is None:
        for candidate in _ancestors(start_dir):
            if any(os.path.exists(os.path.join(candidate, f)) for f in PYTHON_FILES):
                log_dir = os.path.join(candidate, EVAL_PROTOCOL_DIR)
                break

    # Pass 3: fall back to the directory the process is running in.
    if log_dir is None:
        log_dir = os.path.join(os.getcwd(), EVAL_PROTOCOL_DIR)

    # create the .eval_protocol directory if it doesn't exist
    os.makedirs(log_dir, exist_ok=True)

    return log_dir
40
+
41
+
42
def find_eval_protocol_datasets_dir() -> str:
    """
    Return the datasets folder that lives inside the .eval_protocol directory.

    The parent directory is resolved with find_eval_protocol_dir(); the
    datasets subdirectory is created when it is missing.

    Returns:
        Path to the .eval_protocol/datasets directory
    """
    datasets_path = os.path.join(find_eval_protocol_dir(), DATASETS_DIR)

    # exist_ok keeps repeated calls harmless
    os.makedirs(datasets_path, exist_ok=True)
    return datasets_path
@@ -0,0 +1,98 @@
1
+ import json
2
+ import os
3
+ import time
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, List, Optional
7
+
8
+ from eval_protocol.common_utils import load_jsonl
9
+ from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
10
+ from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_datasets_dir
11
+
12
+ if TYPE_CHECKING:
13
+ from eval_protocol.models import EvaluationRow
14
+
15
+
16
class LocalFSDatasetLoggerAdapter(DatasetLogger):
    """
    Logger that stores evaluation rows as JSONL files on the local filesystem.

    Rows are appended to one file per UTC date inside the datasets directory.
    Logging a row whose ID already exists in any of the files rewrites that
    line in place instead of appending a duplicate.

    NOTE: this class performs no file locking; concurrent writers to the same
    files are not protected against each other.
    """

    def __init__(self):
        # Resolve once: the lookup walks the directory tree and calls makedirs.
        self.datasets_dir = find_eval_protocol_datasets_dir()
        self.log_dir = os.path.dirname(self.datasets_dir)

        # Append mode creates the file when missing and never truncates it.
        with open(self.current_jsonl_path, "a", encoding="utf-8"):
            pass

    @property
    def current_date(self) -> str:
        # Use UTC timezone to be consistent across local device/locations/CI
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")

    @property
    def current_jsonl_path(self) -> str:
        """
        The current JSONL file path. Based on the current date.
        """
        return os.path.join(self.datasets_dir, f"{self.current_date}.jsonl")

    def _jsonl_paths(self) -> List[str]:
        """Return the paths of all .jsonl files in the datasets directory."""
        if not os.path.exists(self.datasets_dir):
            return []
        candidates = (
            os.path.join(self.datasets_dir, filename)
            for filename in os.listdir(self.datasets_dir)
            if filename.endswith(".jsonl")
        )
        return [path for path in candidates if os.path.isfile(path)]

    def log(self, row: "EvaluationRow") -> None:
        """Log a row, updating existing row with same ID or appending new row."""
        row_id = row.input_metadata.row_id
        # Files are opened in text mode, which already translates "\n" to the
        # platform line ending; writing os.linesep would yield "\r\r\n" on Windows.
        serialized = row.model_dump_json(exclude_none=True) + "\n"

        # Check if row with this ID already exists in any JSONL file
        for file_path in self._jsonl_paths():
            with open(file_path, "r", encoding="utf-8") as f:
                lines = f.readlines()

            for i, line in enumerate(lines):
                try:
                    line_data = json.loads(line.strip())
                    existing_id = line_data["input_metadata"]["row_id"]
                except (json.JSONDecodeError, KeyError, TypeError):
                    # Unparseable line, or a JSON value without the expected
                    # metadata: it cannot be the row we are looking for.
                    continue

                if existing_id == row_id:
                    # Update existing row
                    lines[i] = serialized
                    with open(file_path, "w", encoding="utf-8") as f:
                        f.writelines(lines)
                    return

        # If no existing row found, append new row to current file
        with open(self.current_jsonl_path, "a", encoding="utf-8") as f:
            f.write(serialized)

    def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]:
        """Read rows from all JSONL files in the datasets directory.

        Args:
            row_id: When given (and non-empty), only rows with this ID are returned.

        Returns:
            The parsed rows.

        Raises:
            ValueError: If the same row ID appears more than once across the files.
        """
        # Imported lazily; at module level the name is only available for type checking.
        from eval_protocol.models import EvaluationRow

        all_rows = []
        existing_row_ids = set()
        for file_path in self._jsonl_paths():
            for r in load_jsonl(file_path):
                row = EvaluationRow(**r)
                current_id = row.input_metadata.row_id
                if current_id in existing_row_ids:
                    raise ValueError(f"Duplicate Row ID {current_id} already exists")
                existing_row_ids.add(current_id)
                all_rows.append(row)

        if row_id:
            # Filter by row_id if specified
            return [row for row in all_rows if getattr(row.input_metadata, "row_id", None) == row_id]
        return all_rows
@@ -0,0 +1,133 @@
1
+ # Cache for PEP 440 version string
2
+ import subprocess
3
+
4
_version_cache = {"version": None, "base_version": None}


def get_pep440_version(base_version=None):
    """
    Generate a PEP 440 compliant version string based on git information.

    This function is inspired by versioneer but doesn't require the full versioneer
    setup, making it easier for downstream users to adopt without additional dependencies.

    The result is cached (keyed on the ``base_version`` argument as passed by
    the caller) to avoid repeated git calls.

    Args:
        base_version: The base version string (e.g., "1.0.0"). If None, the most
            recent tag reported by ``git describe`` is used; when there is no tag
            (or git is unavailable) "0.0.0" is used.

    Returns:
        A version string made of:
        - the base version
        - a development release number (devN) from the commit count since
          base_version; when that count is zero or base_version is not a known
          revision, the total commit count of HEAD is used instead
        - a local version identifier with the short git commit hash
        - a ".dirty" suffix if there are uncommitted changes
        When git information cannot be obtained the result is "<base>+unknown".

    Examples:
        >>> get_pep440_version("1.0.0")
        "1.0.0.dev42+g1234567"  # 42 commits since 1.0.0, commit hash 1234567
        >>> get_pep440_version("1.0.0")  # with uncommitted changes
        "1.0.0.dev42+g1234567.dirty"  # indicates dirty working directory
        >>> get_pep440_version("1.0.0")  # no git available
        "1.0.0+unknown"  # indicates git info not available
    """
    # Remember what the caller asked for: base_version is reassigned below, and
    # caching the reassigned value would make calls with None never hit the cache.
    requested_base = base_version
    if _version_cache["version"] is not None and _version_cache["base_version"] == requested_base:
        return _version_cache["version"]

    def _git(*args):
        """Run a git sub-command and return its stripped stdout."""
        return subprocess.check_output(
            ["git", *args], universal_newlines=True, stderr=subprocess.DEVNULL
        ).strip()

    try:
        # Raises CalledProcessError when we are not inside a git repository.
        _git("rev-parse", "--git-dir")

        # If base_version is None, try to find the most recent version tag
        if base_version is None:
            try:
                base_version = _git("describe", "--tags", "--abbrev=0")
            except subprocess.CalledProcessError:
                # No tags found, handled just below
                base_version = None

        if base_version is None:
            # No base version (no tags), just count all commits
            count = _git("rev-list", "--count", "HEAD")
            base_version = "0.0.0"  # Use this for the final version string
        else:
            try:
                count = _git("rev-list", "--count", f"{base_version}..HEAD")
                # If no commits found, count from the beginning instead
                if count == "0" or not count:
                    count = _git("rev-list", "--count", "HEAD")
            except subprocess.CalledProcessError:
                # base_version is not a known revision, count all commits
                count = _git("rev-list", "--count", "HEAD")

        commit_hash = _git("rev-parse", "--short", "HEAD")

        # `git diff-index --quiet` exits non-zero when the work tree differs from HEAD.
        diff_status = subprocess.run(
            ["git", "diff-index", "--quiet", "HEAD", "--"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        dirty_suffix = "" if diff_status.returncode == 0 else ".dirty"

        # Ensure count is a valid integer
        try:
            dev_count = int(count)
        except (ValueError, TypeError):
            dev_count = 0

        # Format: <base_version>[.dev<count>]+g<hash>[.dirty]
        result = base_version
        if dev_count > 0:
            result += f".dev{dev_count}"
        result += f"+g{commit_hash}{dirty_suffix}"

    except (subprocess.CalledProcessError, FileNotFoundError, OSError):
        # Git is not available or not a git repository. Never interpolate a
        # literal None into the version string.
        fallback_base = base_version if base_version is not None else "0.0.0"
        result = f"{fallback_base}+unknown"

    # Cache the result under the originally requested base version
    _version_cache["version"] = result
    _version_cache["base_version"] = requested_base

    return result
@@ -1,6 +1,7 @@
1
- import random
2
1
  import itertools
2
+ import random
3
3
  from typing import Hashable
4
+
4
5
  from . import dictionary
5
6
 
6
7
  __all__ = ["generate_id"]
@@ -8,7 +9,7 @@ __all__ = ["generate_id"]
8
9
  system_random = random.SystemRandom()
9
10
 
10
11
 
11
- def generate_id(separator="-", seed: int | float | str | bytes | bytearray | None = None, word_count=4) -> str:
12
+ def generate_id(separator="-", seed: int | float | str | bytes | bytearray | None = None, word_count=5) -> str:
12
13
  """
13
14
  Generate a human readable ID
14
15
 
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Logging utilities for the eval_protocol package.
4
+
5
+ This module provides centralized logging configuration and utilities
6
+ for consistent logging across the eval_protocol package.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_dir
16
+
17
+
18
def setup_logger(
    name: str,
    log_file: Optional[str] = None,
    level: int = logging.INFO,
    console_level: int = logging.INFO,
    file_level: int = logging.DEBUG,
) -> logging.Logger:
    """
    Set up a logger with a console handler and, optionally, a file handler.

    A logger that already has handlers and an explicit level is returned
    untouched, so repeated calls do not stack duplicate handlers.

    Args:
        name: Logger name
        log_file: Optional log file name, created inside the ``logs`` directory
            under the .eval_protocol directory. No file handler is added when omitted.
        level: Overall logger level. Records below this level are dropped by the
            logger before any handler sees them, so a handler level lower than
            ``level`` has no additional effect.
        console_level: Console handler level
        file_level: File handler level

    Returns:
        Configured logger instance
    """
    # Create logs directory under eval_protocol
    logs_dir = Path(find_eval_protocol_dir()) / "logs"
    logs_dir.mkdir(exist_ok=True)

    logger = logging.getLogger(name)

    # Only configure if not already configured (has handlers and proper level)
    if logger.handlers and logger.level != logging.NOTSET:
        return logger

    logger.setLevel(level)

    # Remove existing handlers to avoid duplicates, closing each one so that
    # file handlers release their file descriptor instead of leaking it.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    file_formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    console_formatter = logging.Formatter("%(levelname)s - %(message)s")

    # Console handler - explicitly write to sys.stdout
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(console_level)
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # File handler (if log_file specified)
    if log_file:
        log_file_path = logs_dir / log_file
        # Explicit encoding keeps log files readable regardless of the locale.
        file_handler = logging.FileHandler(log_file_path, encoding="utf-8")
        file_handler.setLevel(file_level)
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    # Prevent propagation to avoid duplicate logging
    logger.propagate = False

    return logger
77
+
78
+
79
def get_logger(name: str) -> logging.Logger:
    """
    Return the logger called ``name``, configuring it on first use.

    A logger that already has handlers is returned as-is. Otherwise it is set
    up through setup_logger():

    - ``eval_watcher`` with ``--daemon`` in sys.argv: file ``eval_watcher.log``,
      console handler raised to CRITICAL.
    - ``eval_watcher`` without ``--daemon``: console only, no log file.
    - any other name: console plus the file ``<name>.log``.

    Args:
        name: Logger name

    Returns:
        Logger instance
    """
    existing = logging.getLogger(name)
    if existing.handlers:
        return existing

    default_log_file = f"{name}.log"

    if name != "eval_watcher":
        return setup_logger(name, default_log_file)

    # eval_watcher: the daemon subprocess logs to its file, the top-level
    # process logs to the console only.
    if "--daemon" in sys.argv:
        return setup_logger(name, default_log_file, console_level=logging.CRITICAL)
    return setup_logger(name, None)
108
+
109
+
110
def log_evaluation_event(
    event_type: str, evaluation_id: str, message: str, level: int = logging.INFO, **kwargs
) -> None:
    """
    Log an evaluation-specific event through the ``evaluation_events`` logger.

    Args:
        event_type: Type of event (e.g., 'start', 'complete', 'error')
        evaluation_id: Evaluation identifier
        message: Log message
        level: Numeric log level; any level is accepted, not only the five
            standard constants
        **kwargs: Additional context to include in log
    """
    logger = get_logger("evaluation_events")

    # Create structured log entry
    log_entry = {"event_type": event_type, "evaluation_id": evaluation_id, "message": message, **kwargs}

    # Logger.log honours every numeric level; dispatching on the named
    # constants silently dropped events logged at a custom level.
    logger.log(level, f"EVENT: {log_entry}")
138
+
139
+
140
def log_performance_metric(metric_name: str, value: float, unit: str = "", context: Optional[dict] = None) -> None:
    """
    Emit a performance metric at INFO level through the ``performance_metrics`` logger.

    Args:
        metric_name: Name of the metric
        value: Metric value
        unit: Unit of measurement
        context: Additional context information; an empty dict is logged when omitted
    """
    details = context if context else {}
    payload = {"metric": metric_name, "value": value, "unit": unit, "context": details}

    get_logger("performance_metrics").info(f"METRIC: {payload}")
155
+
156
+
157
def log_error_with_context(error: Exception, context: str, additional_info: Optional[dict] = None) -> None:
    """
    Log an error, with context and traceback, through the ``errors`` logger.

    Args:
        error: The exception that occurred
        context: Context where the error occurred
        additional_info: Additional information about the error
    """
    logger = get_logger("errors")

    error_entry = {
        "error_type": type(error).__name__,
        "error_message": str(error),
        "context": context,
        "additional_info": additional_info or {},
    }

    # Pass the exception itself: exc_info=True reads sys.exc_info(), which is
    # empty when this is called outside an `except` block and would log
    # "NoneType: None" instead of the traceback of `error`.
    logger.error(f"ERROR: {error_entry}", exc_info=error)
@@ -16,6 +16,7 @@ from mcp.client.session import ClientSession
16
16
  from mcp.client.streamable_http import streamablehttp_client
17
17
 
18
18
  from ...types import MCPSession
19
+ from mcp.types import Implementation
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
@@ -50,19 +51,16 @@ class MCPConnectionManager:
50
51
 
51
52
  exit_stack = AsyncExitStack()
52
53
 
53
- client_info = None
54
- if session.seed is not None or (session.dataset_row and session.dataset_row.environment_context):
55
- from mcp.types import Implementation
56
-
57
- client_info = Implementation(name="reward-kit", version="1.0.0", _extra={})
58
- if session.seed is not None:
59
- client_info._extra["seed"] = session.seed
60
- if session.dataset_row and session.dataset_row.environment_context:
61
- client_info._extra["config"] = session.dataset_row.environment_context
62
- if session.dataset_row and session.dataset_row.id:
63
- client_info._extra["dataset_row_id"] = session.dataset_row.id
64
- if session.model_id:
65
- client_info._extra["model_id"] = session.model_id
54
+ client_info = Implementation(name="reward-kit", version="1.0.0", _extra={})
55
+ client_info._extra["session_id"] = session.session_id
56
+ if session.seed is not None:
57
+ client_info._extra["seed"] = session.seed
58
+ if session.dataset_row and session.dataset_row.environment_context:
59
+ client_info._extra["config"] = session.dataset_row.environment_context
60
+ if session.dataset_row and session.dataset_row.id:
61
+ client_info._extra["dataset_row_id"] = session.dataset_row.id
62
+ if session.model_id:
63
+ client_info._extra["model_id"] = session.model_id
66
64
 
67
65
  read_stream, write_stream, _ = await exit_stack.enter_async_context(
68
66
  streamablehttp_client(session.base_url, terminate_on_close=True)
@@ -77,32 +75,6 @@ class MCPConnectionManager:
77
75
  session._mcp_session = mcp_session
78
76
  session._exit_stack = exit_stack
79
77
 
80
- # Update session ID to match server's calculation (for control plane sync)
81
- if client_info and hasattr(client_info, "_extra"):
82
- extra_data = client_info._extra
83
- if extra_data and isinstance(extra_data, dict):
84
-
85
- seed_value = extra_data.get("seed")
86
- config_value = extra_data.get("config", {})
87
- dataset_row_id_value = extra_data.get("dataset_row_id")
88
- model_id_value = extra_data.get("model_id")
89
-
90
- stable_data = {
91
- "seed": seed_value,
92
- "config": config_value,
93
- "dataset_row_id": dataset_row_id_value,
94
- "model_id": model_id_value,
95
- "name": client_info.name,
96
- "version": client_info.version,
97
- }
98
-
99
- stable_str = json.dumps(stable_data, sort_keys=True)
100
- server_session_id = hashlib.md5(stable_str.encode()).hexdigest()
101
-
102
- # Update the session ID to match what the server generated
103
- session.session_id = server_session_id
104
- logger.info(f"Updated session ID to match server: {server_session_id}")
105
-
106
78
  # PRE-WARM: Discover and cache tools immediately after session initialization
107
79
  # This prevents concurrent list_tools() calls later
108
80
  await self._prewarm_tools_cache(session)