eval-protocol 0.3.9.dev1__tar.gz → 0.3.10.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (473)
  1. {eval_protocol-0.3.9.dev1/eval_protocol.egg-info → eval_protocol-0.3.10.dev2}/PKG-INFO +2 -2
  2. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/_version.py +3 -3
  3. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/fireworks_tracing.py +9 -2
  4. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/auth.py +72 -2
  5. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli.py +8 -6
  6. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/create_rft.py +66 -100
  7. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/local_test.py +7 -0
  8. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/upload.py +3 -3
  9. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/evaluation.py +53 -32
  10. eval_protocol-0.3.10.dev2/eval_protocol/fireworks_client.py +132 -0
  11. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/platform_api.py +17 -27
  12. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test.py +2 -2
  13. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/remote_rollout_processor.py +16 -0
  14. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/tracing_utils.py +18 -3
  15. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2/eval_protocol.egg-info}/PKG-INFO +2 -2
  16. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/SOURCES.txt +5 -3
  17. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/requires.txt +1 -1
  18. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/pyproject.toml +1 -1
  19. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_create_rft.py +17 -61
  20. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_ep_upload_e2e.py +51 -140
  21. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_evaluation.py +22 -7
  22. eval_protocol-0.3.10.dev2/tests/test_fireworks_client.py +143 -0
  23. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_upload_entrypoint.py +10 -12
  24. eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-10cZ11iB.js +137 -0
  25. eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-10cZ11iB.js.map +1 -0
  26. eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-DOD73Wyg.css +1 -0
  27. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/index.html +2 -2
  28. eval_protocol-0.3.9.dev1/vite-app/dist/assets/index-CuQbfdPD.js +0 -46
  29. eval_protocol-0.3.9.dev1/vite-app/dist/assets/index-CuQbfdPD.js.map +0 -1
  30. eval_protocol-0.3.9.dev1/vite-app/dist/assets/index-iZp_HgyW.css +0 -1
  31. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/LICENSE +0 -0
  32. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/README.md +0 -0
  33. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/__init__.py +0 -0
  34. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/normalize_sandbox_fusion.py +0 -0
  35. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/utils/__init__.py +0 -0
  36. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/utils/generate_api_key.py +0 -0
  37. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/utils/subprocess_manager.py +0 -0
  38. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/__init__.py +0 -0
  39. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/__main__.py +0 -0
  40. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/__init__.py +0 -0
  41. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/base.py +0 -0
  42. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/bigquery.py +0 -0
  43. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/braintrust.py +0 -0
  44. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/dataframe.py +0 -0
  45. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/huggingface.py +0 -0
  46. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langchain.py +0 -0
  47. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langfuse.py +0 -0
  48. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langsmith.py +0 -0
  49. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/openai_responses.py +0 -0
  50. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/trl.py +0 -0
  51. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/utils.py +0 -0
  52. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/weave.py +0 -0
  53. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/__init__.py +0 -0
  54. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/models.py +0 -0
  55. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/orchestrator.py +0 -0
  56. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resource_abc.py +0 -0
  57. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resource_pool.py +0 -0
  58. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
  59. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
  60. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
  61. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
  62. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
  63. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
  64. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
  65. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
  66. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
  67. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
  68. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/task_manager.py +0 -0
  69. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/tool_registry.py +0 -0
  70. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/__init__.py +0 -0
  71. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
  72. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
  73. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_aime25.py +0 -0
  74. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
  75. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
  76. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_gpqa.py +0 -0
  77. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
  78. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
  79. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
  80. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
  81. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
  82. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/common.py +0 -0
  83. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/export_docs.py +0 -0
  84. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/logs.py +0 -0
  85. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
  86. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/utils.py +0 -0
  87. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/common_utils.py +0 -0
  88. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/config.py +0 -0
  89. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/__init__.py +0 -0
  90. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
  91. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/factory_data_loader.py +0 -0
  92. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/inline_data_loader.py +0 -0
  93. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
  94. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/models.py +0 -0
  95. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/__init__.py +0 -0
  96. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
  97. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
  98. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
  99. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
  100. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/datasets/__init__.py +0 -0
  101. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/datasets/loader.py +0 -0
  102. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/directory_utils.py +0 -0
  103. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/__init__.py +0 -0
  104. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/event_bus.py +0 -0
  105. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/logger.py +0 -0
  106. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
  107. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
  108. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/exceptions.py +0 -0
  109. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/execution/__init__.py +0 -0
  110. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/execution/pipeline.py +0 -0
  111. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/fireworks_rft.py +0 -0
  112. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/gcp_tools.py +0 -0
  113. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/cache.py +0 -0
  114. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/clients/base.py +0 -0
  115. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/clients.py +0 -0
  116. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generic_server.py +0 -0
  117. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/get_pep440_version.py +0 -0
  118. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/human_id/__init__.py +0 -0
  119. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/human_id/dictionary.py +0 -0
  120. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/__init__.py +0 -0
  121. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/deepeval.py +0 -0
  122. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/openai_rft.py +0 -0
  123. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/openeval.py +0 -0
  124. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/tinker_cookbook.py +0 -0
  125. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
  126. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/trl.py +0 -0
  127. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/__init__.py +0 -0
  128. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
  129. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
  130. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
  131. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
  132. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/init.py +0 -0
  133. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/rollout_context.py +0 -0
  134. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
  135. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/util.py +0 -0
  136. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/logging_utils.py +0 -0
  137. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/__init__.py +0 -0
  138. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/adapter.py +0 -0
  139. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
  140. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/client/connection.py +0 -0
  141. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/clients.py +0 -0
  142. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
  143. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/base_policy.py +0 -0
  144. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/manager.py +0 -0
  145. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
  146. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
  147. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
  148. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
  149. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/mcpgym.py +0 -0
  150. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/process_manager.py +0 -0
  151. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
  152. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/session/manager.py +0 -0
  153. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
  154. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
  155. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
  156. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/config.py +0 -0
  157. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/main.py +0 -0
  158. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
  159. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
  160. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
  161. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
  162. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_env.py +0 -0
  163. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/__init__.py +0 -0
  164. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
  165. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
  166. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
  167. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/README.md +0 -0
  168. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
  169. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
  170. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
  171. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
  172. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/server.py +0 -0
  173. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
  174. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
  175. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
  176. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
  177. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
  178. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/models.py +0 -0
  179. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/packaging.py +0 -0
  180. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/playback_policy.py +0 -0
  181. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/__init__.py +0 -0
  182. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
  183. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/app.py +0 -0
  184. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/auth.py +0 -0
  185. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
  186. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
  187. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/main.py +0 -0
  188. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/models.py +0 -0
  189. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
  190. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/__init__.py +0 -0
  191. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/buffer.py +0 -0
  192. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
  193. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
  194. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +0 -0
  195. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
  196. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
  197. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
  198. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
  199. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
  200. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
  201. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
  202. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
  203. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
  204. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/exception_config.py +0 -0
  205. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/execution.py +0 -0
  206. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
  207. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
  208. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/handle_persist_flow.py +0 -0
  209. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
  210. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
  211. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/parameterize.py +0 -0
  212. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/plugin.py +0 -0
  213. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/priority_scheduler.py +0 -0
  214. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/rollout_processor.py +0 -0
  215. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
  216. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/store_experiment_link.py +0 -0
  217. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/store_results_url.py +0 -0
  218. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/types.py +0 -0
  219. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/validate_signature.py +0 -0
  220. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/__init__.py +0 -0
  221. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
  222. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
  223. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
  224. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
  225. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
  226. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
  227. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
  228. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/llm_judge.py +0 -0
  229. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
  230. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
  231. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
  232. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
  233. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/utils.py +0 -0
  234. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/resources.py +0 -0
  235. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/reward_function.py +0 -0
  236. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/__init__.py +0 -0
  237. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/accuracy.py +0 -0
  238. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
  239. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
  240. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
  241. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
  242. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
  243. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/code_execution.py +0 -0
  244. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
  245. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
  246. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
  247. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/format.py +0 -0
  248. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/function_calling.py +0 -0
  249. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/json_schema.py +0 -0
  250. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
  251. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
  252. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/length.py +0 -0
  253. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
  254. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/math.py +0 -0
  255. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
  256. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
  257. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/repetition.py +0 -0
  258. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/tag_count.py +0 -0
  259. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rl_processing.py +0 -0
  260. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/server.py +0 -0
  261. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/stats/__init__.py +0 -0
  262. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/stats/confidence_intervals.py +0 -0
  263. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/__init__.py +0 -0
  264. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/gepa_trainer.py +0 -0
  265. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/gepa_utils.py +0 -0
  266. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/trainer.py +0 -0
  267. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/utils.py +0 -0
  268. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/typed_interface.py +0 -0
  269. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/__init__.py +0 -0
  270. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/errors.py +0 -0
  271. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/remote_rollout_processor.py +0 -0
  272. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/types.py +0 -0
  273. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/__init__.py +0 -0
  274. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
  275. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
  276. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/browser_utils.py +0 -0
  277. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/check_server_status.py +0 -0
  278. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
  279. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/evaluation_row_utils.py +0 -0
  280. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/logs_models.py +0 -0
  281. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/logs_server.py +0 -0
  282. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/module_loader.py +0 -0
  283. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
  284. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/show_results_url.py +0 -0
  285. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/static_policy.py +0 -0
  286. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/subprocess_utils.py +0 -0
  287. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/vite_server.py +0 -0
  288. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
  289. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/entry_points.txt +0 -0
  290. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
  291. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/setup.cfg +0 -0
  292. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/setup.py +0 -0
  293. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_accuracy.py +0 -0
  294. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_accuracy_length.py +0 -0
  295. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_adapters_e2e.py +0 -0
  296. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_agent_orchestrator.py +0 -0
  297. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_agent_resources.py +0 -0
  298. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_auth.py +0 -0
  299. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_batch_evaluation.py +0 -0
  300. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_agent.py +0 -0
  301. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_args.py +0 -0
  302. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_local_test.py +0 -0
  303. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_code_execution.py +0 -0
  304. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_config.py +0 -0
  305. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_control_plane_separation.py +0 -0
  306. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cpp_code.py +0 -0
  307. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_data_driven_task_manager.py +0 -0
  308. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_deepcoder_reward.py +0 -0
  309. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_deepeval_integration.py +0 -0
  310. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_directory_utils.py +0 -0
  311. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_e2b_integration.py +0 -0
  312. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_e2b_js_integration.py +0 -0
  313. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_edge_cases.py +0 -0
  314. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_eval_protocol_import.py +0 -0
  315. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_evaluation_postprocess.py +0 -0
  316. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_event_bus.py +0 -0
  317. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_event_bus_helper.py +0 -0
  318. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_examples_end_to_end.py +0 -0
  319. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_exception_config.py +0 -0
  320. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_exceptions.py +0 -0
  321. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_fireworks_api.py +0 -0
  322. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_format.py +0 -0
  323. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_fractional_code.py +0 -0
  324. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_function_calling.py +0 -0
  325. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_gcp_tools.py +0 -0
  326. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_generic_server.py +0 -0
  327. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_human_id.py +0 -0
  328. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_integration.py +0 -0
  329. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_json_schema.py +0 -0
  330. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_kwargs_validation.py +0 -0
  331. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_language_consistency.py +0 -0
  332. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_lean_prover.py +0 -0
  333. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_lean_prover_runner.py +0 -0
  334. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_length.py +0 -0
  335. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_list_comparison_math_reward.py +0 -0
  336. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_litellm_policy_provider_fields.py +0 -0
  337. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_logs_server.py +0 -0
  338. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_logs_server_simple.py +0 -0
  339. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_math.py +0 -0
  340. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_message_field_filtering.py +0 -0
  341. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_minimal.py +0 -0
  342. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_models.py +0 -0
  343. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_models_rl.py +0 -0
  344. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
  345. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_n_variant_batch_integration.py +0 -0
  346. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_n_variant_integration.py +0 -0
  347. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openai_compatibility.py +0 -0
  348. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openai_rft_integration.py +0 -0
  349. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openeval_integration.py +0 -0
  350. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_packaging.py +0 -0
  351. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_parallel_rollouts.py +0 -0
  352. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_platform_api.py +0 -0
  353. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_priority_scheduler.py +0 -0
  354. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_quickstart_utils.py +0 -0
  355. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_readiness.py +0 -0
  356. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reasoning_steps.py +0 -0
  357. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_repetition.py +0 -0
  358. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_repetition_debug.py +0 -0
  359. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_retry_mechanism.py +0 -0
  360. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reward_function.py +0 -0
  361. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reward_protocol_import.py +0 -0
  362. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rl_processing.py +0 -0
  363. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
  364. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rollout_logprobs.py +0 -0
  365. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_server.py +0 -0
  366. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_show_results_url.py +0 -0
  367. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_sqlite_hardening.py +0 -0
  368. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_migration_changes.py +0 -0
  369. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_migration_integration.py +0 -0
  370. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_model.py +0 -0
  371. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_tag_count.py +0 -0
  372. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_tau_bench_airline_smoke.py +0 -0
  373. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_training_utils.py +0 -0
  374. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_typed_interface.py +0 -0
  375. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_typed_interface_rl.py +0 -0
  376. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_url_handling.py +0 -0
  377. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_vite_server.py +0 -0
  378. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/__init__.py +0 -0
  379. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/__init__.py +0 -0
  380. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/base.py +0 -0
  381. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/llm_agent.py +0 -0
  382. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/__init__.py +0 -0
  383. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/api_config.py +0 -0
  384. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/data_model.py +0 -0
  385. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
  386. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/cli.py +0 -0
  387. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/config.py +0 -0
  388. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/airline/policy.md +0 -0
  389. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/mock/policy.md +0 -0
  390. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
  391. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/retail/policy.md +0 -0
  392. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
  393. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
  394. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
  395. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
  396. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
  397. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
  398. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
  399. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/__init__.py +0 -0
  400. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/message.py +0 -0
  401. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/simulation.py +0 -0
  402. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/tasks.py +0 -0
  403. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/__init__.py +0 -0
  404. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
  405. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
  406. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
  407. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
  408. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
  409. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
  410. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
  411. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
  412. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
  413. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
  414. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
  415. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
  416. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
  417. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
  418. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
  419. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
  420. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
  421. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
  422. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
  423. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
  424. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
  425. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
  426. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
  427. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
  428. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
  429. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
  430. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
  431. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
  432. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
  433. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
  434. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/__init__.py +0 -0
  435. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/db.py +0 -0
  436. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/environment.py +0 -0
  437. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/server.py +0 -0
  438. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/tool.py +0 -0
  439. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/toolkit.py +0 -0
  440. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
  441. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
  442. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
  443. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
  444. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
  445. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
  446. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
  447. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
  448. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/__init__.py +0 -0
  449. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
  450. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
  451. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
  452. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
  453. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
  454. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
  455. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/registry.py +0 -0
  456. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/run.py +0 -0
  457. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/__init__.py +0 -0
  458. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/check_data.py +0 -0
  459. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
  460. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
  461. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
  462. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/__init__.py +0 -0
  463. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/base.py +0 -0
  464. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/user_simulator.py +0 -0
  465. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/__init__.py +0 -0
  466. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/display.py +0 -0
  467. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/io_utils.py +0 -0
  468. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/llm_utils.py +0 -0
  469. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
  470. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/utils.py +0 -0
  471. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/versioneer.py +0 -0
  472. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
  473. {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.3.9.dev1
3
+ Version: 0.3.10.dev2
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -29,7 +29,7 @@ Requires-Dist: pytest>=6.0.0
29
29
  Requires-Dist: pytest-asyncio>=0.21.0
30
30
  Requires-Dist: peewee>=3.18.2
31
31
  Requires-Dist: backoff>=2.2.0
32
- Requires-Dist: fireworks-ai==1.0.0a20
32
+ Requires-Dist: fireworks-ai==1.0.0a22
33
33
  Requires-Dist: questionary>=2.0.0
34
34
  Requires-Dist: toml>=0.10.0
35
35
  Requires-Dist: loguru>=0.6.0
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2026-01-08T13:29:17-0800",
11
+ "date": "2026-01-13T16:25:00-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "764ac4f132c35fe01c354b4150cbc19c7eedea12",
15
- "version": "0.3.9.dev.1"
14
+ "full-revisionid": "66f191a09db5364b9cd9bb21230e1f48e50be724",
15
+ "version": "0.3.10.dev.2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -253,6 +253,7 @@ class FireworksTracingAdapter(BaseAdapter):
253
253
  project_id: Optional[str] = None,
254
254
  base_url: str = "https://tracing.fireworks.ai",
255
255
  timeout: int = 300,
256
+ api_key: Optional[str] = None,
256
257
  ):
257
258
  """Initialize the Fireworks Tracing adapter.
258
259
 
@@ -260,10 +261,16 @@ class FireworksTracingAdapter(BaseAdapter):
260
261
  project_id: Optional project ID. If not provided, uses the default project configured on the server.
261
262
  base_url: The base URL of the tracing proxy (default: https://tracing.fireworks.ai)
262
263
  timeout: Request timeout in seconds (default: 300)
264
+ api_key: Optional API key. If not provided, falls back to FIREWORKS_API_KEY environment variable.
263
265
  """
264
266
  self.project_id = project_id
265
267
  self.base_url = base_url.rstrip("/")
266
268
  self.timeout = timeout
269
+ self._api_key = api_key
270
+
271
+ def _get_api_key(self) -> Optional[str]:
272
+ """Get the API key, preferring instance-level key over environment variable."""
273
+ return self._api_key or os.environ.get("FIREWORKS_API_KEY")
267
274
 
268
275
  def search_logs(self, tags: List[str], limit: int = 100, hours_back: int = 24) -> List[Dict[str, Any]]:
269
276
  """Fetch logs from Fireworks tracing gateway /logs endpoint.
@@ -276,7 +283,7 @@ class FireworksTracingAdapter(BaseAdapter):
276
283
  from ..common_utils import get_user_agent
277
284
 
278
285
  headers = {
279
- "Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
286
+ "Authorization": f"Bearer {self._get_api_key()}",
280
287
  "User-Agent": get_user_agent(),
281
288
  }
282
289
  params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
@@ -407,7 +414,7 @@ class FireworksTracingAdapter(BaseAdapter):
407
414
  from ..common_utils import get_user_agent
408
415
 
409
416
  headers = {
410
- "Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
417
+ "Authorization": f"Bearer {self._get_api_key()}",
411
418
  "User-Agent": get_user_agent(),
412
419
  }
413
420
 
@@ -1,12 +1,75 @@
1
1
  import logging
2
2
  import os
3
- from typing import Optional
3
+ from typing import Dict, Optional
4
4
 
5
5
  import requests
6
+ from dotenv import dotenv_values, find_dotenv, load_dotenv
6
7
 
7
8
  logger = logging.getLogger(__name__)
8
9
 
9
10
 
11
+ def find_dotenv_path(search_path: Optional[str] = None) -> Optional[str]:
12
+ """
13
+ Find the .env file path, searching .env.dev first, then .env.
14
+
15
+ Args:
16
+ search_path: Directory to search from. If None, uses current working directory.
17
+
18
+ Returns:
19
+ Path to the .env file if found, otherwise None.
20
+ """
21
+ # If a specific search path is provided, look there first
22
+ if search_path:
23
+ env_dev_path = os.path.join(search_path, ".env.dev")
24
+ if os.path.isfile(env_dev_path):
25
+ return env_dev_path
26
+ env_path = os.path.join(search_path, ".env")
27
+ if os.path.isfile(env_path):
28
+ return env_path
29
+ return None
30
+
31
+ # Otherwise use find_dotenv to search up the directory tree
32
+ env_dev_path = find_dotenv(filename=".env.dev", raise_error_if_not_found=False, usecwd=True)
33
+ if env_dev_path:
34
+ return env_dev_path
35
+ env_path = find_dotenv(filename=".env", raise_error_if_not_found=False, usecwd=True)
36
+ if env_path:
37
+ return env_path
38
+ return None
39
+
40
+
41
+ def get_dotenv_values(search_path: Optional[str] = None) -> Dict[str, Optional[str]]:
42
+ """
43
+ Get all key-value pairs from the .env file.
44
+
45
+ Args:
46
+ search_path: Directory to search from. If None, uses current working directory.
47
+
48
+ Returns:
49
+ Dictionary of environment variable names to values.
50
+ """
51
+ dotenv_path = find_dotenv_path(search_path)
52
+ if dotenv_path:
53
+ return dotenv_values(dotenv_path)
54
+ return {}
55
+
56
+
57
+ # --- Load .env files ---
58
+ # Attempt to load .env.dev first, then .env as a fallback.
59
+ # This happens when the module is imported.
60
+ # We use override=False (default) so that existing environment variables
61
+ # (e.g., set in the shell) are NOT overridden by .env files.
62
+ _DOTENV_PATH = find_dotenv_path()
63
+ if _DOTENV_PATH:
64
+ load_dotenv(dotenv_path=_DOTENV_PATH, override=False)
65
+ logger.debug(f"eval_protocol.auth: Loaded environment variables from: {_DOTENV_PATH}")
66
+ else:
67
+ logger.debug(
68
+ "eval_protocol.auth: No .env.dev or .env file found. Relying on shell/existing environment variables."
69
+ )
70
+ # --- End .env loading ---
71
+
72
+
10
73
  def get_fireworks_api_key() -> Optional[str]:
11
74
  """
12
75
  Retrieves the Fireworks API key.
@@ -73,6 +136,8 @@ def verify_api_key_and_get_account_id(
73
136
  Args:
74
137
  api_key: Optional explicit API key. When None, resolves via get_fireworks_api_key().
75
138
  api_base: Optional explicit API base. When None, resolves via get_fireworks_api_base().
139
+ If api_base is api.fireworks.ai, it is used directly. Otherwise, defaults to
140
+ dev.api.fireworks.ai for the verification call.
76
141
 
77
142
  Returns:
78
143
  The resolved account id if verification succeeds and the header is present; otherwise None.
@@ -81,7 +146,12 @@ def verify_api_key_and_get_account_id(
81
146
  resolved_key = api_key or get_fireworks_api_key()
82
147
  if not resolved_key:
83
148
  return None
84
- resolved_base = api_base or get_fireworks_api_base()
149
+ provided_base = api_base or get_fireworks_api_base()
150
+ # Use api.fireworks.ai if explicitly provided, otherwise fall back to dev
151
+ if "api.fireworks.ai" in provided_base:
152
+ resolved_base = provided_base
153
+ else:
154
+ resolved_base = "https://dev.api.fireworks.ai"
85
155
 
86
156
  from .common_utils import get_user_agent
87
157
 
@@ -81,13 +81,12 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
81
81
  "--env-file",
82
82
  help="Path to .env file containing secrets to upload (default: .env in current directory)",
83
83
  )
84
- upload_parser.add_argument(
85
- "--force",
86
- action="store_true",
87
- help="Overwrite existing evaluator with the same ID",
88
- )
89
84
 
90
85
  # Auto-generate flags from SDK Fireworks().evaluators.create() signature
86
+ # Note: We use Fireworks() directly here instead of create_fireworks_client()
87
+ # because we only need the method signature for introspection, not a fully
88
+ # authenticated client. create_fireworks_client() would trigger an HTTP request
89
+ # to verify the API key, causing delays even for --help invocations.
91
90
  create_evaluator_fn = Fireworks().evaluators.create
92
91
 
93
92
  upload_skip_fields = {
@@ -137,7 +136,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
137
136
 
138
137
  rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
139
138
  rft_parser.add_argument("--dry-run", action="store_true", help="Print planned SDK call without sending")
140
- rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
141
139
  rft_parser.add_argument("--skip-validation", action="store_true", help="Skip local dataset/evaluator validation")
142
140
  rft_parser.add_argument(
143
141
  "--ignore-docker",
@@ -198,6 +196,10 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
198
196
  "loss_config.method": "RL loss method for underlying trainers. One of {grpo,dapo}.",
199
197
  }
200
198
 
199
+ # Note: We use Fireworks() directly here instead of create_fireworks_client()
200
+ # because we only need the method signature for introspection, not a fully
201
+ # authenticated client. create_fireworks_client() would trigger an HTTP request
202
+ # to verify the API key, causing delays even for --help invocations.
201
203
  create_rft_job_fn = Fireworks().reinforcement_fine_tuning_jobs.create
202
204
 
203
205
  add_args_from_callable_signature(
@@ -7,19 +7,18 @@ import sys
7
7
  import time
8
8
  from typing import Any, Callable, Dict, Optional
9
9
  import inspect
10
- import requests
11
10
  import tempfile
12
11
  from pydantic import ValidationError
13
12
 
14
13
  from ..auth import get_fireworks_api_base, get_fireworks_api_key
15
- from ..common_utils import get_user_agent, load_jsonl
14
+ from ..fireworks_client import create_fireworks_client
15
+ from ..common_utils import load_jsonl
16
16
  from ..fireworks_rft import (
17
17
  create_dataset_from_jsonl,
18
18
  detect_dataset_builder,
19
19
  materialize_dataset_via_builder,
20
20
  )
21
21
  from ..models import EvaluationRow
22
- from .upload import upload_command
23
22
  from .utils import (
24
23
  _build_entry_point,
25
24
  _build_trimmed_dataset_id,
@@ -35,8 +34,6 @@ from .utils import (
35
34
  )
36
35
  from .local_test import run_evaluator_test
37
36
 
38
- from fireworks import Fireworks
39
-
40
37
 
41
38
  def _extract_dataset_adapter(
42
39
  test_file_path: str, test_func_name: str
@@ -223,64 +220,68 @@ def _extract_jsonl_from_input_dataset(test_file_path: str, test_func_name: str)
223
220
  return None
224
221
 
225
222
 
226
- def _poll_evaluator_status(
227
- evaluator_resource_name: str, api_key: str, api_base: str, timeout_minutes: int = 10
223
+ def _poll_evaluator_version_status(
224
+ evaluator_id: str,
225
+ version_id: str,
226
+ api_key: str,
227
+ api_base: str,
228
+ timeout_minutes: int = 10,
228
229
  ) -> bool:
229
230
  """
230
- Poll evaluator status until it becomes ACTIVE or times out.
231
+ Poll a specific evaluator version status until it becomes ACTIVE or times out.
232
+
233
+ Uses the Fireworks SDK to get the specified version of the evaluator and checks
234
+ its build state.
231
235
 
232
236
  Args:
233
- evaluator_resource_name: Full evaluator resource name (e.g., accounts/xxx/evaluators/yyy)
237
+ evaluator_id: The evaluator ID (not full resource name)
238
+ version_id: The specific version ID to poll
234
239
  api_key: Fireworks API key
235
240
  api_base: Fireworks API base URL
236
241
  timeout_minutes: Maximum time to wait in minutes
237
242
 
238
243
  Returns:
239
- True if evaluator becomes ACTIVE, False if timeout or BUILD_FAILED
244
+ True if evaluator version becomes ACTIVE, False if timeout or BUILD_FAILED
240
245
  """
241
- headers = {
242
- "Authorization": f"Bearer {api_key}",
243
- "Content-Type": "application/json",
244
- "User-Agent": get_user_agent(),
245
- }
246
-
247
- check_url = f"{api_base}/v1/{evaluator_resource_name}"
248
246
  timeout_seconds = timeout_minutes * 60
249
247
  poll_interval = 10 # seconds
250
248
  start_time = time.time()
251
249
 
252
- print(f"Polling evaluator status (timeout: {timeout_minutes}m, interval: {poll_interval}s)...")
250
+ print(
251
+ f"Polling evaluator version '{version_id}' status (timeout: {timeout_minutes}m, interval: {poll_interval}s)..."
252
+ )
253
+
254
+ client = create_fireworks_client(api_key=api_key, base_url=api_base)
253
255
 
254
256
  while time.time() - start_time < timeout_seconds:
255
257
  try:
256
- response = requests.get(check_url, headers=headers, timeout=30)
257
- response.raise_for_status()
258
-
259
- evaluator_data = response.json()
260
- state = evaluator_data.get("state", "STATE_UNSPECIFIED")
261
- status = evaluator_data.get("status", "")
258
+ version = client.evaluator_versions.get(version_id, evaluator_id=evaluator_id)
259
+ state = version.state or "STATE_UNSPECIFIED"
260
+ status_msg = ""
261
+ if version.status and version.status.message:
262
+ status_msg = version.status.message
262
263
 
263
264
  if state == "ACTIVE":
264
- print("✅ Evaluator is ACTIVE and ready!")
265
+ print("✅ Evaluator version is ACTIVE and ready!")
265
266
  return True
266
267
  elif state == "BUILD_FAILED":
267
- print(f"❌ Evaluator build failed. Status: {status}")
268
+ print(f"❌ Evaluator version build failed. Status: {status_msg}")
268
269
  return False
269
270
  elif state == "BUILDING":
270
271
  elapsed_minutes = (time.time() - start_time) / 60
271
- print(f"⏳ Evaluator is still building... ({elapsed_minutes:.1f}m elapsed)")
272
+ print(f"⏳ Evaluator version is still building... ({elapsed_minutes:.1f}m elapsed)")
272
273
  else:
273
- print(f"⏳ Evaluator state: {state}, status: {status}")
274
+ print(f"⏳ Evaluator version state: {state}, status: {status_msg}")
274
275
 
275
- except requests.exceptions.RequestException as e:
276
- print(f"Warning: Failed to check evaluator status: {e}")
276
+ except Exception as e:
277
+ print(f"Warning: Failed to check evaluator version status: {e}")
277
278
 
278
279
  # Wait before next poll
279
280
  time.sleep(poll_interval)
280
281
 
281
282
  # Timeout reached
282
283
  elapsed_minutes = (time.time() - start_time) / 60
283
- print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator is not yet ACTIVE")
284
+ print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator version is not yet ACTIVE")
284
285
  return False
285
286
 
286
287
 
@@ -565,42 +566,16 @@ def _upload_dataset(
565
566
  def _upload_and_ensure_evaluator(
566
567
  project_root: str,
567
568
  evaluator_id: str,
568
- evaluator_resource_name: str,
569
569
  api_key: str,
570
570
  api_base: str,
571
- force: bool,
572
571
  ) -> bool:
573
- """Ensure the evaluator exists and is ACTIVE, uploading it if needed."""
574
- # Optional short-circuit: if evaluator already exists and not forcing, skip upload path
575
- if not force:
576
- try:
577
- headers = {
578
- "Authorization": f"Bearer {api_key}",
579
- "Content-Type": "application/json",
580
- "User-Agent": get_user_agent(),
581
- }
582
- resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
583
- if resp.ok:
584
- state = resp.json().get("state", "STATE_UNSPECIFIED")
585
- print(f"✓ Evaluator exists (state: {state}). Skipping upload (use --force to overwrite).")
586
- # Poll for ACTIVE before proceeding
587
- print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
588
- if not _poll_evaluator_status(
589
- evaluator_resource_name=evaluator_resource_name,
590
- api_key=api_key,
591
- api_base=api_base,
592
- timeout_minutes=10,
593
- ):
594
- dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
595
- print("\n❌ Evaluator is not ready within the timeout period.")
596
- print(f"📊 Please check the evaluator status at: {dashboard_url}")
597
- print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
598
- return False
599
- return True
600
- except requests.exceptions.RequestException:
601
- pass
572
+ """Upload evaluator and ensure its version becomes ACTIVE.
573
+
574
+ Creates/updates the evaluator and uploads the code, then polls the specific
575
+ version until it becomes ACTIVE.
576
+ """
577
+ from eval_protocol.evaluation import create_evaluation
602
578
 
603
- # Ensure evaluator exists by invoking the upload flow programmatically
604
579
  try:
605
580
  tests = _discover_tests(project_root)
606
581
  selected_entry: Optional[str] = None
@@ -617,43 +592,37 @@ def _upload_and_ensure_evaluator(
617
592
  )
618
593
  return False
619
594
 
620
- upload_args = argparse.Namespace(
621
- path=project_root,
622
- entry=selected_entry,
623
- id=evaluator_id,
624
- display_name=None,
625
- description=None,
626
- force=force, # Pass through the --force flag
627
- yes=True,
628
- env_file=None, # Add the new env_file parameter
595
+ print(f"\nUploading evaluator '{evaluator_id}'...")
596
+ result, version_id = create_evaluation(
597
+ evaluator_id=evaluator_id,
598
+ display_name=evaluator_id,
599
+ description=f"Evaluator for {evaluator_id}",
600
+ entry_point=selected_entry,
629
601
  )
630
602
 
631
- if force:
632
- print(f"🔄 Force flag enabled - will overwrite existing evaluator '{evaluator_id}'")
603
+ if not version_id:
604
+ print("Warning: Evaluator created but version upload failed.")
605
+ return False
633
606
 
634
- rc = upload_command(upload_args)
635
- if rc == 0:
636
- print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
607
+ print(f"✓ Uploaded evaluator: {evaluator_id} (version: {version_id})")
637
608
 
638
- # Poll for evaluator status
639
- print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
640
- is_active = _poll_evaluator_status(
641
- evaluator_resource_name=evaluator_resource_name,
642
- api_key=api_key,
643
- api_base=api_base,
644
- timeout_minutes=10,
645
- )
609
+ # Poll for the specific evaluator version status
610
+ print(f"Waiting for evaluator '{evaluator_id}' version '{version_id}' to become ACTIVE...")
611
+ is_active = _poll_evaluator_version_status(
612
+ evaluator_id=evaluator_id,
613
+ version_id=version_id,
614
+ api_key=api_key,
615
+ api_base=api_base,
616
+ timeout_minutes=10,
617
+ )
646
618
 
647
- if not is_active:
648
- dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
649
- print("\n❌ Evaluator is not ready within the timeout period.")
650
- print(f"📊 Please check the evaluator status at: {dashboard_url}")
651
- print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
652
- return False
653
- return True
654
- else:
655
- print("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation.")
619
+ if not is_active:
620
+ dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
621
+ print("\n❌ Evaluator version is not ready within the timeout period.")
622
+ print(f"📊 Please check the evaluator status at: {dashboard_url}")
623
+ print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
656
624
  return False
625
+ return True
657
626
  except Exception as e:
658
627
  print(f"Warning: Failed to upload evaluator automatically: {e}")
659
628
  return False
@@ -672,7 +641,7 @@ def _create_rft_job(
672
641
  ) -> int:
673
642
  """Build and submit the RFT job request (via Fireworks SDK)."""
674
643
 
675
- signature = inspect.signature(Fireworks().reinforcement_fine_tuning_jobs.create)
644
+ signature = inspect.signature(create_fireworks_client().reinforcement_fine_tuning_jobs.create)
676
645
 
677
646
  # Build top-level SDK kwargs
678
647
  sdk_kwargs: Dict[str, Any] = {
@@ -711,7 +680,7 @@ def _create_rft_job(
711
680
  return 0
712
681
 
713
682
  try:
714
- fw: Fireworks = Fireworks(api_key=api_key, base_url=api_base)
683
+ fw: Fireworks = create_fireworks_client(api_key=api_key, base_url=api_base)
715
684
  job: ReinforcementFineTuningJob = fw.reinforcement_fine_tuning_jobs.create(account_id=account_id, **sdk_kwargs)
716
685
  job_name = job.name
717
686
  print(f"\n✅ Created Reinforcement Fine-tuning Job: {job_name}")
@@ -739,7 +708,6 @@ def create_rft_command(args) -> int:
739
708
  evaluator_arg: Optional[str] = getattr(args, "evaluator", None)
740
709
  non_interactive: bool = bool(getattr(args, "yes", False))
741
710
  dry_run: bool = bool(getattr(args, "dry_run", False))
742
- force: bool = bool(getattr(args, "force", False))
743
711
  skip_validation: bool = bool(getattr(args, "skip_validation", False))
744
712
  ignore_docker: bool = bool(getattr(args, "ignore_docker", False))
745
713
  docker_build_extra: str = getattr(args, "docker_build_extra", "") or ""
@@ -810,14 +778,12 @@ def create_rft_command(args) -> int:
810
778
  if not dataset_id or not dataset_resource:
811
779
  return 1
812
780
 
813
- # 5) Ensure evaluator exists and is ACTIVE (upload + poll if needed)
781
+ # 5) Ensure evaluator exists and its latest version is ACTIVE (upload + poll if needed)
814
782
  if not _upload_and_ensure_evaluator(
815
783
  project_root=project_root,
816
784
  evaluator_id=evaluator_id,
817
- evaluator_resource_name=evaluator_resource_name,
818
785
  api_key=api_key,
819
786
  api_base=api_base,
820
- force=force,
821
787
  ):
822
788
  return 1
823
789
 
@@ -5,6 +5,7 @@ import subprocess
5
5
  import sys
6
6
  from typing import List
7
7
 
8
+ from ..auth import get_dotenv_values
8
9
  from .utils import _build_entry_point, _discover_and_select_tests
9
10
 
10
11
 
@@ -71,6 +72,12 @@ def _run_pytest_in_docker(
71
72
  workdir,
72
73
  ]
73
74
 
75
+ # Forward environment variables from .env file to the container
76
+ dotenv_vars = get_dotenv_values(project_root)
77
+ for key, value in dotenv_vars.items():
78
+ if value is not None:
79
+ cmd += ["-e", f"{key}={value}"]
80
+
74
81
  # If EP_SUMMARY_JSON is set on the host, mirror it into the container so that
75
82
  # pytest evaluation tests can write summary artifacts that are visible to the
76
83
  # host. We map paths under the host logs directory (~/.eval_protocol) into the
@@ -289,7 +289,6 @@ def upload_command(args: argparse.Namespace) -> int:
289
289
  base_id = getattr(args, "id", None)
290
290
  display_name = getattr(args, "display_name", None)
291
291
  description = getattr(args, "description", None)
292
- force = bool(getattr(args, "force", False))
293
292
  env_file = getattr(args, "env_file", None)
294
293
 
295
294
  # Load secrets from .env file and ensure they're available on Fireworks
@@ -378,17 +377,18 @@ def upload_command(args: argparse.Namespace) -> int:
378
377
 
379
378
  print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
380
379
  try:
381
- result = create_evaluation(
380
+ result, version_id = create_evaluation(
382
381
  evaluator_id=evaluator_id,
383
382
  display_name=display_name or evaluator_id,
384
383
  description=description or f"Evaluator for {qualname}",
385
- force=force,
386
384
  entry_point=entry_point,
387
385
  )
388
386
  name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id
389
387
 
390
388
  # Print success message with Fireworks dashboard link
391
389
  print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
390
+ if version_id:
391
+ print(f" Version: {version_id}")
392
392
  print("📊 View in Fireworks Dashboard:")
393
393
  dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
394
394
  print(f" {dashboard_url}\n")